mayatool3_test3: mayachemtools/bin/ExtractFromPDBFiles.pl comparison

comparison mayachemtools/bin/ExtractFromPDBFiles.pl @ 0:73ae111cf86f draft

Uploaded

author	deepakjadmin
date	Wed, 20 Jan 2016 11:55:01 -0500
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:73ae111cf86f
+#!/usr/bin/perl -w
+#
+# $RCSfile: ExtractFromPDBFiles.pl,v $
+# $Date: 2015/02/28 20:46:19 $
+# $Revision: 1.39 $
+#
+# Author: Manish Sud <msud@san.rr.com>
+#
+# Copyright (C) 2015 Manish Sud. All rights reserved.
+#
+# This file is part of MayaChemTools.
+#
+# MayaChemTools is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# MayaChemTools is distributed in the hope that it will be useful, but without
+# any warranty; without even the implied warranty of merchantability of fitness
+# for a particular purpose.  See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
+# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
+# Boston, MA, 02111-1307, USA.
+#
+use strict;
+use FindBin; use lib "$FindBin::Bin/../lib";
+use Getopt::Long;
+use File::Basename;
+use Text::ParseWords;
+use Benchmark;
+use FileUtil;
+use TextUtil;
+use PDBFileUtil;
+use AminoAcids;
+use SequenceFileUtil;
+my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
+
+# Flush STDOUT after every write...
+$| = 1;
+
+# Announce the start and note the starting time...
+$ScriptName = basename($0);
+print "\n$ScriptName: Starting...\n\n";
+$StartTime = Benchmark->new;
+
+# Get the options and setup script; print usage and quit when help is asked
+# for or no command line argument is left...
+SetupScriptUsage();
+if ($Options{help} || @ARGV < 1) {
+  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
+}
+
+my(@PDBFilesList) = ExpandFileNames(\@ARGV, "pdb");
+
+# Process options...
+print "Processing options...\n";
+my(%OptionsInfo);
+ProcessOptions();
+
+# Setup information about input files...
+print "Checking input PDB file(s)...\n";
+my(%PDBFilesInfo);
+RetrievePDBFilesInfo();
+
+# Process input files, skipping over the ones not flagged as okay...
+my($FileIndex);
+print "\nProcessing PDB files...\n" if (@PDBFilesList > 1);
+
+for $FileIndex (0 .. $#PDBFilesList) {
+  next unless $PDBFilesInfo{FileOkay}[$FileIndex];
+
+  print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
+  ExtractFromPDBFiles($FileIndex);
+}
+
+# Report completion and the elapsed time...
+print "\n$ScriptName:Done...\n\n";
+$EndTime = Benchmark->new;
+$TotalTime = timediff($EndTime, $StartTime);
+print "Total time: ", timestr($TotalTime), "\n";
+###############################################################################
+# Read the PDB file at the specified index in the input files list and pass its
+# record lines to the extraction routine for the current mode...
+#
+# The mode patterns are tried in the order listed and only the first matching
+# one is used. A mode value matching none of them results in no extraction.
+#
+sub ExtractFromPDBFiles {
+  my($FileIndex) = @_;
+  my($PDBFile, $PDBRecordLinesRef, $ModeHandler, $ModePattern, $ExtractSubRef, @ModeHandlers);
+
+  @ModeHandlers = (
+    [qr/Chains/i, \&ExtractChains],
+    [qr/Sequences/i, \&ExtractSequences],
+    [qr/^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames)$/i, \&ExtractByAtoms],
+    [qr/^(ResidueNums|ResiduesRange|ResidueNames)$/i, \&ExtractByResidues],
+    [qr/Distance/i, \&ExtractByDistance],
+    [qr/NonWater/i, \&ExtractNonWaterRecords],
+    [qr/NonHydrogens/i, \&ExtractNonHydrogenRecords],
+  );
+
+  # Get PDB data...
+  $PDBFile = $PDBFilesList[$FileIndex];
+  $PDBRecordLinesRef = ReadPDBFile($PDBFile);
+
+  MODEHANDLER: for $ModeHandler (@ModeHandlers) {
+    ($ModePattern, $ExtractSubRef) = @{$ModeHandler};
+    if ($OptionsInfo{Mode} =~ $ModePattern) {
+      $ExtractSubRef->($FileIndex, $PDBRecordLinesRef);
+      last MODEHANDLER;
+    }
+  }
+}
+# Extract specified chains and generate new PDB files...
+#
+# Arguments: index of the input file in the files list and a reference to the
+# list of its PDB record lines.
+#
+# Each specified chain goes to its own output file or, with CombineChains set,
+# all of them go to the first output file of the input file. The ATOM/HETATM
+# lines of a chain are followed by a TER record and by the CONECT records
+# retrieved for its atoms; each file is closed off with an END record.
+#
+sub ExtractChains {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($ChainIndex, $ChainID, $PDBFileName, $OutFH, $RecordLine, $ChainsAndResiduesInfoRef, $AtomNumber, $AtomName, $ResidueName, $AtomChainID, $ResidueNumber, $AlternateLocation, $InsertionCode, $ConectRecordLinesRef, %ChainAtomNumbersMap);
+
+  # Get chains and residues data...
+  $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'AtomAndHetatm', 0, 1);
+
+  if ($OptionsInfo{CombineChains}) {
+    # One output file shared by all the chains...
+    $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+    print "Generating PDBFileName file $PDBFileName...\n";
+    open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+    # Write out header and other older records...
+    WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+  }
+
+  for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
+    $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
+
+    if (!$OptionsInfo{CombineChains}) {
+      # One output file per chain...
+      $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][$ChainIndex];
+      print "Generating PDBFileName file $PDBFileName...\n";
+      open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+      # Write out header and other older records...
+      WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+    }
+
+    # Write out ATOM/HETATM line for chain and collect all ATOM/HETATM serial numbers
+    # for writing out appropriate CONECT records...
+    %ChainAtomNumbersMap = ();
+    for $RecordLine (@{$ChainsAndResiduesInfoRef->{Lines}{$ChainID}}) {
+      print $OutFH "$RecordLine\n";
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $AtomChainID, $ResidueNumber, $InsertionCode) = ParseAtomRecordLine($RecordLine);
+      $AtomNumber = int $AtomNumber;
+      $ChainAtomNumbersMap{$AtomNumber} = $AtomName;
+    }
+
+    # Write out TER record using information from last chain record. It's
+    # skipped for a chain without any record lines: the values at hand would
+    # be undefined or left over from the previous chain...
+    if (keys %ChainAtomNumbersMap) {
+      $AtomNumber += 1;
+      print $OutFH GenerateTerRecordLine($AtomNumber, $ResidueName, $AtomChainID, $ResidueNumber, $InsertionCode), "\n";
+    }
+
+    # Write out CONECT records...
+    $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%ChainAtomNumbersMap);
+    for $RecordLine (@{$ConectRecordLinesRef}) {
+      print $OutFH "$RecordLine\n";
+    }
+
+    if (!$OptionsInfo{CombineChains}) {
+      # Write out END record...
+      print $OutFH GenerateEndRecordLine(), "\n";
+      close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
+    }
+  }
+
+  if ($OptionsInfo{CombineChains}) {
+    # Write out END record...
+    print $OutFH GenerateEndRecordLine(), "\n";
+    close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
+  }
+}
+# Extract sequences for individual chains or combine all the chains...
+#
+# Arguments: index of the input file in the files list and a reference to the
+# list of its PDB record lines.
+#
+# Residues come from SEQRES records when SequenceRecordSource is SeqRes and
+# from the default records used by GetChainsAndResidues otherwise. Nonstandard
+# residues are kept using NonStandardSequenceCode or dropped, with a warning
+# either way. Sequences are written out in FASTA format: one file per chain or
+# one file for all the chains joined together.
+#
+sub ExtractSequences {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($ChainIndex, $ChainID, $ChainLabel, $Residue, $ResidueCode, $StandardResidue, $ChainSequence, $ChainSequenceID, $ChainsAndResiduesInfoRef, $ChainResiduesRef, $LastChainIndex, $CombinedSequence, $CombinedSequenceID, $WriteSequenceFile, %ChainSequencesDataMap);
+
+  $ChainsAndResiduesInfoRef = ($OptionsInfo{SequenceRecordSource} =~ /^SeqRes$/i) ? GetChainsAndResidues($PDBRecordLinesRef, 'SeqRes') : GetChainsAndResidues($PDBRecordLinesRef);
+
+  $LastChainIndex = $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]};
+
+  # Generate sequence data for all the chains...
+  %ChainSequencesDataMap = (IDs => [], Sequence => {}, Description => {});
+
+  for $ChainIndex (0 .. $LastChainIndex) {
+    $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
+    $ChainLabel = $PDBFilesInfo{ChainLabels}[$FileIndex][$ChainIndex];
+
+    # Setup sequence ID...
+    $ChainSequenceID = $PDBFilesInfo{ChainSequenceIDs}[$FileIndex][$ChainIndex];
+    push @{$ChainSequencesDataMap{IDs}}, $ChainSequenceID;
+    $ChainSequencesDataMap{Description}{$ChainID} = $ChainSequenceID;
+
+    # Collect sequence data for the chain...
+    $ChainResiduesRef = \@{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}};
+
+    # Setup sequence data...
+    $ChainSequence = '';
+    RESIDUE: for $Residue (@{$ChainResiduesRef}) {
+      ($ResidueCode, $StandardResidue) = GetResidueCode($Residue);
+      if (!$StandardResidue) {
+        if (!$OptionsInfo{KeepNonStandardSequences}) {
+          warn "Warning: Ignoring nonstandard residue $Residue in $ChainLabel...\n";
+          next RESIDUE;
+        }
+        $ResidueCode = $OptionsInfo{NonStandardSequenceCode};
+        warn "Warning: Keeping nonstandard residue $Residue in $ChainLabel...\n";
+      }
+      $ChainSequence .= $ResidueCode;
+    }
+    $ChainSequencesDataMap{Sequence}{$ChainID} = $ChainSequence;
+  }
+
+  # Write out one sequence as a FASTA file...
+  $WriteSequenceFile = sub {
+    my($SequenceFileName, $SequenceID, $SequenceDescription, $Sequence) = @_;
+    my(%SequencesDataMap);
+
+    print "Generating sequence file $SequenceFileName...\n";
+    %SequencesDataMap = (
+      IDs => [$SequenceID],
+      Description => {$SequenceID => $SequenceDescription},
+      Sequence => {$SequenceID => $Sequence},
+    );
+    WritePearsonFastaSequenceFile($SequenceFileName, \%SequencesDataMap, $OptionsInfo{MaxSequenceLength});
+  };
+
+  # Write out the sequence files...
+  if ($OptionsInfo{CombineChainSequences}) {
+    # Combine all the chain sequences; the ID doubles as the description...
+    $CombinedSequence = '';
+    for $ChainIndex (0 .. $LastChainIndex) {
+      $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
+      $CombinedSequence .= $ChainSequencesDataMap{Sequence}{$ChainID};
+    }
+    $CombinedSequenceID = $PDBFilesInfo{ChainSequenceIDsPrefix}[$FileIndex][0] . "_CombinedChains|PDB";
+
+    $WriteSequenceFile->($PDBFilesInfo{OutFileNames}[$FileIndex][0], $CombinedSequenceID, $CombinedSequenceID, $CombinedSequence);
+    return;
+  }
+
+  # For each specified chain, write out the sequences...
+  for $ChainIndex (0 .. $LastChainIndex) {
+    $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
+    $WriteSequenceFile->($PDBFilesInfo{OutFileNames}[$FileIndex][$ChainIndex], $ChainSequencesDataMap{IDs}[$ChainIndex], $ChainSequencesDataMap{Description}{$ChainID}, $ChainSequencesDataMap{Sequence}{$ChainID});
+  }
+}
+# Extract atom records selected by the Atoms, CAlphas, AtomNames, AtomNums or
+# AtomsRange modes and write them out to a new PDB file...
+#
+# Arguments: index of the input file in the files list and a reference to the
+# list of its PDB record lines.
+#
+# Only records accepted by CheckRecordType are looked at. A TER record is
+# written out for an input TER record provided at least one record has been
+# written since the previous one. MODEL and ENDMDL records are copied over,
+# followed by CONECT records for written atoms and an END record.
+#
+sub ExtractByAtoms {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($PDBFileName, $OutFH, $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $IgnoreRecord, $ConectRecordLinesRef, %AtomNumbersMap);
+
+  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+  print "Generating PDBFileName file $PDBFileName...\n";
+  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+  # Write out header and other older records...
+  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+
+  # Write out all ATOM records along with TER and model records to indicate
+  # chains and multiple models..
+  %AtomNumbersMap = ();
+  $ChainRecordCount = 0;
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (CheckRecordType($RecordLine)) {
+      ($AtomNumber, $AtomName) = ParseAtomRecordLine($RecordLine);
+
+      # Check atoms...
+      $IgnoreRecord = 1;
+      if ($OptionsInfo{Mode} =~ /^Atoms$/i) {
+        $IgnoreRecord = 0;
+      }
+      elsif ($OptionsInfo{Mode} =~ /^(CAlphas|AtomNames)$/i) {
+        # Atom names are matched ignoring case...
+        if (exists $OptionsInfo{SpecifiedAtomNamesMap}{lc $AtomName}) {
+          $IgnoreRecord = 0;
+        }
+      }
+      elsif ($OptionsInfo{Mode} =~ /^AtomNums$/i) {
+        if (exists $OptionsInfo{SpecifiedAtomNumsMap}{$AtomNumber}) {
+          $IgnoreRecord = 0;
+        }
+      }
+      elsif ($OptionsInfo{Mode} =~ /^AtomsRange$/i) {
+        if ($AtomNumber >= $OptionsInfo{SpecifiedStartAtomNum} && $AtomNumber <= $OptionsInfo{SpecifiedEndAtomNum}) {
+          $IgnoreRecord = 0;
+        }
+      }
+
+      if (!$IgnoreRecord) {
+        $ChainRecordCount++;
+        print $OutFH "$RecordLine\n";
+        # Track written atoms for picking out CONECT records...
+        $AtomNumber = int $AtomNumber;
+        $AtomNumbersMap{$AtomNumber} = $AtomName;
+      }
+    }
+    elsif (IsTerRecordType($RecordLine)) {
+      if ($ChainRecordCount) {
+        print $OutFH GenerateTerRecordLine(), "\n";
+      }
+      $ChainRecordCount = 0;
+    }
+    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out appropriate CONECT records...
+  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
+  for $RecordLine (@{$ConectRecordLinesRef}) {
+    print $OutFH "$RecordLine\n";
+  }
+
+  # Write out END record...
+  print $OutFH GenerateEndRecordLine(), "\n";
+  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
+}
+# Extract records for residues selected by the ResidueNums, ResiduesRange or
+# ResidueNames modes and write them out to a new PDB file...
+#
+# Arguments: index of the input file in the files list and a reference to the
+# list of its PDB record lines.
+#
+# Only records accepted by CheckRecordType are looked at. A TER record is
+# written out for an input TER record provided at least one record has been
+# written since the previous one. MODEL and ENDMDL records are copied over,
+# followed by CONECT records for written atoms and an END record.
+#
+sub ExtractByResidues {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($PDBFileName, $OutFH, $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $ConectRecordLinesRef, $IgnoreRecord, %AtomNumbersMap);
+
+  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+  print "Generating PDBFileName file $PDBFileName...\n";
+  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+  # Write out header and other older records...
+  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+
+  # Write out all ATOM records for specified residues with TER and model records to indicate
+  # chains and multiple models...
+  %AtomNumbersMap = ();
+  $ChainRecordCount = 0;
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (CheckRecordType($RecordLine)) {
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber) = ParseAtomRecordLine($RecordLine);
+
+      # Check residues...
+      $IgnoreRecord = 1;
+      if ($OptionsInfo{Mode} =~ /^ResidueNums$/i) {
+        if (exists $OptionsInfo{SpecifiedResidueNumsMap}{$ResidueNumber}) {
+          $IgnoreRecord = 0;
+        }
+      }
+      elsif ($OptionsInfo{Mode} =~ /^ResiduesRange$/i) {
+        if ($ResidueNumber >= $OptionsInfo{SpecifiedStartResidueNum} && $ResidueNumber <= $OptionsInfo{SpecifiedEndResidueNum}) {
+          $IgnoreRecord = 0;
+        }
+      }
+      elsif ($OptionsInfo{Mode} =~ /^ResidueNames$/i) {
+        # Residue names are matched ignoring case...
+        if (exists $OptionsInfo{SpecifiedResidueNamesMap}{lc $ResidueName}) {
+          $IgnoreRecord = 0;
+        }
+      }
+
+      if (!$IgnoreRecord) {
+        $ChainRecordCount++;
+        print $OutFH "$RecordLine\n";
+        # Track written atoms for picking out CONECT records...
+        $AtomNumber = int $AtomNumber;
+        $AtomNumbersMap{$AtomNumber} = $AtomName;
+      }
+    }
+    elsif (IsTerRecordType($RecordLine)) {
+      if ($ChainRecordCount) {
+        print $OutFH GenerateTerRecordLine(), "\n";
+      }
+      $ChainRecordCount = 0;
+    }
+    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out appropriate CONECT records...
+  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
+  for $RecordLine (@{$ConectRecordLinesRef}) {
+    print $OutFH "$RecordLine\n";
+  }
+
+  # Write out END record...
+  print $OutFH GenerateEndRecordLine(), "\n";
+  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
+}
+# Extract non water records and write them out to a new PDB file...
+#
+# Arguments: index of the input file in the files list and a reference to the
+# list of its PDB record lines.
+#
+# A record accepted by CheckRecordType is written out unless its residue name
+# is a key in the specified water residues map; the lookup is an exact, case
+# sensitive one. TER, MODEL, ENDMDL, CONECT and END records are handled as in
+# the other record extraction routines.
+#
+sub ExtractNonWaterRecords {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($PDBFileName, $OutFH, $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ConectRecordLinesRef, %AtomNumbersMap);
+
+  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+  print "Generating PDBFileName file $PDBFileName...\n";
+  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+  # Write out header and other older records...
+  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+
+  # Write out all ATOM/HETATM non water records along with TER and model records to indicate
+  # chains and multiple models..
+  %AtomNumbersMap = ();
+  $ChainRecordCount = 0;
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (CheckRecordType($RecordLine)) {
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName) = ParseAtomRecordLine($RecordLine);
+      if (! exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName} ) {
+        $ChainRecordCount++;
+        print $OutFH "$RecordLine\n";
+        # Track written atoms for picking out CONECT records...
+        $AtomNumber = int $AtomNumber;
+        $AtomNumbersMap{$AtomNumber} = $AtomName;
+      }
+    }
+    elsif (IsTerRecordType($RecordLine)) {
+      if ($ChainRecordCount) {
+        print $OutFH GenerateTerRecordLine(), "\n";
+      }
+      $ChainRecordCount = 0;
+    }
+    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out appropriate CONECT records...
+  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
+  for $RecordLine (@{$ConectRecordLinesRef}) {
+    print $OutFH "$RecordLine\n";
+  }
+
+  # Write out END record...
+  print $OutFH GenerateEndRecordLine(), "\n";
+  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
+}
+# Extract non hydrogen records and write them out to a new PDB file...
+#
+# Arguments: index of the input file in the files list and a reference to the
+# list of its PDB record lines.
+#
+# A record accepted by CheckRecordType is written out unless its element
+# symbol is H, ignoring case. Any other value, an empty one included, keeps
+# the record. TER, MODEL, ENDMDL, CONECT and END records are handled as in
+# the other record extraction routines.
+#
+sub ExtractNonHydrogenRecords {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($PDBFileName, $OutFH, $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $ConectRecordLinesRef, %AtomNumbersMap);
+
+  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+  print "Generating PDBFileName file $PDBFileName...\n";
+  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+  # Write out header and other older records...
+  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+
+  # Write out all ATOM/HETATM non hydrogen records along with TER and model records to indicate
+  # chains and multiple models..
+  %AtomNumbersMap = ();
+  $ChainRecordCount = 0;
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    if (CheckRecordType($RecordLine)) {
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
+      if ($ElementSymbol !~ /^H$/i) {
+        $ChainRecordCount++;
+        print $OutFH "$RecordLine\n";
+        # Track written atoms for picking out CONECT records...
+        $AtomNumber = int $AtomNumber;
+        $AtomNumbersMap{$AtomNumber} = $AtomName;
+      }
+    }
+    elsif (IsTerRecordType($RecordLine)) {
+      if ($ChainRecordCount) {
+        print $OutFH GenerateTerRecordLine(), "\n";
+      }
+      $ChainRecordCount = 0;
+    }
+    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out appropriate CONECT records...
+  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
+  for $RecordLine (@{$ConectRecordLinesRef}) {
+    print $OutFH "$RecordLine\n";
+  }
+
+  # Write out END record...
+  print $OutFH GenerateEndRecordLine(), "\n";
+  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
+}
+# Extract ATOM/HETATM records by distance and write them out to a new PDB file...
+#
+# Arguments: index of the input file in the files list and a reference to the
+# list of its PDB record lines.
+#
+# Distances are measured from the origin coordinates set up for the input
+# file. With ByAtom selection, each record is checked on its own. With
+# ByResidue selection, a residue is checked once, upon reaching its first
+# record, and the outcome is applied to all records carrying the same residue
+# name, number and chain ID. TER, MODEL, ENDMDL, CONECT and END records are
+# handled as in the other record extraction routines.
+#
+sub ExtractByDistance {
+  my($FileIndex, $PDBRecordLinesRef) = @_;
+  my($PDBFileName, $OutFH, $RecordLine, $RecordLineNum, $ChainRecordCount, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $IgnoreRecord, $ResidueID, @OriginCoords, @Coords, %AtomNumbersMap, %ResiduesDataMap);
+
+  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
+  print "Generating PDBFileName file $PDBFileName...\n";
+  open $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";
+
+  # Write out header and other older records...
+  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);
+
+  # Setup coordinates of origin to calculate distance...
+  @OriginCoords = ();
+  push @OriginCoords, @{$PDBFilesInfo{DistanceOrigin}[$FileIndex]};
+
+  # Write out all ATOM records which meet specified criteria along with TER and model records to indicate
+  # chains and multiple models...
+  %AtomNumbersMap = ();
+  %ResiduesDataMap = ();
+  %{$ResiduesDataMap{ID}} = ();
+  %{$ResiduesDataMap{Status}} = ();
+  $ChainRecordCount = 0;
+  $RecordLineNum = 0;
+  for $RecordLine (@{$PDBRecordLinesRef}) {
+    # Line number, starting from 1, of the current record...
+    $RecordLineNum++;
+    if (CheckRecordType($RecordLine)) {
+      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
+      @Coords = (); push @Coords, ($X, $Y, $Z);
+
+      $IgnoreRecord = 1;
+      if ($OptionsInfo{DistanceSelectionMode} =~ /^ByResidue$/i) {
+        $ResidueID = "${ResidueName}_${ResidueNumber}_${ChainID}";
+        if (exists $ResiduesDataMap{ID}{$ResidueID}) {
+          # Residue data has been processed; check its selection status...
+          if ($ResiduesDataMap{Status}{$ResidueID}) {
+            $IgnoreRecord = 0;
+          }
+        }
+        else {
+          # Residue hasn't been processed...
+          $ResiduesDataMap{ID}{$ResidueID} = $ResidueID;
+          $ResiduesDataMap{Status}{$ResidueID} = 0;
+          if (CheckResidueDistance($ResidueID, $RecordLineNum, $PDBRecordLinesRef, \@OriginCoords)) {
+            $IgnoreRecord = 0;
+            $ResiduesDataMap{Status}{$ResidueID} = 1;
+          }
+        }
+      }
+      elsif ($OptionsInfo{DistanceSelectionMode} =~ /^ByAtom$/i) {
+        if (CheckDistance(\@Coords, \@OriginCoords)) {
+          $IgnoreRecord = 0;
+        }
+      }
+
+      if (!$IgnoreRecord) {
+        $ChainRecordCount++;
+        print $OutFH "$RecordLine\n";
+        # Track written atoms for picking out CONECT records...
+        $AtomNumber = int $AtomNumber;
+        $AtomNumbersMap{$AtomNumber} = $AtomName;
+      }
+    }
+    elsif (IsTerRecordType($RecordLine)) {
+      if ($ChainRecordCount) {
+        print $OutFH GenerateTerRecordLine(), "\n";
+      }
+      $ChainRecordCount = 0;
+    }
+    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
+      print $OutFH "$RecordLine\n";
+    }
+  }
+
+  # Write out appropriate CONECT records...
+  $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
+  for $RecordLine (@{$ConectRecordLinesRef}) {
+    print $OutFH "$RecordLine\n";
+  }
+
+  # Write out END record...
+  print $OutFH GenerateEndRecordLine(), "\n";
+  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
+}
+# Is the record line of the type selected by the RecordMode option?
+#
+# Returns 1 for an ATOM line in Atom mode, a HETATM line in Hetatm mode and
+# either one of them in AtomAndHetatm mode; 0 in all other cases, any other
+# RecordMode value included.
+#
+sub CheckRecordType {
+  my($RecordLine) = @_;
+  my($RecordMode);
+
+  $RecordMode = $OptionsInfo{RecordMode};
+
+  if ($RecordMode =~ /^Atom$/i) {
+    return IsAtomRecordType($RecordLine) ? 1 : 0;
+  }
+  if ($RecordMode =~ /^Hetatm$/i) {
+    return IsHetatmRecordType($RecordLine) ? 1 : 0;
+  }
+  if ($RecordMode =~ /^AtomAndHetatm$/i) {
+    return (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) ? 1 : 0;
+  }
+  return 0;
+}
+# Does any record of the specified residue meet the distance criterion
+# specified by the user?
+#
+# Arguments: residue ID in ResidueName_ResidueNumber_ChainID format, line
+# number (starting from 1) of the first record of the residue, a reference to
+# the list of PDB record lines and a reference to the origin coordinates.
+#
+# Record lines are scanned from the starting line onwards, skipping over the
+# ones rejected by CheckRecordType, until a record of a different residue is
+# reached. Returns 1 as soon as one record passes CheckDistance; 0 otherwise.
+#
+sub CheckResidueDistance {
+  my($SpecifiedResidueID, $StartingLineNum, $PDBRecordLinesRef, $OriginCoordsRef) = @_;
+  my($Status, $RecordLine, $RecordLineIndex, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $ResidueID, @Coords);
+
+  $Status = 0;
+
+  RECORDLINE: for $RecordLineIndex (($StartingLineNum - 1) .. $#{$PDBRecordLinesRef}) {
+    $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
+    if (!CheckRecordType($RecordLine)) {
+      next RECORDLINE;
+    }
+    ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
+    $ResidueID = "${ResidueName}_${ResidueNumber}_${ChainID}";
+
+    # The specified ID is quoted to have any regular expression metacharacter
+    # in residue name or chain ID compared as a plain character...
+    if ($ResidueID !~ /^\Q$SpecifiedResidueID\E$/i) {
+      # It's a new residue line...
+      last RECORDLINE;
+    }
+
+    # Check distance...
+    @Coords = (); push @Coords, ($X, $Y, $Z);
+    if (CheckDistance(\@Coords, $OriginCoordsRef)) {
+      # Distance criterion is met for at least one record in the residue...
+      $Status = 1;
+      last RECORDLINE;
+    }
+  }
+  return $Status;
+}
+# Are the coordinates within the maximum extraction distance of the origin?
+#
+# Arguments: a reference to X, Y and Z coordinates and a reference to the
+# origin coordinates.
+#
+# For Residue extraction distance mode, the origin list is read as consecutive
+# X, Y, Z triplets and being close enough to any one of them is sufficient.
+# For any other mode, only the first three origin values are used. Returns 1
+# for a distance less than or equal to MaxExtractionDistance; 0 otherwise.
+#
+sub CheckDistance {
+  my($CoordsRef, $OriginCoordsRef) = @_;
+  my($MaxDistance, $SumOfSquares, $Axis, $Offset, $OriginValuesCount);
+
+  $MaxDistance = $OptionsInfo{MaxExtractionDistance};
+
+  if ($OptionsInfo{ExtractionDistanceMode} !~ /^Residue$/i) {
+    # Single origin point...
+    $SumOfSquares = 0;
+    for $Axis (0 .. 2) {
+      $SumOfSquares += ($CoordsRef->[$Axis] - $OriginCoordsRef->[$Axis])**2;
+    }
+    return (sqrt($SumOfSquares) <= $MaxDistance) ? 1 : 0;
+  }
+
+  # Go over coordinates of all the atoms in the residue...
+  $OriginValuesCount = scalar @{$OriginCoordsRef};
+  $Offset = 0;
+  while ($Offset < $OriginValuesCount) {
+    $SumOfSquares = ($CoordsRef->[0] - $OriginCoordsRef->[$Offset])**2 + ($CoordsRef->[1] - $OriginCoordsRef->[$Offset + 1])**2 + ($CoordsRef->[2] - $OriginCoordsRef->[$Offset + 2])**2;
+    if (sqrt($SumOfSquares) <= $MaxDistance) {
+      return 1;
+    }
+    $Offset += 3;
+  }
+  return 0;
+}
+# Write out modified header and other older records...
+#
+# Arguments: output file handle and a reference to the list of PDB record
+# lines.
+#
+# With ModifyHeaderRecord set, a new HEADER record is generated from the ID
+# code of the input file and a fixed classification. Otherwise, the first
+# record line is copied over, unless it's a MODEL, ATOM or HETATM record:
+# such a line isn't a header and is left to the extraction routines to avoid
+# writing out an unselected or a duplicate record. With KeepOldRecords set,
+# record lines after the first one are copied over all the way up to the first
+# MODEL, ATOM or HETATM record.
+#
+sub WriteHeaderAndOlderRecords {
+  my($OutFileRef, $PDBRecordLinesRef) = @_;
+
+  if ($OptionsInfo{ModifyHeaderRecord}) {
+    # Write out modified HEADER record...
+    my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
+    $Classification = 'Data extracted using MayaChemTools';
+    print $OutFileRef GenerateHeaderRecordLine($IDCode, $Classification), "\n";
+  }
+  else {
+    my($FirstRecordLine) = $PDBRecordLinesRef->[0];
+    # Nothing to write out for an empty list of record lines...
+    if (defined($FirstRecordLine) && !(IsModelRecordType($FirstRecordLine) || IsAtomRecordType($FirstRecordLine) || IsHetatmRecordType($FirstRecordLine))) {
+      print $OutFileRef $FirstRecordLine, "\n";
+    }
+  }
+
+  # Write out any old records...
+  if ($OptionsInfo{KeepOldRecords}) {
+    my($RecordLineIndex, $RecordLine);
+    # Skip HEADER record and write out older records all the way up to first MODEL/ATOM/HETATM records from input file...
+    RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
+      $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
+      if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
+        last RECORDLINE;
+      }
+      print $OutFileRef "$RecordLine\n";
+    }
+  }
+}
+# Get classification, deposition date and ID code from the first record line...
+#
+# The first record line is parsed only when it's a HEADER record; three empty
+# strings are returned otherwise.
+#
+sub GetHeaderRecordInformation {
+  my($PDBRecordLinesRef) = @_;
+  my($FirstRecordLine, $Classification, $DepositionDate, $IDCode);
+
+  $FirstRecordLine = $PDBRecordLinesRef->[0];
+  ($Classification, $DepositionDate, $IDCode) = IsHeaderRecordType($FirstRecordLine) ? ParseHeaderRecordLine($FirstRecordLine) : ('') x 3;
+
+  return ($Classification, $DepositionDate, $IDCode);
+}
+# Get one letter residue code along with a flag for standard residues...
+#
+# A three letter name is taken to be an amino acid and a one letter name a
+# nucleic acid (A, G, T, U or C; ignoring case). Returns the one letter code
+# and 1 for a recognized residue; NonStandardSequenceCode and 0 for
+# everything else.
+#
+sub GetResidueCode {
+  my($ResidueName) = @_;
+  my($NameLength, $OneLetterCode);
+
+  $NameLength = length($ResidueName);
+
+  if ($NameLength == 3 && AminoAcids::IsAminoAcid($ResidueName)) {
+    # Standard amino acid...
+    $OneLetterCode = AminoAcids::GetAminoAcidOneLetterCode($ResidueName);
+    return ($OneLetterCode, 1);
+  }
+  if ($NameLength == 1 && $ResidueName =~ /^(A|G|T|U|C)$/i) {
+    # Nucleic acid; its name is the code...
+    return ($ResidueName, 1);
+  }
+
+  # Nonstandard residue...
+  return ($OptionsInfo{NonStandardSequenceCode}, 0);
+}
+# Process option values...
+#
+# Clears %OptionsInfo and fills it in from the command line values in
+# %Options. Mode is set first: it's used by the residue, atom and distance
+# options processing called from here.
+#
+sub ProcessOptions {
+  my($IsYes, $WaterResidueName, @SpecifiedChains, @SpecifiedWaterResiduesList);
+
+  # Turn a Yes/No option value into 1/0...
+  $IsYes = sub {
+    my($Value) = @_;
+    return ($Value =~ /^Yes$/i) ? 1 : 0;
+  };
+
+  %OptionsInfo = ();
+  $OptionsInfo{Mode} = $Options{mode};
+
+  # Chains: First, All or a comma delimited list of chain IDs...
+  @SpecifiedChains = ();
+  if ($Options{chains} =~ /^(First|All)$/i) {
+    $OptionsInfo{ChainsToExtract} = $Options{chains};
+  }
+  else {
+    @SpecifiedChains = split /\,/, $Options{chains};
+    $OptionsInfo{ChainsToExtract} = 'Specified';
+  }
+  $OptionsInfo{SpecifiedChains} = [@SpecifiedChains];
+
+  # Both of these are driven by the same command line option...
+  $OptionsInfo{CombineChains} = $IsYes->($Options{combinechains});
+  $OptionsInfo{CombineChainSequences} = $IsYes->($Options{combinechains});
+
+  ProcessResiduesOptions();
+  ProcessAtomsOptions();
+  ProcessDistanceOptions();
+
+  # Water residue names are set up only for NonWater mode...
+  $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
+  $OptionsInfo{SpecifiedWaterResiduesList} = [];
+  $OptionsInfo{SpecifiedWaterResiduesMap} = {};
+
+  @SpecifiedWaterResiduesList = ();
+  if ($OptionsInfo{Mode} =~ /^NonWater$/i) {
+    if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
+      @SpecifiedWaterResiduesList = ('HOH', 'WAT', 'H2O');
+    }
+    else {
+      @SpecifiedWaterResiduesList = split /\,/, $Options{waterresiduenames};
+    }
+    for $WaterResidueName (@SpecifiedWaterResiduesList) {
+      $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
+    }
+  }
+  push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
+
+  # Record mode defaults to Atom for atom based modes and to AtomAndHetatm for
+  # the rest of the modes...
+  if ($Options{recordmode}) {
+    $OptionsInfo{RecordMode} = $Options{recordmode};
+  }
+  elsif ($Options{mode} =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames)$/i) {
+    $OptionsInfo{RecordMode} = "Atom";
+  }
+  else {
+    $OptionsInfo{RecordMode} = "AtomAndHetatm";
+  }
+
+  $OptionsInfo{KeepOldRecords} = $IsYes->($Options{keepoldrecords});
+  $OptionsInfo{ModifyHeaderRecord} = $IsYes->($Options{modifyheader});
+
+  $OptionsInfo{KeepNonStandardSequences} = $IsYes->($Options{nonstandardkeep});
+  $OptionsInfo{NonStandardSequenceCode} = $Options{nonstandardcode};
+  $OptionsInfo{MaxSequenceLength} = $Options{sequencelength};
+  $OptionsInfo{SequenceRecordSource} = $Options{sequencerecords};
+  $OptionsInfo{SequenceIDPrefixSource} = $Options{sequenceidprefix};
+
+  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
+  $OptionsInfo{OutFileRoot} = $Options{root} || 0;
+}
+# Process specified residue options...
+#
+# For ResidueNums, ResiduesRange and ResidueNames modes, the value of
+# "--Residues" option, with spaces taken out, is split at commas into a list
+# of residue names or residue numbers. Residue numbers must be positive
+# integers and ResiduesRange takes exactly two of them with start not greater
+# than end. For all modes, the residue lists, range and lookup maps are set up
+# in %OptionsInfo; they stay empty, or 0 for the range, when not applicable.
+#
+# Dies for a missing or an invalid "--Residues" value.
+#
+sub ProcessResiduesOptions {
+  my($ResidueNum, $StartResidueNum, $EndResNum, $ResidueName, @SpecifiedResidueNumsList, @SpecifiedResidueNamesList);
+
+  @SpecifiedResidueNumsList = ();
+  ($StartResidueNum, $EndResNum) = (0, 0);
+  @SpecifiedResidueNamesList = ();
+
+  if ($OptionsInfo{Mode} =~ /^(ResidueNums|ResiduesRange|ResidueNames)$/i) {
+    if (!$Options{residues}) {
+      die "Error: You must specify a value for \"--Residues\" option in \"ResidueNums, ResiduesRange, or ResidueNames\" \"-m, --mode\". \n";
+    }
+    $OptionsInfo{Residues} = $Options{residues};
+    $OptionsInfo{Residues} =~ s/ //g;
+
+    if ($OptionsInfo{Mode} =~ /^ResidueNames$/i) {
+      @SpecifiedResidueNamesList = split /\,/, $OptionsInfo{Residues};
+    }
+    else {
+      @SpecifiedResidueNumsList = split /\,/, $OptionsInfo{Residues};
+      for $ResidueNum (@SpecifiedResidueNumsList) {
+        if (!IsPositiveInteger($ResidueNum)) {
+          die "Error: Invalid residue number value, $ResidueNum, for \"--Residues\" option during \"ResidueNums\" or \"ResiduesRange\" value of \"-m --mode\" option: Residue number must be a positive integer.\n";
+        }
+      }
+      if ($OptionsInfo{Mode} =~ /^ResiduesRange$/i) {
+        if (@SpecifiedResidueNumsList != 2) {
+          die "Error: Invalid number of residue number values, ", scalar(@SpecifiedResidueNumsList), ", for \"--Residues\" option during \"ResiduesRange\" value of \"-m --mode\" option: The number of values must be 2 corresponding to start and end residue numbers.\n";
+        }
+        if ($SpecifiedResidueNumsList[0] > $SpecifiedResidueNumsList[1]) {
+          die "Error: Invalid residue number values, @SpecifiedResidueNumsList, for \"--Residues\" option during \"ResiduesRange\" value of \"-m --mode\" option: The start residue number must be less than end residue number.\n";
+        }
+        ($StartResidueNum, $EndResNum) = @SpecifiedResidueNumsList;
+      }
+    }
+  }
+
+  @{$OptionsInfo{SpecifiedResidueNumsList}} = ();
+  push @{$OptionsInfo{SpecifiedResidueNumsList}}, @SpecifiedResidueNumsList;
+
+  $OptionsInfo{SpecifiedStartResidueNum} = $StartResidueNum;
+  $OptionsInfo{SpecifiedEndResidueNum} = $EndResNum;
+
+  @{$OptionsInfo{SpecifiedResidueNamesList}} = ();
+  push @{$OptionsInfo{SpecifiedResidueNamesList}}, @SpecifiedResidueNamesList;
+
+  # Set up a specified residue numbers map...
+  %{$OptionsInfo{SpecifiedResidueNumsMap}} = ();
+  for $ResidueNum (@{$OptionsInfo{SpecifiedResidueNumsList}}) {
+    $OptionsInfo{SpecifiedResidueNumsMap}{$ResidueNum} = $ResidueNum;
+  }
+
+  # Set up a specified residue names map using lower case names...
+  %{$OptionsInfo{SpecifiedResidueNamesMap}} = ();
+  for $ResidueName (@{$OptionsInfo{SpecifiedResidueNamesList}}) {
+    $OptionsInfo{SpecifiedResidueNamesMap}{lc $ResidueName} = lc $ResidueName;
+  }
+}
+# Process specified atom options...
+#
+# For AtomNums, AtomsRange and AtomNames modes, the value of "--Atoms" option,
+# with spaces taken out, is split at commas into a list of atom names or atom
+# numbers. Atom numbers must be positive integers and AtomsRange takes exactly
+# two of them with start not greater than end. CAlphas mode uses CA as the
+# only atom name. For all modes, the atom lists, range and lookup maps are set
+# up in %OptionsInfo; they stay empty, or 0 for the range, when not applicable.
+#
+# Dies for a missing or an invalid "--Atoms" value.
+#
+sub ProcessAtomsOptions {
+  my($Mode, $Num, $Name, $RangeStart, $RangeEnd, @AtomNums, @AtomNames);
+
+  $Mode = $OptionsInfo{Mode};
+  ($RangeStart, $RangeEnd) = (0, 0);
+  @AtomNums = ();
+  @AtomNames = ();
+
+  if ($Mode =~ /^(AtomNums|AtomsRange|AtomNames)$/i) {
+    if (!$Options{atoms}) {
+      die "Error: You must specify a value for \"--Atoms\" option in \"AtomNums, AtomsRange, or AtomNames\" \"-m, --mode\". \n";
+    }
+    $OptionsInfo{Atoms} = $Options{atoms};
+    $OptionsInfo{Atoms} =~ s/ //g;
+
+    if ($Mode =~ /^AtomNames$/i) {
+      @AtomNames = split /\,/, $OptionsInfo{Atoms};
+    }
+    else {
+      @AtomNums = split /\,/, $OptionsInfo{Atoms};
+      for $Num (@AtomNums) {
+        if (!IsPositiveInteger($Num)) {
+          die "Error: Invalid atom number value, $Num, for \"--Atoms\" option during \"AtomNums\" or \"AtomsRange\"value of \"-m --mode\" option: Atom number must be a positive integer.\n";
+        }
+      }
+      if ($Mode =~ /^AtomsRange$/i) {
+        if (@AtomNums != 2) {
+          die "Error: Invalid number of atom number values, ", scalar(@AtomNums), ", for \"--Atoms\" option during \"AtomsRange\" value of \"-m --mode\" option: The number of values must be 2 corresponding to start and end atom numbers.\n";
+        }
+        if ($AtomNums[0] > $AtomNums[1]) {
+          die "Error: Invalid atom number values, @AtomNums, for \"--Atoms\" option during \"AtomsRange\" value of \"-m --mode\" option: The start atom number must be less than end atom number.\n";
+        }
+        ($RangeStart, $RangeEnd) = @AtomNums;
+      }
+    }
+  }
+  elsif ($Mode =~ /^CAlphas$/i) {
+    @AtomNames = ("CA");
+  }
+
+  # Save lists and range...
+  @{$OptionsInfo{SpecifiedAtomNumsList}} = @AtomNums;
+  @{$OptionsInfo{SpecifiedAtomNamesList}} = @AtomNames;
+  $OptionsInfo{SpecifiedStartAtomNum} = $RangeStart;
+  $OptionsInfo{SpecifiedEndAtomNum} = $RangeEnd;
+
+  # Set up a specified atom numbers map...
+  %{$OptionsInfo{SpecifiedAtomNumsMap}} = ();
+  for $Num (@AtomNums) {
+    $OptionsInfo{SpecifiedAtomNumsMap}{$Num} = $Num;
+  }
+
+  # Set up a specified atom names map using lower case names...
+  %{$OptionsInfo{SpecifiedAtomNamesMap}} = ();
+  for $Name (@AtomNames) {
+    $OptionsInfo{SpecifiedAtomNamesMap}{lc $Name} = lc $Name;
+  }
+}
+# Process specified distance options...
+#
+# Copies the distance related command line values into %OptionsInfo and, during
+# "Distance" value of "-m, --mode" option, validates "--distanceorigin" according to
+# the value of "--distancemode":
+#
+#   Atom, Hetatm - exactly 2 values; first one must be a positive integer
+#   Residue      - 2 or 3 values; first one must be a positive integer
+#   XYZ          - exactly 3 values; all must be numerical
+#
+# The comma delimited values are stored in
+# @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}}; the list is empty for all
+# other modes. Dies with an error message on a missing or invalid value.
+#
+sub ProcessDistanceOptions {
+  my(@SpecifiedDistanceOrigin) = ();
+
+  $OptionsInfo{MaxExtractionDistance} = $Options{distance};
+  $OptionsInfo{ExtractionDistanceMode} = $Options{distancemode};
+  $OptionsInfo{ExtractionDistanceOrigin} = $Options{distanceorigin} ? $Options{distanceorigin} : '';
+  $OptionsInfo{DistanceSelectionMode} = $Options{distanceselectionmode};
+
+  if ($OptionsInfo{Mode} =~ /^Distance$/i) {
+    if (!$Options{distanceorigin}) {
+      die "Error: You must specify a value for \"--distanceorigin\" option in \"Distance\" \"-m, --mode\". \n";
+    }
+    @SpecifiedDistanceOrigin = split /\,/, $Options{distanceorigin};
+
+    if ($OptionsInfo{ExtractionDistanceMode} =~ /^Atom$/i) {
+      if (@SpecifiedDistanceOrigin != 2) {
+        die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Atom\" value of \"--distancemode\" : The number of values must be 2.\n";
+      }
+      if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
+        die "Error: Invalid atom number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Atom\" value of \"--distancemode\". Allowed values: > 0\n";
+      }
+    }
+    elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Hetatm$/i) {
+      if (@SpecifiedDistanceOrigin != 2) {
+        die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Hetatm\" value of \"--distancemode\" : The number of values must be 2.\n";
+      }
+      if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
+        die "Error: Invalid hetatm number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Hetatm\" value of \"--distancemode\". Allowed values: > 0\n";
+      }
+    }
+    elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
+      if (!(@SpecifiedDistanceOrigin == 2 || @SpecifiedDistanceOrigin == 3)) {
+        die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Residue\" value of \"--distancemode\" : The number of values must be either 2 or 3.\n";
+      }
+      if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
+        die "Error: Invalid residue number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Residue\" value of \"--distancemode\". Allowed values: > 0\n";
+      }
+    }
+    elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
+      if (@SpecifiedDistanceOrigin != 3) {
+        die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"XYZ\" value of \"--distancemode\" : The number of values must be 3.\n";
+      }
+      for my $Value (@SpecifiedDistanceOrigin) {
+        if (!IsNumerical($Value)) {
+          # Report the coordinate that failed the check, not always the first one...
+          die "Error: Invalid coordinate value, ", $Value, ", for option \"distanceorigin\" option during \"XYZ\" value of \"--distancemode\". Allowed values: numerical\n";
+        }
+      }
+    }
+  }
+
+  @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}} = @SpecifiedDistanceOrigin;
+}
+# Retrieve information about PDB files...
+#
+# Walks through @PDBFilesList and fills %PDBFilesInfo with one entry per input file
+# (all keys are arrays indexed by the position of the file in @PDBFilesList):
+#
+#   FileOkay               - 1 when the file passed all the checks; 0 otherwise
+#   OutFileRoot            - root used to generate output file names
+#   OutFileNames           - list of output file names
+#   ChainLabels            - "Chain<ChainID>" labels for the chains to process
+#   ChainSequenceIDs,
+#   ChainSequenceIDsPrefix - sequence IDs and their prefixes (Sequences mode only)
+#   SpecifiedChains        - chain IDs to process
+#   DistanceOrigin         - flat list of X, Y, Z coordinates (Distance mode only)
+#
+# A file is ignored with a warning - FileOkay stays 0 and the other entries stay
+# empty - when it can't be read, is not a PDB file, contains no chains, doesn't
+# contain a specified chain or distance origin, or when an output file already
+# exists and overwriting has not been requested.
+#
+sub RetrievePDBFilesInfo {
+  my($Index, $PDBFile, $PDBRecordLinesRef, $ChainID, $ChainLabel, $ChainsAndResiduesInfoRef, $Mode, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot, @SpecifiedChains, @DistanceOrigin, @OutFileNames, @ChainLabels, @ChainSequenceIDs, @ChainSequenceIDsPrefix);
+
+  %PDBFilesInfo = ();
+  @{$PDBFilesInfo{FileOkay}} = ();
+  @{$PDBFilesInfo{OutFileRoot}} = ();
+  @{$PDBFilesInfo{OutFileNames}} = ();
+  @{$PDBFilesInfo{ChainLabels}} = ();
+  @{$PDBFilesInfo{ChainSequenceIDs}} = ();
+  @{$PDBFilesInfo{ChainSequenceIDsPrefix}} = ();
+  @{$PDBFilesInfo{SpecifiedChains}} = ();
+  @{$PDBFilesInfo{DistanceOrigin}} = ();
+
+  FILELIST: for $Index (0 .. $#PDBFilesList) {
+    # Start with a "not okay" entry; it's filled in only after all checks pass...
+    $PDBFilesInfo{FileOkay}[$Index] = 0;
+    $PDBFilesInfo{OutFileRoot}[$Index] = '';
+    @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
+    @{$PDBFilesInfo{ChainLabels}[$Index]} = ();
+    @{$PDBFilesInfo{ChainSequenceIDs}[$Index]} = ();
+    @{$PDBFilesInfo{ChainSequenceIDsPrefix}[$Index]} = ();
+    @{$PDBFilesInfo{SpecifiedChains}[$Index]} = ();
+    @{$PDBFilesInfo{DistanceOrigin}[$Index]} = ();
+
+    $PDBFile = $PDBFilesList[$Index];
+    if (!(-e $PDBFile)) {
+      warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
+      next FILELIST;
+    }
+    if (!CheckFileType($PDBFile, "pdb")) {
+      warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
+      next FILELIST;
+    }
+    # Readability check only. Three-argument open with a lexical handle makes sure
+    # the file name is never interpreted as an open mode or a command...
+    my($PDBFileHandle);
+    if (!open $PDBFileHandle, '<', $PDBFile) {
+      warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
+      next FILELIST;
+    }
+    close $PDBFileHandle;
+
+    # Get PDB data...
+    $PDBRecordLinesRef = ReadPDBFile($PDBFile);
+    if ($OptionsInfo{Mode} =~ /^Sequences$/i && $OptionsInfo{SequenceRecordSource} =~ /^SeqRes$/i) {
+      $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'SeqRes');
+    }
+    else {
+      $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
+    }
+    if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
+      warn "Warning: Ignoring file $PDBFile: No chains found \n";
+      next FILELIST;
+    }
+
+    # Make sure specified chains exist in PDB file...
+    @SpecifiedChains = ();
+    if ($OptionsInfo{ChainsToExtract} =~ /^Specified$/i) {
+      for $ChainID (@{$OptionsInfo{SpecifiedChains}}) {
+        if (exists $ChainsAndResiduesInfoRef->{Residues}{$ChainID}) {
+          push @SpecifiedChains, $ChainID;
+        }
+        else {
+          warn "Warning: Ignoring file $PDBFile: Specified chain, $ChainID, in \"-c, --chains\" option doesn't exist.\n";
+          next FILELIST;
+        }
+      }
+    }
+    elsif ($OptionsInfo{ChainsToExtract} =~ /^First$/i) {
+      push @SpecifiedChains, $ChainsAndResiduesInfoRef->{ChainIDs}[0];
+    }
+    elsif ($OptionsInfo{ChainsToExtract} =~ /^All$/i) {
+      push @SpecifiedChains, @{$ChainsAndResiduesInfoRef->{ChainIDs}};
+    }
+
+    # Setup chain labels to use for sequence IDs and generating output files...
+    @ChainLabels = ();
+    for $ChainID (@SpecifiedChains) {
+      # A chain ID starting with "None" loses that prefix in its label...
+      $ChainLabel = $ChainID; $ChainLabel =~ s/^None//ig;
+      $ChainLabel = "Chain${ChainLabel}";
+      push @ChainLabels, $ChainLabel;
+    }
+
+    # Make sure specified distance origin is valid...
+    @DistanceOrigin = ();
+    if ($OptionsInfo{Mode} =~ /^Distance$/i) {
+      if ($OptionsInfo{ExtractionDistanceMode} =~ /^(Atom|Hetatm)$/i) {
+        # Origin is the first ATOM or HETATM record matching specified number and name...
+        my($RecordType, $SpecifiedAtomName, $SpecifiedAtomNumber, $RecordFound, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $RecordLine);
+
+        $RecordType = $OptionsInfo{ExtractionDistanceMode};
+        ($SpecifiedAtomNumber, $SpecifiedAtomName) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
+
+        $RecordFound = 0;
+        LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
+          if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
+            next LINE;
+          }
+          ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
+          $AtomName = RemoveLeadingAndTrailingWhiteSpaces($AtomName);
+          if (($RecordType =~ /^Atom$/i && IsAtomRecordType($RecordLine)) || ($RecordType =~ /^Hetatm$/i && IsHetatmRecordType($RecordLine))) {
+            if ($AtomNumber == $SpecifiedAtomNumber && $AtomName eq $SpecifiedAtomName) {
+              $RecordFound = 1;
+              last LINE;
+            }
+          }
+        }
+        if (!$RecordFound) {
+          warn "Warning: Ignoring file $PDBFile: ", uc($RecordType), " record corresponding to \"--distanceorigin\" option value, $OptionsInfo{ExtractionDistanceOrigin}, doesn't exist.\n";
+          next FILELIST;
+        }
+        # $X, $Y and $Z still hold the coordinates of the matching record...
+        push @DistanceOrigin, ($X, $Y, $Z);
+      }
+      elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
+        # Origin is the set of all atoms in the specified residue; chain ID is optional...
+        my($SpecifiedResidueNumber, $SpecifiedResidueName, $SpecifiedChainID, $RecordFound, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $RecordLine);
+
+        $SpecifiedChainID = '';
+        if (@{$OptionsInfo{SpecifiedExtractionDistanceOrigin}} == 3) {
+          ($SpecifiedResidueNumber, $SpecifiedResidueName, $SpecifiedChainID) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
+        }
+        else {
+          ($SpecifiedResidueNumber, $SpecifiedResidueName) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
+        }
+
+        $RecordFound = 0;
+        LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
+          if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
+            next LINE;
+          }
+          ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
+          $ResidueName = RemoveLeadingAndTrailingWhiteSpaces($ResidueName);
+          $ChainID = RemoveLeadingAndTrailingWhiteSpaces($ChainID);
+          if ($SpecifiedChainID && ($SpecifiedChainID ne $ChainID)) {
+            next LINE;
+          }
+          if ($ResidueNumber == $SpecifiedResidueNumber && $ResidueName eq $SpecifiedResidueName) {
+            # Store coordinates for all the atoms...
+            $RecordFound = 1;
+            push @DistanceOrigin, ($X, $Y, $Z);
+            next LINE;
+          }
+        }
+        if (!$RecordFound) {
+          warn "Warning: Ignoring file $PDBFile: ATOM/HETATM record corresponding to \"--distanceorigin\" option value, $OptionsInfo{ExtractionDistanceOrigin}, doesn't exist.\n";
+          next FILELIST;
+        }
+      }
+      elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
+        push @DistanceOrigin, @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
+      }
+    }
+
+    # Setup output file names...
+    @OutFileNames = ();
+    $FileDir = ""; $FileName = ""; $FileExt = "";
+    ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
+    # Specified root is used only for a single input file...
+    if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
+      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
+      if ($RootFileName && $RootFileExt) {
+        $FileName = $RootFileName;
+      }
+      else {
+        $FileName = $OptionsInfo{OutFileRoot};
+      }
+      $OutFileRoot = $FileName;
+    }
+    else {
+      $OutFileRoot = $FileName;
+    }
+
+    $Mode = $OptionsInfo{Mode};
+    if ($Mode =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames|ResidueNums|ResiduesRange|ResidueNames|Distance|NonWater|NonHydrogens)$/i) {
+      # These modes generate a single output file: <Root><Mode>.pdb...
+      $OutFileName = '';
+      if ($Mode =~ /^CAlphas$/i) {
+        $OutFileName = "${OutFileRoot}CAlphas.pdb";
+      }
+      elsif ($Mode =~ /^Atoms$/i) {
+        $OutFileName = "${OutFileRoot}Atoms.pdb";
+      }
+      elsif ($Mode =~ /^AtomNums$/i) {
+        $OutFileName = "${OutFileRoot}AtomNums.pdb";
+      }
+      elsif ($Mode =~ /^AtomsRange$/i) {
+        $OutFileName = "${OutFileRoot}AtomsRange.pdb";
+      }
+      elsif ($Mode =~ /^AtomNames$/i) {
+        $OutFileName = "${OutFileRoot}AtomNames.pdb";
+      }
+      elsif ($Mode =~ /^ResidueNums$/i) {
+        $OutFileName = "${OutFileRoot}ResidueNums.pdb";
+      }
+      elsif ($Mode =~ /^ResiduesRange$/i) {
+        $OutFileName = "${OutFileRoot}ResiduesRange.pdb";
+      }
+      elsif ($Mode =~ /^ResidueNames$/i) {
+        $OutFileName = "${OutFileRoot}ResidueNames.pdb";
+      }
+      elsif ($Mode =~ /^NonWater$/i) {
+        $OutFileName = "${OutFileRoot}NonWater.pdb";
+      }
+      elsif ($Mode =~ /^NonHydrogens$/i) {
+        $OutFileName = "${OutFileRoot}NonHydrogens.pdb";
+      }
+      elsif ($Mode =~ /^Distance$/i) {
+        my($DistanceMode) = '';
+        if ($OptionsInfo{ExtractionDistanceMode} =~ /^Atom$/i) {
+          $DistanceMode = 'Atom';
+        }
+        elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Hetatm$/i) {
+          $DistanceMode = 'Hetatm';
+        }
+        elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
+          $DistanceMode = 'Residue';
+        }
+        elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
+          $DistanceMode = 'XYZ';
+        }
+        $OutFileName = "${OutFileRoot}DistanceBy${DistanceMode}.pdb";
+      }
+      push @OutFileNames, $OutFileName;
+      if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
+        warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
+        next FILELIST;
+      }
+    }
+    elsif ($Mode =~ /^(Chains|Sequences)$/i) {
+      # One combined output file or one output file per chain...
+      if ($OptionsInfo{CombineChainSequences}) {
+        $OutFileName = ($Mode =~ /^Chains$/i) ? "${OutFileRoot}ExtractedChains.pdb" : "${OutFileRoot}SequencesChainsCombined.fasta";
+        push @OutFileNames, $OutFileName;
+        if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
+          warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
+          next FILELIST;
+        }
+      }
+      else {
+        for $ChainLabel (@ChainLabels) {
+          $OutFileName = ($Mode =~ /^Chains$/i) ? "${OutFileRoot}${ChainLabel}.pdb" : "${OutFileRoot}Sequences${ChainLabel}.fasta";
+          push @OutFileNames, $OutFileName;
+          if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
+            warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
+            next FILELIST;
+          }
+        }
+      }
+    }
+
+    # Setup sequence IDs: <IDPrefix>_<ChainLabel>|PDB...
+    @ChainSequenceIDs = ();
+    @ChainSequenceIDsPrefix = ();
+    if ($Mode =~ /^Sequences$/i) {
+      my($Classification, $DepositionDate, $IDCode, $IDPrefix);
+
+      ($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
+      if ($OptionsInfo{SequenceIDPrefixSource} =~ /^FileName$/i) {
+        $IDPrefix = $FileName;
+      }
+      elsif ($OptionsInfo{SequenceIDPrefixSource} =~ /^HeaderRecord$/i) {
+        $IDPrefix = IsNotEmpty($IDCode) ? $IDCode : '';
+      }
+      else {
+        # Any other source: prefer ID code from HEADER record and fall back to file name...
+        $IDPrefix = IsNotEmpty($IDCode) ? $IDCode : $FileName;
+      }
+
+      for $ChainLabel (@ChainLabels) {
+        push @ChainSequenceIDsPrefix, $IDPrefix;
+        push @ChainSequenceIDs, "${IDPrefix}_${ChainLabel}|PDB";
+      }
+    }
+
+    # All checks passed: store information for the file...
+    $PDBFilesInfo{FileOkay}[$Index] = 1;
+    $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
+
+    push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
+    push @{$PDBFilesInfo{ChainLabels}[$Index]}, @ChainLabels;
+    push @{$PDBFilesInfo{ChainSequenceIDsPrefix}[$Index]}, @ChainSequenceIDsPrefix;
+    push @{$PDBFilesInfo{ChainSequenceIDs}[$Index]}, @ChainSequenceIDs;
+    push @{$PDBFilesInfo{SpecifiedChains}[$Index]}, @SpecifiedChains;
+    push @{$PDBFilesInfo{DistanceOrigin}[$Index]}, @DistanceOrigin;
+  }
+}
+# Setup script usage and retrieve command line arguments specified using various options...
+#
+# Initializes %Options with default values, parses the command line using GetOptions,
+# changes to the working directory when "-w, --workingdir" is specified, and validates
+# the options which accept a fixed set of values. Dies with an error message on the
+# first invalid value.
+#
+sub SetupScriptUsage {
+
+  # Setup default values...
+  %Options = ();
+  $Options{chains} = 'First';
+  $Options{combinechains} = 'no';
+  $Options{distance} = 10.0;
+  $Options{distancemode} = 'XYZ';
+  $Options{distanceselectionmode} = 'ByAtom';
+  $Options{keepoldrecords} = 'no';
+  $Options{mode} = 'NonWater';
+  $Options{modifyheader} = 'yes';
+  $Options{nonstandardkeep} = 'yes';
+  $Options{nonstandardcode} = 'X';
+  $Options{sequencelength} = 80;
+  $Options{sequenceidprefix} = 'Automatic';
+  $Options{sequencerecords} = 'Atom';
+  $Options{waterresiduenames} = 'Automatic';
+
+  # Retrieve all the options...
+  if (!GetOptions(\%Options, "atoms|a=s", "chains|c=s", "combinechains=s", "distance|d=f", "distancemode=s", "distanceorigin=s", "distanceselectionmode=s", "help|h", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "nonstandardkeep=s", "nonstandardcode=s", "overwrite|o", "root|r=s", "recordmode=s", "residues=s", "sequencelength=i", "sequenceidprefix=s", "sequencerecords=s", "waterresiduenames=s", "workingdir|w=s")) {
+    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
+  }
+
+  if ($Options{workingdir}) {
+    if (! -d $Options{workingdir}) {
+      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
+    }
+    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
+  }
+
+  # Validate option values; matching is case insensitive...
+  if ($Options{combinechains} !~ /^(yes|no)$/i) {
+    die "Error: The value specified, $Options{combinechains}, for option \"--CombineChains\" is not valid. Allowed values: yes or no\n";
+  }
+  if ($Options{distancemode} !~ /^(Atom|Hetatm|Residue|XYZ)$/i) {
+    die "Error: The value specified, $Options{distancemode}, for option \"--DistanceMode\" is not valid. Allowed values: Atom, Hetatm, Residue, or XYZ\n";
+  }
+  if ($Options{distanceselectionmode} !~ /^(ByAtom|ByResidue)$/i) {
+    die "Error: The value specified, $Options{distanceselectionmode}, for option \"--DistanceSelectionMode\" is not valid. Allowed values: ByAtom or ByResidue\n";
+  }
+  if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
+    die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
+  }
+  if ($Options{mode} !~ /^(Chains|Sequences|Atoms|CAlphas|AtomNums|AtomsRange|AtomNames|ResidueNums|ResidueNames|ResiduesRange|Distance|NonWater|NonHydrogens)$/i) {
+    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: Chains, Sequences, Atoms, CAlphas, AtomNums, AtomsRange, AtomNames, ResidueNums, ResiduesRange, ResidueNames, Distance, NonWater, NonHydrogens\n";
+  }
+  if ($Options{modifyheader} !~ /^(yes|no)$/i) {
+    die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
+  }
+  if ($Options{nonstandardkeep} !~ /^(yes|no)$/i) {
+    die "Error: The value specified, $Options{nonstandardkeep}, for option \"--NonStandardKeep\" is not valid. Allowed values: yes or no\n";
+  }
+  if ($Options{nonstandardcode} !~ /^(\?|\-|X)$/i) {
+    die "Error: The value specified, $Options{nonstandardcode}, for option \"--NonStandardCode\" is not valid. Allowed values: ?, -, or X\n";
+  }
+  # No default value is set here for record mode; it's validated only when specified...
+  if ($Options{recordmode} && $Options{recordmode} !~ /^(Atom|Hetatm|AtomAndHetatm)$/i) {
+    die "Error: The value specified, $Options{recordmode}, for option \"--RecordMode\" is not valid. Allowed values: Atom, Hetatm, AtomAndHetatm\n";
+  }
+  if (!IsPositiveInteger($Options{sequencelength})) {
+    die "Error: The value specified, $Options{sequencelength}, for option \"--SequenceLength\" is not valid. Allowed values: >0\n";
+  }
+  if ($Options{sequencerecords} !~ /^(Atom|SeqRes)$/i) {
+    die "Error: The value specified, $Options{sequencerecords}, for option \"--SequenceRecords\" is not valid. Allowed values: Atom or SeqRes\n";
+  }
+  if ($Options{sequenceidprefix} !~ /^(FileName|HeaderRecord|Automatic)$/i) {
+    die "Error: The value specified, $Options{sequenceidprefix}, for option \"--SequenceIDPrefix\" is not valid. Allowed values: FileName, HeaderRecord, or Automatic\n";
+  }
+}
+__END__
+=head1 NAME
+ExtractFromPDBFiles.pl - Extract specific data from PDBFile(s)
+=head1 SYNOPSIS
+ExtractFromPDBFiles.pl PDBFile(s)...
+ExtractFromPDBFiles.pl [B<-a, --Atoms> "AtomNum, [AtomNum...]" | "StartAtomNum, EndAtomNum" |
+"AtomName, [AtomName...]"] [B<-c, --chains> First | All | "ChainID, [ChainID,...]"]
+[B<--CombineChains> yes | no] [B<-d, --distance> number] [B<--DistanceMode> Atom | Hetatm | Residue | XYZ]
+[B<--DistanceOrigin> "AtomNumber, AtomName" | "HetatmNumber, HetAtmName" | "ResidueNumber, ResidueName, [ChainID]" | "X,Y,Z"]
+[B<--DistanceSelectionMode> ByAtom | ByResidue] [B<-h, --help>] [B<-k, --KeepOldRecords> yes | no]
+[B<-m, --mode > Chains | Sequences | Atoms | CAlphas | AtomNums | AtomsRange | AtomNames |
+ResidueNums | ResiduesRange | ResidueNames | Distance | NonWater | NonHydrogens]
+[B<--ModifyHeader> yes | no] [B<--NonStandardKeep> yes | no] [B<--NonStandardCode> character]
+[B<-o, --overwrite>] [B<-r, --root> rootname] [B<--RecordMode> I<Atom | Hetatm | AtomAndHetatm>]
+[B<--Residues> "ResidueNum,[ResidueNum...]" | "StartResidueNum,EndResidueNum" | "ResidueName,[ResidueName...]"]
+[B<--SequenceLength> number] [B<--SequenceRecords> Atom | SeqRes]
+[B<--SequenceIDPrefix> FileName | HeaderRecord | Automatic]
+[B<--WaterResidueNames> Automatic | "ResidueName, [ResidueName,...]"]
+[B<-w, --WorkingDir> dirname] PDBFile(s)...
+=head1 DESCRIPTION
+Extract specific data from I<PDBFile(s)> and generate appropriate PDB or sequence file(s).
+Multiple PDBFile names are separated by spaces. The valid file extension is I<.pdb>.
+All other file name extensions are ignored during the wild card expansion. All the PDB files
+in a current directory can be specified either by I<*.pdb> or the current directory name.
+During I<Chains> and I<Sequences> values of B<-m, --mode> option, all ATOM/HETATM records
+for chains after the first model in PDB files containing data for multiple models are ignored.
+=head1 OPTIONS
+=over 4
+=item B<-a, --Atoms> I<"AtomNum,[AtomNum...]" | "StartAtomNum,EndAtomNum" | "AtomName,[AtomName...]">
+Specify which atom records to extract from I<PDBFiles(s)> during I<AtomNums>,
+I<AtomsRange>, and I<AtomNames> value of B<-m, --mode> option: extract records
+corresponding to atom numbers specified in a comma delimited list of atom numbers/names,
+or with in the range of start and end atom numbers. Possible values: I<"AtomNum[,AtomNum,..]">,
+I<StartAtomNum,EndAtomNum>, or I<"AtomName[,AtomName,..]">. Default: I<None>. Examples:
+10
+15,20
+N,CA,C,O
+=item B<-c, --chains> I<First | All | ChainID,[ChainID,...]>
+Specify which chains to extract from I<PDBFile(s)> during I<Chains | Sequences> value of
+B<-m, --mode> option: first chain, all chains, or a specific list of comma delimited chain IDs.
+Possible values: I<First | All | ChainID,[ChainID,...]>. Default: I<First>. Examples:
+A
+A,B
+All
+=item B<--CombineChains> I<yes | no>
+Specify whether to combine extracted chains data into a single file during I<Chains> or
+I<Sequences> value of B<-m, --mode> option. Possible values: I<yes | no>. Default: I<no>.
+During I<Chains> value of B<-m, --mode> option with I<Yes> value of B<--CombineChains>,
+extracted data for specified chains is written into a single file instead of individual file for each
+chain.
+During I<Sequences> value of B<-m, --mode> option with I<Yes> value of B<--CombineChains>,
+residues sequences for specified chains are extracted and concatenated into a single sequence
+file instead of  individual file for each chain.
+=item B<-d, --distance> I<number>
+Specify distance used to extract ATOM/HETATM records during I<Distance> value of
+B<-m, --mode> option. Default: I<10.0> angstroms.
+B<--RecordMode> option controls type of record lines to extract from I<PDBFile(s)>:
+ATOM, HETATM or both.
+=item B<--DistanceMode> I<Atom | Hetatm | Residue | XYZ>
+Specify how to extract ATOM/HETATM records from I<PDBFile(s)> during I<Distance> value of
+B<-m, --mode> option: extract all the records within a certain distance specified by B<-d, --distance>
+from an atom or hetero atom record, a residue, or any arbitrary point. Possible values: I<Atom |
+Hetatm | Residue | XYZ>. Default: I<XYZ>.
+During I<Residue> value of B<--distancemode>, distance of ATOM/HETATM records is calculated from
+all the atoms in the residue and the records are selected as long as any atom of the residue lies
+within the distance specified using B<-d, --distance> option.
+B<--RecordMode> option controls type of record lines to extract from I<PDBFile(s)>:
+ATOM, HETATM or both.
+=item B<--DistanceSelectionMode> I<ByAtom | ByResidue>
+Specify how to extract ATOM/HETATM records from I<PDBFile(s)> during I<Distance> value of
+B<-m, --mode> option for all values of B<--DistanceMode> option: extract only those ATOM/HETATM
+records that meet specified distance criterion; extract all records corresponding to a residue as
+long as one of the ATOM/HETATM record in the residue satisfies specified distance criterion. Possible
+values: I<ByAtom, ByResidue>. Default value: I<ByAtom>.
+=item B<--DistanceOrigin> I<"AtomNumber,AtomName" | "HetatmNumber,HetAtmName" | "ResidueNumber,ResidueName[,ChainID]" | "X,Y,Z">
+This value is B<--distancemode> specific. In general, it identifies a point used to select
+other ATOM/HETATMS with in a specific distance from this point.
+For I<Atom> value of B<--distancemode>, this option corresponds to an atom specification.
+Format: I<AtomNumber,AtomName>. Example:
+455,CA
+For I<Hetatm> value of B<--distancemode>, this option corresponds to a hetatm specification.
+Format: I<HetatmNumber,HetAtmName>. Example:
+5295,C1
+For I<Residue> value of B<--distancemode>, this option corresponds to a residue specification.
+Format: I<ResidueNumber, ResidueName[,ChainID]>. Example:
+78,MSE
+977,RET,A
+978,RET,B
+For I<XYZ> value of B<--distancemode>, this option corresponds to a coordinate of an
+arbitrary point. Format: I<X,Y,Z>. Example:
+10.044,19.261,-4.292
+B<--RecordMode> option controls type of record lines to extract from I<PDBFile(s)>:
+ATOM, HETATM or both.
+=item B<-h, --help>
+Print this help message.
+=item B<-k, --KeepOldRecords> I<yes | no>
+Specify whether to transfer old non ATOM and HETATM records from input PDBFile(s) to new
+PDBFile(s) during I<Chains | Atoms | HetAtms | CAlphas | Distance| NonWater | NonHydrogens>
+value of B<-m --mode> option. By default, except for the HEADER record, all
+other unnecessary non ATOM/HETATM records are dropped during the
+generation of new PDB files. Possible values: I<yes | no>. Default: I<no>.
+=item B<-m, --mode > I<Chains | Sequences | Atoms | CAlphas | AtomNums | AtomsRange | AtomNames | ResidueNums | ResiduesRange | ResidueNames | Distance | NonWater | NonHydrogens>
+Specify what to extract from I<PDBFile(s)>: I<Chains> - retrieve records for
+specified chains; I<Sequences> - generate sequence files for specific chains;
+I<Atoms> - extract atom records; I<CAlphas> - extract atom records for alpha
+carbon atoms; I<AtomNums> - extract atom records for specified atom numbers;
+I<AtomsRange> - extract atom records between specified atom number range;
+I<AtomNames> - extract atom records for specified atom names; I<ResidueNums>
+- extract records for specified residue numbers; I<ResiduesRange> - extract records
+for residues between specified residue number range; I<ResidueNames> - extract
+records for specified residue names; I<Distance> - extract records with in a
+certain distance from a specific position; I<NonWater> - extract records corresponding
+to residues other than water; I<NonHydrogens> - extract non-hydrogen records.
+Possible values: I<Chains, Sequences, Atoms, CAlphas, AtomNums, AtomsRange,
+AtomNames, ResidueNums, ResiduesRange, ResidueNames, Distance, NonWater,
+NonHydrogens>. Default value: I<NonWater>
+During the generation of new PDB files, unnecessary CONECT records are dropped.
+For I<Chains> mode, data for appropriate chains specified by B<-c, --chains> option
+is extracted from I<PDBFile(s)> and placed into new PDB file(s).
+For I<Sequences> mode, residues names using various sequence related options are
+extracted for chains specified by B<-c, --chains> option from I<PDBFile(s)> and
+FASTA sequence file(s) are generated.
+For I<Distance> mode, all ATOM/HETATM records with in a distance specified
+by B<-d --distance> option from a specific atom, residue or a point indicated by
+B<--distancemode> are extracted and placed into new PDB file(s).
+For I<NonWater> mode, non water ATOM/HETATM record lines, identified using value of
+B<--WaterResidueNames>, are extracted and written to new PDB file(s).
+For I<NonHydrogens> mode, ATOM/HETATM record lines containing element symbol
+other than I<H> are extracted and written to new PDB file(s).
+For all other options, appropriate ATOM/HETATM records are extracted to generate new
+PDB file(s).
+B<--RecordMode> option controls type of record lines to extract and process from
+I<PDBFile(s)>: ATOM, HETATM or both.
+=item B<--ModifyHeader> I<yes | no>
+Specify whether to modify HEADER record during the generation of new PDB files
+for B<-m, --mode> values of I<Chains | Atoms | CAlphas | Distance>. Possible values:
+I<yes | no>.  Default: I<yes>. By default, Classification data is replaced by I<Data extracted
+using MayaChemTools> before writing out HEADER record.
+=item B<--NonStandardKeep> I<yes | no>
+Specify whether to include and convert non-standard three letter residue codes into
+a code specified using B<--nonstandardcode> option and include them into sequence file(s)
+generated during I<Sequences> value of B<-m, --mode> option. Possible values: I<yes | no>.
+Default: I<yes>.
+A warning is also printed about the presence of non-standard residues. Any residue other
+than standard 20 amino acids and 5 nucleic acid is considered non-standard; additionally,
+HETATM residues in chains are also tagged as non-standard.
+=item B<--NonStandardCode> I<character>
+A single character code to use for non-standard residues. Default: I<X>. Possible values:
+I<?, -, or X>.
+=item B<-o, --overwrite>
+Overwrite existing files.
+=item B<-r, --root> I<rootname>
+New PDB and sequence file name is generated using the root: <Root><Mode>.<Ext>.
+Default new file name: <PDBFileName>Chain<ChainID>.pdb for I<Chains> B<mode>;
+<PDBFileName>SequencesChain<ChainID>.fasta for I<Sequences> B<mode>;
+<PDBFileName>DistanceBy<DistanceMode>.pdb for I<Distance> B<-m, --mode>
+<PDBFileName><Mode>.pdb for I<Atoms | CAlphas | NonWater | NonHydrogens> B<-m, --mode>
+values. This option is ignored for multiple input files.
+=item B<--RecordMode> I<Atom | Hetatm | AtomAndHetatm>
+Specify type of record lines to extract and process from I<PDBFile(s)> during various
+values of B<-m, --mode> option: extract only ATOM record lines; extract only HETATM
+record lines; extract both ATOM and HETATM lines. Possible values: I<Atom | Hetatm
+| AtomAndHetatm>. Default during I<Atoms, CAlphas, AtomNums, AtomsRange,
+AtomNames> values of B<-m, --mode> option: I<Atom>; otherwise: I<AtomAndHetatm>.
+This option is ignored during I<Chains, Sequences> values of B<-m, --mode> option.
+=item B<--Residues> I<"ResidueNum,[ResidueNum...]" | "StartResidueNum,EndResidueNum" | "ResidueName,[ResidueName...]">
+Specify which residue records to extract from I<PDBFiles(s)> during I<ResidueNums>,
+I<ResiduesRange>, and I<ResidueNames> value of B<-m, --mode> option: extract records
+corresponding to residue numbers specified in a comma delimited list of residue numbers/names,
+or with in the range of start and end residue numbers. Possible values: I<"ResidueNum[,ResidueNum,..]">,
+I<StartResidueNum,EndResidueNum>, or I<"ResidueName[,ResidueName,..]">. Default: I<None>. Examples:
+20
+5,10
+TYR,SER,THR
+B<--RecordMode> option controls type of record lines to extract from I<PDBFile(s)>:
+ATOM, HETATM or both.
+=item B<--SequenceLength> I<number>
+Maximum sequence length per line in sequence file(s). Default: I<80>.
+=item B<--SequenceRecords> I<Atom | SeqRes>
+Specify which records to use for extracting residue names from I<PDBFiles(s)> during
+I<Sequences> value of B<-m, --mode> option: use ATOM records to compile a list
+of residues in a chain or parse SEQRES record to get a list of residues. Possible values:
+I<Atom | SeqRes>. Default: I<Atom>.
+=item B<--SequenceIDPrefix> I<FileName | HeaderRecord | Automatic>
+Specify how to generate a prefix for sequence IDs during I<Sequences> value
+of B<-m, --mode> option: use input file name prefix; retrieve PDB ID from HEADER record;
+or automatically decide the method for generating the prefix. The chain IDs are also
+appended to the prefix. Possible values: I<FileName | HeaderRecord | Automatic>.
+Default: I<Automatic>
+=item B<--WaterResidueNames> I<Automatic | "ResidueName,[ResidueName,...]">
+Identification of water residues during I<NonWater> value of B<-m, --mode> option. Possible values:
+I<Automatic | "ResidueName,[ResidueName,...]">. Default: I<Automatic> - corresponds
+to "HOH,WAT,H20". You can also specify a different comma delimited list of residue names
+to use for water.
+=item B<-w, --WorkingDir> I<dirname>
+Location of working directory. Default: current directory.
+=back
+=head1 EXAMPLES
+To extract non-water records from Sample2.pdb file and generate Sample2NonWater.pdb
+file, type:
+% ExtractFromPDBFiles.pl Sample2.pdb
+To extract non-water records corresponding to only ATOM records from Sample2.pdb file
+and generate Sample2NonWater.pdb file, type:
+% ExtractFromPDBFiles.pl --RecordMode Atom Sample2.pdb
+To extract non-water records from Sample2.pdb file using HOH or WAT residue name for water along
+with all old non-coordinate records and generate Sample2NewNonWater.pdb file, type:
+% ExtractFromPDBFiles.pl -m NonWater --WaterResidueNames "HOH,WAT"
+--KeepOldRecords Yes -r Sample2New -o Sample2.pdb
+To extract non-hydrogens records from Sample2.pdb file and generate Sample2NonHydrogen.pdb
+file, type:
+% ExtractFromPDBFiles.pl -m NonHydrogens Sample2.pdb
+To extract data for the first chain in Sample2.pdb and generate Sample2ChainA.pdb
+file, type:
+% ExtractFromPDBFiles.pl -m chains -o Sample2.pdb
+To extract data for both chains in Sample2.pdb and generate Sample2ChainA.pdb and
+Sample2ChainB.pdb, type:
+% ExtractFromPDBFiles.pl -m chains -c All -o Sample2.pdb
+To extract data for alpha carbons in Sample2.pdb and generate Sample2CAlphas.pdb, type:
+% ExtractFromPDBFiles.pl -m CAlphas -o Sample2.pdb
+To extract records for specific residue numbers in all chains from Sample2.pdb file and generate
+Sample2ResidueNums.pdb file, type:
+% ExtractFromPDBFiles.pl -m ResidueNums --Residues "3,6"
+Sample2.pdb
+To extract records for a specific range of residue numbers in all chains from Sample2.pdb
+file and generate Sample2ResiduesRange.pdb file, type:
+% ExtractFromPDBFiles.pl -m ResiduesRange --Residues "10,30"
+Sample2.pdb
+To extract data for all ATOM and HETATM records within 10 angstroms of an atom specified by
+atom serial number and name "1,N" in Sample2.pdb file and generate Sample2DistanceByAtom.pdb,
+type:
+% ExtractFromPDBFiles.pl -m Distance --DistanceMode Atom
+--DistanceOrigin "1,N" -k No --distance 10 -o Sample2.pdb
+To extract data for all ATOM and HETATM records for complete residues with any atom or hetatm
+less than 10 angstroms from an atom specified by atom serial number and name "1,N" in Sample2.pdb
+file and generate Sample2DistanceByAtom.pdb, type:
+% ExtractFromPDBFiles.pl -m Distance --DistanceMode Atom
+--DistanceOrigin "1,N" --DistanceSelectionMode ByResidue
+-k No --distance 10 -o Sample2.pdb
+To extract data for all ATOM and HETATM records within 25 angstroms of an arbitrary point "0,0,0"
+in Sample2.pdb file and generate Sample2DistanceByXYZ.pdb, type:
+% ExtractFromPDBFiles.pl -m Distance --DistanceMode XYZ
+--DistanceOrigin "0,0,0" -k No --distance 25 -o Sample2.pdb
+=head1 AUTHOR
+Manish Sud <msud@san.rr.com>
+=head1 SEE ALSO
+InfoPDBFiles.pl, ModifyPDBFiles.pl
+=head1 COPYRIGHT
+Copyright (C) 2015 Manish Sud. All rights reserved.
+This file is part of MayaChemTools.
+MayaChemTools is free software; you can redistribute it and/or modify it under
+the terms of the GNU Lesser General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+=cut

Mercurial > repos > deepakjadmin > mayatool3_test3

comparison mayachemtools/bin/ExtractFromPDBFiles.pl @ 0:73ae111cf86f draft