MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # $RCSfile: ExtractFromPDBFiles.pl,v $
   4 # $Date: 2015/02/28 20:46:19 $
   5 # $Revision: 1.39 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use FindBin; use lib "$FindBin::Bin/../lib";
  31 use Getopt::Long;
  32 use File::Basename;
  33 use Text::ParseWords;
  34 use Benchmark;
  35 use FileUtil;
  36 use TextUtil;
  37 use PDBFileUtil;
  38 use AminoAcids;
  39 use SequenceFileUtil;
  40 
  41 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  42 
  43 # Autoflush STDOUT
  44 $| = 1;
  45 
  46 # Starting message...
  47 $ScriptName = basename($0);
  48 print "\n$ScriptName: Starting...\n\n";
  49 $StartTime = new Benchmark;
  50 
  51 # Get the options and setup script...
  52 SetupScriptUsage();
  53 if ($Options{help} || @ARGV < 1) {
  54   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  55 }
  56 
  57 my(@PDBFilesList);
  58 @PDBFilesList = ExpandFileNames(\@ARGV, "pdb");
  59 
  60 # Process options...
  61 print "Processing options...\n";
  62 my(%OptionsInfo);
  63 ProcessOptions();
  64 
  65 # Setup information about input files...
  66 print "Checking input PDB file(s)...\n";
  67 my(%PDBFilesInfo);
  68 RetrievePDBFilesInfo();
  69 
  70 # Process input files..
  71 my($FileIndex);
  72 if (@PDBFilesList > 1) {
  73   print "\nProcessing PDB files...\n";
  74 }
  75 for $FileIndex (0 .. $#PDBFilesList) {
  76   if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
  77     print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
  78     ExtractFromPDBFiles($FileIndex);
  79   }
  80 }
  81 print "\n$ScriptName:Done...\n\n";
  82 
  83 $EndTime = new Benchmark;
  84 $TotalTime = timediff ($EndTime, $StartTime);
  85 print "Total time: ", timestr($TotalTime), "\n";
  86 
  87 ###############################################################################
  88 
  89 # Extract appropriate information...
  90 sub ExtractFromPDBFiles {
  91   my($FileIndex) = @_;
  92   my($PDBFile, $PDBRecordLinesRef);
  93 
  94   # Get PDB data...
  95   $PDBFile = $PDBFilesList[$FileIndex];
  96   $PDBRecordLinesRef = ReadPDBFile($PDBFile);
  97 
  98   if ($OptionsInfo{Mode} =~ /Chains/i) {
  99     ExtractChains($FileIndex, $PDBRecordLinesRef);
 100   }
 101   elsif ($OptionsInfo{Mode} =~ /Sequences/i) {
 102     ExtractSequences($FileIndex, $PDBRecordLinesRef);
 103   }
 104   elsif ($OptionsInfo{Mode} =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames)$/i) {
 105     ExtractByAtoms($FileIndex, $PDBRecordLinesRef);
 106   }
 107   elsif ($OptionsInfo{Mode} =~ /^(ResidueNums|ResiduesRange|ResidueNames)$/i) {
 108     ExtractByResidues($FileIndex, $PDBRecordLinesRef);
 109   }
 110   elsif ($OptionsInfo{Mode} =~ /Distance/i) {
 111     ExtractByDistance($FileIndex, $PDBRecordLinesRef);
 112   }
 113   elsif ($OptionsInfo{Mode} =~ /NonWater/i) {
 114     ExtractNonWaterRecords($FileIndex, $PDBRecordLinesRef);
 115   }
 116   elsif ($OptionsInfo{Mode} =~ /NonHydrogens/i) {
 117     ExtractNonHydrogenRecords($FileIndex, $PDBRecordLinesRef);
 118   }
 119 }
 120 
 121 # Extract chains and generate new PDB files...
 122 #
 123 sub ExtractChains {
 124   my($FileIndex, $PDBRecordLinesRef) = @_;
 125   my($ChainIndex, $ChainID, $ChainLabel, $PDBFileName, $RecordLine, $ChainsAndResiduesInfoRef, $AtomNumber, $AtomName, $ResidueName, $AtomChainID, $ResidueNumber, $AlternateLocation, $InsertionCode, $ConectRecordLinesRef, %ChainAtomNumbersMap);
 126 
 127   # Get chains and residues data...
 128   $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'AtomAndHetatm', 0, 1);
 129 
 130   if ($OptionsInfo{CombineChains}) {
 131     $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 132     print "Generating PDBFileName file $PDBFileName...\n";
 133 
 134     open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 135 
 136     # Write out header and other older recors...
 137     WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 138   }
 139 
 140   for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
 141     $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
 142     $ChainLabel = $PDBFilesInfo{ChainLabels}[$FileIndex][$ChainIndex];
 143 
 144     if (!$OptionsInfo{CombineChains}) {
 145       $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][$ChainIndex];
 146       print "Generating PDBFileName file $PDBFileName...\n";
 147 
 148       open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 149 
 150       # Write out header and other older recors...
 151       WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 152     }
 153 
 154     # Write out ATOM/HETATM line for chain and collect all ATOM/HETATM serial numbers
 155     # for writing out appropriate CONECT records...
 156     %ChainAtomNumbersMap = ();
 157     for $RecordLine (@{$ChainsAndResiduesInfoRef->{Lines}{$ChainID}}) {
 158       print OUTFILE "$RecordLine\n";
 159       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $AtomChainID, $ResidueNumber, $InsertionCode) = ParseAtomRecordLine($RecordLine);
 160       $AtomNumber = int $AtomNumber;
 161       $ChainAtomNumbersMap{$AtomNumber} = $AtomName;
 162     }
 163     # Write out TER record using information from last chain record...
 164     $AtomNumber += 1;
 165     print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $AtomChainID, $ResidueNumber, $InsertionCode), "\n";
 166 
 167     # Write out CONECT records...
 168     $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%ChainAtomNumbersMap);
 169 
 170     for $RecordLine (@{$ConectRecordLinesRef}) {
 171       print OUTFILE "$RecordLine\n";
 172     }
 173 
 174     if (!$OptionsInfo{CombineChains}) {
 175       # Write out END record...
 176       print OUTFILE GenerateEndRecordLine(), "\n";
 177 
 178       close OUTFILE;
 179     }
 180   }
 181 
 182   if ($OptionsInfo{CombineChains}) {
 183     # Write out END record...
 184     print OUTFILE GenerateEndRecordLine(), "\n";
 185 
 186     close OUTFILE;
 187   }
 188 
 189 }
 190 
 191 # Extract sequences for individual chains or combine all the chains...
 192 sub ExtractSequences {
 193   my($FileIndex, $PDBRecordLinesRef) = @_;
 194   my($ChainIndex, $ChainID, $ChainLabel, $SequenceFileName, $Residue, $ResidueCode, $StandardResidue, $ChainSequence, $WrappedChainSequence, $ChainSequenceID, $ChainsAndResiduesInfoRef, $ChainResiduesRef, %ChainSequencesDataMap);
 195 
 196   if ($OptionsInfo{SequenceRecordSource} =~ /^SeqRes$/i) {
 197     $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'SeqRes');
 198   }
 199   else {
 200     $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
 201   }
 202 
 203   # Generate sequence data for all the chains...
 204   %ChainSequencesDataMap = ();
 205   @{$ChainSequencesDataMap{IDs}} = ();
 206   %{$ChainSequencesDataMap{Sequence}} = ();
 207   %{$ChainSequencesDataMap{Description}} = ();
 208 
 209   for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
 210     $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
 211     $ChainLabel = $PDBFilesInfo{ChainLabels}[$FileIndex][$ChainIndex];
 212 
 213     # Setup sequence ID...
 214     $ChainSequenceID = $PDBFilesInfo{ChainSequenceIDs}[$FileIndex][$ChainIndex];
 215     push @{$ChainSequencesDataMap{IDs}}, $ChainSequenceID;
 216     $ChainSequencesDataMap{Description}{$ChainID} = $ChainSequenceID;
 217 
 218     # Collect sequence data for the chain...
 219     if ($OptionsInfo{SequenceRecordSource} =~ /^SeqRes/i) {
 220       $ChainResiduesRef = \@{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}};
 221     }
 222     else {
 223       $ChainResiduesRef = \@{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}};
 224     }
 225     # Setup sequence data...
 226     $ChainSequence = '';
 227     RESIDUE: for $Residue (@{$ChainResiduesRef}) {
 228       ($ResidueCode, $StandardResidue) = GetResidueCode($Residue);
 229       if (!$StandardResidue) {
 230         if ($OptionsInfo{KeepNonStandardSequences}) {
 231           $ResidueCode = $OptionsInfo{NonStandardSequenceCode};
 232           warn "Warning: Keeping nonstandard residue $Residue in $ChainLabel...\n";
 233         }
 234         else {
 235           warn "Warning: Ignoring nonstandard residue $Residue in $ChainLabel...\n";
 236           next RESIDUE;
 237         }
 238       }
 239       $ChainSequence .= $ResidueCode;
 240     }
 241     $ChainSequencesDataMap{Sequence}{$ChainID} = $ChainSequence;
 242 
 243   }
 244 
 245   # Write out the sequence files...
 246   my($SequenceID, $SequenceDescription, $Sequence, %SequencesDataMap );
 247   if ($OptionsInfo{CombineChainSequences}) {
 248     # Combine all the chain sequences...
 249     $Sequence = '';
 250     for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
 251       $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
 252 
 253       $Sequence .= $ChainSequencesDataMap{Sequence}{$ChainID};
 254     }
 255     $SequenceID = $PDBFilesInfo{ChainSequenceIDsPrefix}[$FileIndex][0] . "_CombinedChains|PDB";;
 256     $SequenceDescription = $SequenceID;
 257     $SequenceFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 258 
 259     print "Generating sequence file $SequenceFileName...\n";
 260     %SequencesDataMap = ();
 261     @{$SequencesDataMap{IDs}} = ();
 262     %{$SequencesDataMap{Sequence}} = ();
 263     %{$SequencesDataMap{Description}} = ();
 264 
 265     push @{$SequencesDataMap{IDs}}, $SequenceID;
 266     $SequencesDataMap{Description}{$SequenceID} = $SequenceDescription;
 267     $SequencesDataMap{Sequence}{$SequenceID} = $Sequence;
 268 
 269     WritePearsonFastaSequenceFile($SequenceFileName, \%SequencesDataMap, $OptionsInfo{MaxSequenceLength});
 270   }
 271   else {
 272     # For each specifed chain, write out the sequences...
 273     for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
 274       $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
 275 
 276       $SequenceFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][$ChainIndex];
 277 
 278       $SequenceID = $ChainSequencesDataMap{IDs}[$ChainIndex];
 279       $SequenceDescription = $ChainSequencesDataMap{Description}{$ChainID};
 280       $Sequence = $ChainSequencesDataMap{Sequence}{$ChainID};
 281 
 282       print "Generating sequence file $SequenceFileName...\n";
 283       %SequencesDataMap = ();
 284       @{$SequencesDataMap{IDs}} = ();
 285       %{$SequencesDataMap{Sequence}} = ();
 286       %{$SequencesDataMap{Description}} = ();
 287 
 288       push @{$SequencesDataMap{IDs}}, $SequenceID;
 289       $SequencesDataMap{Description}{$SequenceID} = $SequenceDescription;
 290       $SequencesDataMap{Sequence}{$SequenceID} = $Sequence;
 291 
 292       WritePearsonFastaSequenceFile($SequenceFileName, \%SequencesDataMap, $OptionsInfo{MaxSequenceLength});
 293     }
 294   }
 295 }
 296 
 297 # Extract atoms...
 298 sub ExtractByAtoms {
 299   my($FileIndex, $PDBRecordLinesRef) = @_;
 300   my($PDBFileName,  $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $IgnoreRecord, $ConectRecordLinesRef, %AtomNumbersMap);
 301 
 302   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 303   print "Generating PDBFileName file $PDBFileName...\n";
 304   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 305 
 306   # Write out header and other older recors...
 307   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 308 
 309   # Write out all ATOM records along with TER and model records to indicate
 310   # chains and multiple models..
 311   %AtomNumbersMap = ();
 312   $ChainRecordCount = 0;
 313   for $RecordLine (@{$PDBRecordLinesRef}) {
 314     if (CheckRecordType($RecordLine)) {
 315       ($AtomNumber, $AtomName) = ParseAtomRecordLine($RecordLine);
 316 
 317       # Check atoms...
 318       $IgnoreRecord = 1;
 319       if ($OptionsInfo{Mode} =~ /^Atoms$/i) {
 320         $IgnoreRecord = 0;
 321       }
 322       elsif ($OptionsInfo{Mode} =~ /^(CAlphas|AtomNames)$/i) {
 323         if (exists $OptionsInfo{SpecifiedAtomNamesMap}{lc $AtomName}) {
 324           $IgnoreRecord = 0;
 325         }
 326       }
 327       elsif ($OptionsInfo{Mode} =~ /^AtomNums$/i) {
 328         if (exists $OptionsInfo{SpecifiedAtomNumsMap}{$AtomNumber}) {
 329           $IgnoreRecord = 0;
 330         }
 331       }
 332       elsif ($OptionsInfo{Mode} =~ /^AtomsRange$/i) {
 333         if ($AtomNumber >= $OptionsInfo{SpecifiedStartAtomNum} && $AtomNumber <= $OptionsInfo{SpecifiedEndAtomNum}) {
 334           $IgnoreRecord = 0;
 335         }
 336       }
 337 
 338       if (!$IgnoreRecord) {
 339         $ChainRecordCount++;
 340         print OUTFILE "$RecordLine\n";
 341 
 342         $AtomNumber = int $AtomNumber;
 343         $AtomNumbersMap{$AtomNumber} = $AtomName;
 344       }
 345     }
 346     elsif (IsTerRecordType($RecordLine)) {
 347       if ($ChainRecordCount) {
 348         print OUTFILE GenerateTerRecordLine(), "\n";
 349       }
 350       $ChainRecordCount = 0;
 351     }
 352     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 353       print OUTFILE "$RecordLine\n";
 354     }
 355   }
 356 
 357   # Write out appropriate CONECT records...
 358   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 359   for $RecordLine (@{$ConectRecordLinesRef}) {
 360     print OUTFILE "$RecordLine\n";
 361   }
 362 
 363   # Write out END record...
 364   print OUTFILE GenerateEndRecordLine(), "\n";
 365 
 366   close OUTFILE;
 367 }
 368 
 369 # Extract residues...
 370 sub ExtractByResidues {
 371   my($FileIndex, $PDBRecordLinesRef) = @_;
 372   my($PDBFileName,  $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $ConectRecordLinesRef, $IgnoreRecord, %AtomNumbersMap);
 373 
 374   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 375   print "Generating PDBFileName file $PDBFileName...\n";
 376   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 377 
 378   # Write out header and other older recors...
 379   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 380 
 381   # Write out all ATOM records for specified residues with TER and model records to indicate
 382   # chains and multiple models...
 383   %AtomNumbersMap = ();
 384   $ChainRecordCount = 0;
 385   for $RecordLine (@{$PDBRecordLinesRef}) {
 386     if (CheckRecordType($RecordLine)) {
 387       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber) = ParseAtomRecordLine($RecordLine);
 388 
 389       # Check residues...
 390       $IgnoreRecord = 1;
 391       if ($OptionsInfo{Mode} =~ /^ResidueNums$/i) {
 392         if (exists $OptionsInfo{SpecifiedResidueNumsMap}{$ResidueNumber}) {
 393           $IgnoreRecord = 0;
 394         }
 395       }
 396       elsif ($OptionsInfo{Mode} =~ /^ResiduesRange$/i) {
 397         if ($ResidueNumber >= $OptionsInfo{SpecifiedStartResidueNum} && $ResidueNumber <= $OptionsInfo{SpecifiedEndResidueNum}) {
 398           $IgnoreRecord = 0;
 399         }
 400       }
 401       elsif ($OptionsInfo{Mode} =~ /^ResidueNames$/i) {
 402         if (exists $OptionsInfo{SpecifiedResidueNamesMap}{lc $ResidueName}) {
 403           $IgnoreRecord = 0;
 404         }
 405       }
 406       if (!$IgnoreRecord) {
 407         $ChainRecordCount++;
 408         print OUTFILE "$RecordLine\n";
 409         $AtomNumber = int $AtomNumber;
 410         $AtomNumbersMap{$AtomNumber} = $AtomName;
 411       }
 412     }
 413     elsif (IsTerRecordType($RecordLine)) {
 414       if ($ChainRecordCount) {
 415         print OUTFILE GenerateTerRecordLine(), "\n";
 416       }
 417       $ChainRecordCount = 0;
 418     }
 419     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 420       print OUTFILE "$RecordLine\n";
 421     }
 422   }
 423 
 424   # Write out appropriate CONECT records...
 425   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 426   for $RecordLine (@{$ConectRecordLinesRef}) {
 427     print OUTFILE "$RecordLine\n";
 428   }
 429   # Write out END record...
 430   print OUTFILE GenerateEndRecordLine(), "\n";
 431 
 432   close OUTFILE;
 433 }
 434 
 435 # Extract non water records...
 436 sub ExtractNonWaterRecords {
 437   my($FileIndex, $PDBRecordLinesRef) = @_;
 438   my($PDBFileName,  $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ConectRecordLinesRef, %AtomNumbersMap);
 439 
 440   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 441   print "Generating PDBFileName file $PDBFileName...\n";
 442   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 443 
 444   # Write out header and other older recors...
 445   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 446 
 447   # Write out all ATOM/HETATM non water records along with TER and model records to indicate
 448   # chains and multiple models..
 449   %AtomNumbersMap = ();
 450   $ChainRecordCount = 0;
 451   for $RecordLine (@{$PDBRecordLinesRef}) {
 452     if (CheckRecordType($RecordLine)) {
 453       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName) = ParseAtomRecordLine($RecordLine);
 454       if (! exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName} ) {
 455         $ChainRecordCount++;
 456         print OUTFILE "$RecordLine\n";
 457         $AtomNumber = int $AtomNumber;
 458         $AtomNumbersMap{$AtomNumber} = $AtomName;
 459       }
 460     }
 461     elsif (IsTerRecordType($RecordLine)) {
 462       if ($ChainRecordCount) {
 463         print OUTFILE GenerateTerRecordLine(), "\n";
 464       }
 465       $ChainRecordCount = 0;
 466     }
 467     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 468       print OUTFILE "$RecordLine\n";
 469     }
 470   }
 471 
 472   # Write out appropriate CONECT records...
 473   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 474   for $RecordLine (@{$ConectRecordLinesRef}) {
 475     print OUTFILE "$RecordLine\n";
 476   }
 477   # Write out END record...
 478   print OUTFILE GenerateEndRecordLine(), "\n";
 479 
 480   close OUTFILE;
 481 }
 482 
 483 # Extract non hydrogen records...
 484 sub ExtractNonHydrogenRecords {
 485   my($FileIndex, $PDBRecordLinesRef) = @_;
 486   my($PDBFileName,  $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $ConectRecordLinesRef, %AtomNumbersMap);
 487 
 488   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 489   print "Generating PDBFileName file $PDBFileName...\n";
 490   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 491 
 492   # Write out header and other older recors...
 493   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 494 
 495   # Write out all ATOM/HETATM non hydrogen records along with TER and model records to indicate
 496   # chains and multiple models..
 497   %AtomNumbersMap = ();
 498   $ChainRecordCount = 0;
 499   for $RecordLine (@{$PDBRecordLinesRef}) {
 500     if (CheckRecordType($RecordLine)) {
 501       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 502       if ($ElementSymbol !~ /^H$/i) {
 503         $ChainRecordCount++;
 504         print OUTFILE "$RecordLine\n";
 505         $AtomNumber = int $AtomNumber;
 506         $AtomNumbersMap{$AtomNumber} = $AtomName;
 507       }
 508     }
 509     elsif (IsTerRecordType($RecordLine)) {
 510       if ($ChainRecordCount) {
 511         print OUTFILE GenerateTerRecordLine(), "\n";
 512       }
 513       $ChainRecordCount = 0;
 514     }
 515     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 516       print OUTFILE "$RecordLine\n";
 517     }
 518   }
 519 
 520   # Write out appropriate CONECT records...
 521   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 522   for $RecordLine (@{$ConectRecordLinesRef}) {
 523     print OUTFILE "$RecordLine\n";
 524   }
 525   # Write out END record...
 526   print OUTFILE GenerateEndRecordLine(), "\n";
 527 
 528   close OUTFILE;
 529 }
 530 
 531 # Extract ATOM/HETATM records by distance...
 532 sub ExtractByDistance {
 533   my($FileIndex, $PDBRecordLinesRef) = @_;
 534   my($PDBFileName,  $RecordLine, $RecordLineNum, $ChainRecordCount, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $IgnoreRecord, $ResidueID, @OriginCoords, @Coords, %AtomNumbersMap, %ResiduesDataMap);
 535 
 536   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 537   print "Generating PDBFileName file $PDBFileName...\n";
 538   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 539 
 540   # Write out header and other older recors...
 541   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 542 
 543   # Setup coordinates of origin to calculate distance...
 544   @OriginCoords = ();
 545   push @OriginCoords, @{$PDBFilesInfo{DistanceOrigin}[$FileIndex]};
 546 
 547   # Write out all ATOM records for which meet specified criteria along with TER and model records to indicate
 548   # chains and multiple models...
 549   %AtomNumbersMap = ();
 550 
 551   %ResiduesDataMap = ();
 552   %{$ResiduesDataMap{ID}} = ();
 553   %{$ResiduesDataMap{Status}} = ();
 554 
 555   $ChainRecordCount = 0;
 556   $RecordLineNum = 0;
 557 
 558   for $RecordLine (@{$PDBRecordLinesRef}) {
 559     $RecordLineNum++;
 560     if (CheckRecordType($RecordLine)) {
 561       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
 562       @Coords = (); push @Coords, ($X, $Y, $Z);
 563 
 564       $IgnoreRecord = 1;
 565       if ($OptionsInfo{DistanceSelectionMode} =~ /^ByResidue$/i) {
 566         $ResidueID = "${ResidueName}_${ResidueNumber}_${ChainID}";
 567         if (exists $ResiduesDataMap{ID}{$ResidueID}) {
 568           # Residue data has been processed; check its selection status...
 569           if ($ResiduesDataMap{Status}{$ResidueID}) {
 570             $IgnoreRecord = 0;
 571           }
 572         }
 573         else {
 574           # Residue hasn't been processed...
 575           $ResiduesDataMap{ID}{$ResidueID} = $ResidueID;
 576           $ResiduesDataMap{Status}{$ResidueID} = 0;
 577           if (CheckResidueDistance($ResidueID, $RecordLineNum, $PDBRecordLinesRef, \@OriginCoords)) {
 578             $IgnoreRecord = 0;
 579             $ResiduesDataMap{Status}{$ResidueID} = 1;
 580           }
 581         }
 582       }
 583       elsif ($OptionsInfo{DistanceSelectionMode} =~ /^ByAtom$/i) {
 584         if (CheckDistance(\@Coords, \@OriginCoords)) {
 585           $IgnoreRecord = 0;
 586         }
 587       }
 588 
 589       if (!$IgnoreRecord) {
 590         $ChainRecordCount++;
 591         print OUTFILE "$RecordLine\n";
 592         $AtomNumber = int $AtomNumber;
 593         $AtomNumbersMap{$AtomNumber} = $AtomName;
 594       }
 595     }
 596     elsif (IsTerRecordType($RecordLine)) {
 597       if ($ChainRecordCount) {
 598         print OUTFILE GenerateTerRecordLine(), "\n";
 599       }
 600       $ChainRecordCount = 0;
 601     }
 602     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 603       print OUTFILE "$RecordLine\n";
 604     }
 605   }
 606 
 607   # Write out appropriate CONECT records...
 608   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 609   for $RecordLine (@{$ConectRecordLinesRef}) {
 610     print OUTFILE "$RecordLine\n";
 611   }
 612 
 613   # Write out END record...
 614   print OUTFILE GenerateEndRecordLine(), "\n";
 615 
 616   close OUTFILE;
 617 }
 618 
 619 # Does record type correspond to the specified record type?
 620 sub CheckRecordType {
 621   my($RecordLine) = @_;
 622   my($Status);
 623 
 624   $Status = 0;
 625   if ($OptionsInfo{RecordMode} =~ /^Atom$/i) {
 626     $Status = IsAtomRecordType($RecordLine) ? 1 : 0;
 627   }
 628   elsif ($OptionsInfo{RecordMode} =~ /^Hetatm$/i) {
 629     $Status = IsHetatmRecordType($RecordLine) ? 1 : 0;
 630   }
 631   elsif ($OptionsInfo{RecordMode} =~ /^AtomAndHetatm$/i) {
 632     $Status = (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) ? 1 : 0;
 633   }
 634 
 635   return $Status;
 636 }
 637 
 638 # Does record meets distance citerion specified by the user?
 639 sub CheckResidueDistance {
 640   my($SpecifiedResidueID, $StartingLineNum, $PDBRecordLinesRef, $OriginCoordsRef) = @_;
 641   my($Status, $RecordLine, $RecordLineIndex, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $ResidueID, @Coords);
 642 
 643   $Status = 0;
 644 
 645   RECORDLINE: for $RecordLineIndex (($StartingLineNum - 1) .. $#{$PDBRecordLinesRef}) {
 646     $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
 647     if (!CheckRecordType($RecordLine)) {
 648       next RECORDLINE;
 649     }
 650     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
 651     $ResidueID = "${ResidueName}_${ResidueNumber}_${ChainID}";
 652 
 653     if ($ResidueID !~ /^$SpecifiedResidueID$/i) {
 654       # It's a new residue line...
 655       last RECORDLINE;
 656     }
 657 
 658     # Check distance...
 659     @Coords = (); push @Coords, ($X, $Y, $Z);
 660     if (CheckDistance(\@Coords, $OriginCoordsRef)) {
 661       # Distance criterion is met for at least one record in the residue...
 662       $Status = 1;
 663       last RECORDLINE;
 664     }
 665   }
 666   return $Status;
 667 }
 668 
 669 # Does record meets distance citerion specified by the user?
 670 sub CheckDistance {
 671   my($CoordsRef, $OriginCoordsRef) = @_;
 672   my($Status, $Index, $Distance, $DistanceSquare);
 673 
 674   $Status = 0;
 675 
 676   if ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
 677     # Go over coordinates of all the atoms in the residue...
 678     my($ResidueCoordsCount) = scalar @{$OriginCoordsRef};
 679     INDEX: for ($Index = 0; $Index < $ResidueCoordsCount; $Index += 3) {
 680       $DistanceSquare = ($CoordsRef->[0] - $OriginCoordsRef->[$Index])**2 + ($CoordsRef->[1] - $OriginCoordsRef->[$Index + 1])**2 + ($CoordsRef->[2] - $OriginCoordsRef->[$Index + 2])**2;
 681       $Distance = sqrt $DistanceSquare;
 682       if ($Distance <= $OptionsInfo{MaxExtractionDistance}) {
 683         $Status = 1;
 684         last INDEX;
 685       }
 686     }
 687   }
 688   else {
 689     $DistanceSquare = 0;
 690     for $Index (0 .. 2) {
 691       $DistanceSquare += ($CoordsRef->[$Index] - $OriginCoordsRef->[$Index])**2;
 692     }
 693     $Distance = sqrt $DistanceSquare;
 694     $Status = ($Distance <= $OptionsInfo{MaxExtractionDistance}) ? 1 : 0;
 695   }
 696 
 697   return $Status;
 698 }
 699 
 700 # Write out modifed header and other older records...
 701 sub WriteHeaderAndOlderRecords {
 702   my($OutFileRef, $PDBRecordLinesRef) = @_;
 703 
 704   if ($OptionsInfo{ModifyHeaderRecord}) {
 705     # Write out modified HEADER record...
 706     my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
 707     $Classification = 'Data extracted using MayaChemTools';
 708     print $OutFileRef GenerateHeaderRecordLine($IDCode, $Classification), "\n";
 709   }
 710   else {
 711     print $OutFileRef $PDBRecordLinesRef->[0], "\n";
 712   }
 713 
 714   # Write out any old records...
 715   if ($OptionsInfo{KeepOldRecords}) {
 716     my($RecordLineIndex, $RecordLine);
 717     # Skip HEADER record and write out older records all the way upto first MODEL/ATOM/HETATM records from input file...
 718     RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
 719       $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
 720       if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 721         last RECORDLINE;
 722       }
 723       print $OutFileRef "$RecordLine\n";
 724     }
 725   }
 726 }
 727 
 728 # Get header record information assuming it's the first record...
 729 sub GetHeaderRecordInformation {
 730   my($PDBRecordLinesRef) = @_;
 731   my($Classification, $DepositionDate, $IDCode, $HeaderRecordLine);
 732 
 733   ($Classification, $DepositionDate, $IDCode) = ('') x 3;
 734   $HeaderRecordLine = $PDBRecordLinesRef->[0];
 735   if (IsHeaderRecordType($HeaderRecordLine)) {
 736     ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
 737   }
 738   return ($Classification, $DepositionDate, $IDCode);
 739 }
 740 
 741 # Get one letter residue code...
 742 sub GetResidueCode {
 743   my($ResidueName) = @_;
 744   my($ResidueCode, $StandardResidue);
 745 
 746   $ResidueCode = $OptionsInfo{NonStandardSequenceCode};
 747   $StandardResidue = 0;
 748 
 749   if (length($ResidueName) == 3) {
 750     # Assume it's an amino acid...
 751     if (AminoAcids::IsAminoAcid($ResidueName)) {
 752       # Standard amino acid...
 753       $ResidueCode = AminoAcids::GetAminoAcidOneLetterCode($ResidueName);
 754       $StandardResidue = 1;
 755     }
 756   }
 757   elsif (length($ResidueName) == 1) {
 758     # Assume it's a nucleic acid...
 759     if ($ResidueName =~ /^(A|G|T|U|C)$/i) {
 760       $ResidueCode = $ResidueName;
 761       $StandardResidue = 1;
 762     }
 763   }
 764 
 765   return ($ResidueCode, $StandardResidue);
 766 }
 767 
 768 # Process option values...
 769 sub ProcessOptions {
 770   %OptionsInfo = ();
 771   $OptionsInfo{Mode} = $Options{mode};
 772 
 773   my(@SpecifiedChains) = ();
 774   if ($Options{chains} =~ /^(First|All)$/i) {
 775     $OptionsInfo{ChainsToExtract} = $Options{chains};
 776   }
 777   else {
 778     @SpecifiedChains = split /\,/, $Options{chains};
 779     $OptionsInfo{ChainsToExtract} = 'Specified';
 780   }
 781   @{$OptionsInfo{SpecifiedChains}} = ();
 782   push @{$OptionsInfo{SpecifiedChains}}, @SpecifiedChains;
 783 
 784   $OptionsInfo{CombineChains} = ($Options{combinechains} =~ /^Yes$/i) ? 1 : 0;
 785 
 786   $OptionsInfo{CombineChainSequences} = ($Options{combinechains} =~ /^Yes$/i) ? 1 : 0;
 787 
 788   ProcessResiduesOptions();
 789   ProcessAtomsOptions();
 790   ProcessDistanceOptions();
 791 
 792   $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
 793   @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
 794   %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();
 795 
 796   my(@SpecifiedWaterResiduesList);
 797   @SpecifiedWaterResiduesList = ();
 798 
 799   if ($OptionsInfo{Mode} =~ /^NonWater$/i) {
 800     my($WaterResidueName);
 801     if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
 802       push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
 803     }
 804     else {
 805       @SpecifiedWaterResiduesList = split /\,/, $Options{waterresiduenames};
 806     }
 807     for $WaterResidueName (@SpecifiedWaterResiduesList) {
 808       $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
 809     }
 810   }
 811   push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
 812 
 813   $OptionsInfo{RecordMode} = $Options{recordmode} ? $Options{recordmode} : ($Options{mode} =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames)$/i ? "Atom" : "AtomAndHetatm");
 814 
 815   $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
 816 
 817   $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;
 818 
 819   $OptionsInfo{KeepNonStandardSequences} = ($Options{nonstandardkeep} =~ /^Yes$/i) ? 1 : 0;
 820   $OptionsInfo{NonStandardSequenceCode} = $Options{nonstandardcode};
 821   $OptionsInfo{MaxSequenceLength} = $Options{sequencelength};
 822   $OptionsInfo{SequenceRecordSource} = $Options{sequencerecords};
 823   $OptionsInfo{SequenceIDPrefixSource} = $Options{sequenceidprefix};
 824 
 825   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 826   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 827 }
 828 
 829 # Process specified residue options...
 830 sub ProcessResiduesOptions {
 831   my($ResidueNum, $StartResidueNum, $EndResNum, $ResidueName, @SpecifiedResidueNumsList, @SpecifiedResidueNamesList);
 832 
 833   @SpecifiedResidueNumsList = ();
 834   ($StartResidueNum, $EndResNum) = (0, 0);
 835 
 836   @SpecifiedResidueNamesList = ();
 837 
 838   if ($OptionsInfo{Mode} =~ /^(ResidueNums|ResiduesRange|ResidueNames)$/i) {
 839     if (!$Options{residues}) {
 840       die "Error: You must specify a value for \"--Residues\" option in \"ResidueNums, ResiduesRange, or ResidueNames\" \"-m, --mode\". \n";
 841     }
 842     $OptionsInfo{Residues} = $Options{residues};
 843     $OptionsInfo{Residues} =~ s/ //g;
 844 
 845     if ($OptionsInfo{Mode} =~ /^ResidueNames$/i) {
 846       @SpecifiedResidueNamesList = split /\,/, $OptionsInfo{Residues};
 847     }
 848     else {
 849       @SpecifiedResidueNumsList = split /\,/, $OptionsInfo{Residues};
 850       for $ResidueNum (@SpecifiedResidueNumsList) {
 851         if (!IsPositiveInteger($ResidueNum)) {
 852           die "Error: Invalid residue number value, $ResidueNum, for \"--Residues\" option during \"ResidueNumes\" or \"ResiduesRange\"value of \"-m --mode\" option: Residue number must be a positive integer.\n";
 853         }
 854       }
 855       if ($OptionsInfo{Mode} =~ /^ResiduesRange$/i) {
 856         if (@SpecifiedResidueNumsList != 2) {
 857           die "Error: Invalid number of residue number values, ", scalar(@SpecifiedResidueNumsList), ", for \"--Residues\" option during \"ResiduesRange\" value of \"-m --mode\" option: The number of values must be 2 corresponding to start and end residue numbers.\n";
 858         }
 859         if ($SpecifiedResidueNumsList[0] > $SpecifiedResidueNumsList[1]) {
 860           die "Error: Invalid residue number values, @SpecifiedResidueNumsList, for \"--Residues\" option during \"ResiduesRange\" value of \"-m --mode\" option: The start residue number must be less than end residue number.\n";
 861         }
 862         ($StartResidueNum, $EndResNum) = @SpecifiedResidueNumsList;
 863       }
 864     }
 865   }
 866 
 867   @{$OptionsInfo{SpecifiedResidueNumsList}} = ();
 868   push @{$OptionsInfo{SpecifiedResidueNumsList}}, @SpecifiedResidueNumsList;
 869 
 870   $OptionsInfo{SpecifiedStartResidueNum} = $StartResidueNum;
 871   $OptionsInfo{SpecifiedEndResidueNum} = $EndResNum;
 872 
 873   @{$OptionsInfo{SpecifiedResidueNamesList}} = ();
 874   push @{$OptionsInfo{SpecifiedResidueNamesList}}, @SpecifiedResidueNamesList;
 875 
 876   # Set up a specified residue numbers map...
 877   %{$OptionsInfo{SpecifiedResidueNumsMap}} = ();
 878   for $ResidueNum (@{$OptionsInfo{SpecifiedResidueNumsList}}) {
 879     $OptionsInfo{SpecifiedResidueNumsMap}{$ResidueNum} = $ResidueNum;
 880   }
 881 
 882   # Set up a specified residue names map...
 883   %{$OptionsInfo{SpecifiedResidueNamesMap}} = ();
 884   for $ResidueName (@{$OptionsInfo{SpecifiedResidueNamesList}}) {
 885     $OptionsInfo{SpecifiedResidueNamesMap}{lc $ResidueName} = lc $ResidueName;
 886   }
 887 
 888 }
 889 
 890 # Process specified atom options...
 891 sub ProcessAtomsOptions {
 892   my($AtomNum, $StartAtomNum, $EndAtomNum, $AtomName, @SpecifiedAtomNumsList, @SpecifiedAtomNamesList);
 893 
 894   @SpecifiedAtomNumsList = ();
 895   ($StartAtomNum, $EndAtomNum) = (0, 0);
 896 
 897   @SpecifiedAtomNamesList = ();
 898 
 899   if ($OptionsInfo{Mode} =~ /^(AtomNums|AtomsRange|AtomNames)$/i) {
 900     if (!$Options{atoms}) {
 901       die "Error: You must specify a value for \"--Atoms\" option in \"AtomNums, AtomsRange, or AtomNames\" \"-m, --mode\". \n";
 902     }
 903     $OptionsInfo{Atoms} = $Options{atoms};
 904     $OptionsInfo{Atoms} =~ s/ //g;
 905 
 906     if ($OptionsInfo{Mode} =~ /^AtomNames$/i) {
 907       @SpecifiedAtomNamesList = split /\,/, $OptionsInfo{Atoms};
 908     }
 909     else {
 910       @SpecifiedAtomNumsList = split /\,/, $OptionsInfo{Atoms};
 911       for $AtomNum (@SpecifiedAtomNumsList) {
 912         if (!IsPositiveInteger($AtomNum)) {
 913           die "Error: Invalid atom number value, $AtomNum, for \"--Atoms\" option during \"AtomNums\" or \"AtomsRange\"value of \"-m --mode\" option: Atom number must be a positive integer.\n";
 914         }
 915       }
 916       if ($OptionsInfo{Mode} =~ /^AtomsRange$/i) {
 917         if (@SpecifiedAtomNumsList != 2) {
 918           die "Error: Invalid number of atom number values, ", scalar(@SpecifiedAtomNumsList), ", for \"--Atoms\" option during \"AtomsRange\" value of \"-m --mode\" option: The number of values must be 2 corresponding to start and end atom numbers.\n";
 919         }
 920         if ($SpecifiedAtomNumsList[0] > $SpecifiedAtomNumsList[1]) {
 921           die "Error: Invalid atom number values, @SpecifiedAtomNumsList, for \"--Atoms\" option during \"AtomsRange\" value of \"-m --mode\" option: The start atom number must be less than end atom number.\n";
 922         }
 923         ($StartAtomNum, $EndAtomNum) = @SpecifiedAtomNumsList;
 924       }
 925     }
 926   }
 927   elsif ($OptionsInfo{Mode} =~ /^CAlphas$/i) {
 928     @SpecifiedAtomNamesList = ("CA");
 929   }
 930 
 931   @{$OptionsInfo{SpecifiedAtomNumsList}} = ();
 932   push @{$OptionsInfo{SpecifiedAtomNumsList}}, @SpecifiedAtomNumsList;
 933 
 934   $OptionsInfo{SpecifiedStartAtomNum} = $StartAtomNum;
 935   $OptionsInfo{SpecifiedEndAtomNum} = $EndAtomNum;
 936 
 937   @{$OptionsInfo{SpecifiedAtomNamesList}} = ();
 938   push @{$OptionsInfo{SpecifiedAtomNamesList}}, @SpecifiedAtomNamesList;
 939 
 940   # Set up a specified residue numbers map...
 941   %{$OptionsInfo{SpecifiedAtomNumsMap}} = ();
 942   for $AtomNum (@{$OptionsInfo{SpecifiedAtomNumsList}}) {
 943     $OptionsInfo{SpecifiedAtomNumsMap}{$AtomNum} = $AtomNum;
 944   }
 945 
 946   # Set up a specified residue names map...
 947   %{$OptionsInfo{SpecifiedAtomNamesMap}} = ();
 948   for $AtomName (@{$OptionsInfo{SpecifiedAtomNamesList}}) {
 949     $OptionsInfo{SpecifiedAtomNamesMap}{lc $AtomName} = lc $AtomName;
 950   }
 951 
 952 }
 953 
 954 # Process specified distance options...
 955 sub ProcessDistanceOptions {
 956   my(@SpecifiedDistanceOrigin) = ();
 957 
 958   $OptionsInfo{MaxExtractionDistance} = $Options{distance};
 959   $OptionsInfo{ExtractionDistanceMode} = $Options{distancemode};
 960   $OptionsInfo{ExtractionDistanceOrigin} = $Options{distanceorigin} ? $Options{distanceorigin} : '';
 961   $OptionsInfo{DistanceSelectionMode} = $Options{distanceselectionmode};
 962 
 963   if ($OptionsInfo{Mode} =~ /^Distance$/i) {
 964     if (!$Options{distanceorigin}) {
 965       die "Error: You must specify a value for \"--distanceorigin\" option in \"Distance\" \"-m, --mode\". \n";
 966     }
 967     @SpecifiedDistanceOrigin = split /\,/, $Options{distanceorigin};
 968     if ($OptionsInfo{ExtractionDistanceMode} =~ /^Atom$/i) {
 969       if (@SpecifiedDistanceOrigin != 2) {
 970         die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Atom\" value of \"--distancemode\" : The number of values must be 2.\n";
 971       }
 972       if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
 973         die "Error: Invalid atom number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Atom\" value of \"--distancemode\". Allowed values: > 0\n";
 974       }
 975     }
 976     elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Hetatm$/i) {
 977       if (@SpecifiedDistanceOrigin != 2) {
 978         die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Hetatm\" value of \"--distancemode\" : The number of values must be 2.\n";
 979       }
 980       if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
 981         die "Error: Invalid hetatm number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Hetatm\" value of \"--distancemode\". Allowed values: > 0\n";
 982       }
 983     }
 984     elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
 985       if (!(@SpecifiedDistanceOrigin == 2 || @SpecifiedDistanceOrigin == 3)) {
 986         die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Residue\" value of \"--distancemode\" : The number of values must be either 2 or 3.\n";
 987       }
 988       if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
 989         die "Error: Invalid residue number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Residue\" value of \"--distancemode\". Allowed values: > 0\n";
 990       }
 991     }
 992     elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
 993       if (@SpecifiedDistanceOrigin != 3) {
 994         die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"XYZ\" value of \"--distancemode\" : The number of values must be 3.\n";
 995       }
 996       my($Value);
 997       for $Value (@SpecifiedDistanceOrigin) {
 998         if (!IsNumerical($Value)) {
 999           die "Error: Invalid coordinate value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"XYZ\" value of \"--distancemode\". Allowed values: numerical\n";
1000         }
1001       }
1002     }
1003   }
1004   @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}} = ();
1005   push @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}}, @SpecifiedDistanceOrigin;
1006 
1007 }
1008 
1009 # Retrieve information about PDB files...
1010 sub RetrievePDBFilesInfo {
1011   my($Index, $PDBFile, $PDBRecordLinesRef, $ChainID, $ChainLabel, $ChainsAndResiduesInfoRef, $Mode, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot, @SpecifiedChains, @DistanceOrigin, @OutFileNames, @ChainLabels, @ChainSequenceIDs, @ChainSequenceIDsPrefix);
1012 
1013   %PDBFilesInfo = ();
1014   @{$PDBFilesInfo{FileOkay}} = ();
1015   @{$PDBFilesInfo{OutFileRoot}} = ();
1016   @{$PDBFilesInfo{OutFileNames}} = ();
1017   @{$PDBFilesInfo{ChainLabels}} = ();
1018   @{$PDBFilesInfo{ChainSequenceIDs}} = ();
1019   @{$PDBFilesInfo{ChainSequenceIDsPrefix}} = ();
1020   @{$PDBFilesInfo{SpecifiedChains}} = ();
1021   @{$PDBFilesInfo{DistanceOrigin}} = ();
1022 
1023   FILELIST: for $Index (0 .. $#PDBFilesList) {
1024     $PDBFilesInfo{FileOkay}[$Index] = 0;
1025 
1026     $PDBFilesInfo{OutFileRoot}[$Index] = '';
1027     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
1028     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
1029     @{$PDBFilesInfo{ChainLabels}[$Index]} = ();
1030     @{$PDBFilesInfo{ChainSequenceIDs}[$Index]} = ();
1031     @{$PDBFilesInfo{ChainSequenceIDsPrefix}[$Index]} = ();
1032     @{$PDBFilesInfo{SpecifiedChains}[$Index]} = ();
1033     @{$PDBFilesInfo{DistanceOrigin}[$Index]} = ();
1034 
1035     $PDBFile = $PDBFilesList[$Index];
1036     if (!(-e $PDBFile)) {
1037       warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
1038       next FILELIST;
1039     }
1040     if (!CheckFileType($PDBFile, "pdb")) {
1041       warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
1042       next FILELIST;
1043     }
1044     if (! open PDBFILE, "$PDBFile") {
1045       warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
1046       next FILELIST;
1047     }
1048     close PDBFILE;
1049 
1050     # Get PDB data...
1051     $PDBRecordLinesRef = ReadPDBFile($PDBFile);
1052     if ($OptionsInfo{Mode} =~ /^Sequences$/i && $OptionsInfo{SequenceRecordSource} =~ /^SeqRes$/i) {
1053       $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'SeqRes');
1054     }
1055     else {
1056       $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
1057     }
1058     if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
1059       warn "Warning: Ignoring file $PDBFile: No chains found \n";
1060       next FILELIST;
1061     }
1062 
1063     # Make sure specified chains exist in PDB file...
1064     @SpecifiedChains = ();
1065     if ($OptionsInfo{ChainsToExtract} =~ /^Specified$/i) {
1066       for $ChainID (@{$OptionsInfo{SpecifiedChains}}) {
1067         if (exists $ChainsAndResiduesInfoRef->{Residues}{$ChainID}) {
1068           push @SpecifiedChains, $ChainID;
1069         }
1070         else {
1071           warn "Warning: Ignoring file $PDBFile: Specified chain, $ChainID, in \"-c, --chains\" option doesn't exist.\n";
1072           next FILELIST;
1073         }
1074       }
1075     }
1076     elsif ($OptionsInfo{ChainsToExtract} =~ /^First$/i) {
1077       push @SpecifiedChains, $ChainsAndResiduesInfoRef->{ChainIDs}[0];
1078     }
1079     elsif ($OptionsInfo{ChainsToExtract} =~ /^All$/i) {
1080       push @SpecifiedChains, @{$ChainsAndResiduesInfoRef->{ChainIDs}};
1081     }
1082     # Setup chain labels to use for sequence IDs and generating output files...
1083     @ChainLabels = ();
1084     for $ChainID (@SpecifiedChains) {
1085       $ChainLabel = $ChainID; $ChainLabel =~ s/^None//ig;
1086       $ChainLabel = "Chain${ChainLabel}";
1087       push @ChainLabels, $ChainLabel;
1088     }
1089 
1090     # Make sure specified distance origin is valid...
1091     @DistanceOrigin = ();
1092     if ($OptionsInfo{Mode} =~ /^Distance$/i) {
1093       if ($OptionsInfo{ExtractionDistanceMode} =~ /^(Atom|Hetatm)$/i) {
1094         my($RecordType, $SpecifiedAtomName, $SpecifiedAtomNumber, $RecordFound, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $RecordLine);
1095         $RecordType = $OptionsInfo{ExtractionDistanceMode};
1096         ($SpecifiedAtomNumber, $SpecifiedAtomName) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
1097         $RecordFound = 0;
1098         LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
1099           if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
1100             next LINE;
1101           }
1102           ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
1103           $AtomName = RemoveLeadingAndTrailingWhiteSpaces($AtomName);
1104           if (($RecordType =~ /^Atom$/i && IsAtomRecordType($RecordLine)) || ($RecordType =~ /^Hetatm$/i && IsHetatmRecordType($RecordLine))) {
1105             if ($AtomNumber == $SpecifiedAtomNumber && $AtomName eq $SpecifiedAtomName) {
1106               $RecordFound = 1;
1107               last LINE;
1108             }
1109           }
1110         }
1111         if (!$RecordFound) {
1112           warn "Warning: Ignoring file $PDBFile: ", uc($RecordType), " record corresponding to \"--distanceorigin\" option value, $OptionsInfo{ExtractionDistanceOrigin}, doesn't exist.\n";
1113           next FILELIST;
1114         }
1115         push @DistanceOrigin, ($X, $Y, $Z);
1116       }
1117       elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
1118         my($SpecifiedResidueNumber, $SpecifiedResidueName, $SpecifiedChainID, $RecordFound, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $RecordLine);
1119         $SpecifiedChainID = '';
1120         if (@{$OptionsInfo{SpecifiedExtractionDistanceOrigin}} == 3) {
1121           ($SpecifiedResidueNumber, $SpecifiedResidueName, $SpecifiedChainID) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
1122         }
1123         else {
1124           ($SpecifiedResidueNumber, $SpecifiedResidueName) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
1125         }
1126         $RecordFound = 0;
1127         LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
1128           if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
1129             next LINE;
1130           }
1131           ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
1132           $ResidueName = RemoveLeadingAndTrailingWhiteSpaces($ResidueName);
1133           $ChainID = RemoveLeadingAndTrailingWhiteSpaces($ChainID);
1134           if ($SpecifiedChainID && ($SpecifiedChainID ne $ChainID)) {
1135             next LINE;
1136           }
1137           if ($ResidueNumber == $SpecifiedResidueNumber && $ResidueName eq $SpecifiedResidueName) {
1138             # Store coordinates for all the atoms...
1139             $RecordFound = 1;
1140             push @DistanceOrigin, ($X, $Y, $Z);
1141             next LINE;
1142           }
1143         }
1144         if (!$RecordFound) {
1145           warn "Warning: Ignoring file $PDBFile: ATOM/HETATM record corresponding to \"--distanceorigin\" option value, $OptionsInfo{ExtractionDistanceOrigin}, doesn't exist.\n";
1146           next FILELIST;
1147         }
1148       }
1149       elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
1150         push @DistanceOrigin, @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
1151       }
1152     }
1153     # Setup output file names...
1154     @OutFileNames = ();
1155     $FileDir = ""; $FileName = ""; $FileExt = "";
1156     ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
1157     if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
1158       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
1159       if ($RootFileName && $RootFileExt) {
1160         $FileName = $RootFileName;
1161       }
1162       else {
1163         $FileName = $OptionsInfo{OutFileRoot};
1164       }
1165       $OutFileRoot = $FileName;
1166     }
1167     else {
1168       $OutFileRoot = $FileName;
1169     }
1170     $Mode = $OptionsInfo{Mode};
1171     if ($Mode =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames|ResidueNums|ResiduesRange|ResidueNames|Distance|NonWater|NonHydrogens)$/i) {
1172       $OutFileName = '';
1173       if ($Mode =~ /^CAlphas$/i) {
1174         $OutFileName = "${OutFileRoot}CAlphas.pdb";
1175       }
1176       elsif ($Mode =~ /^Atoms$/i) {
1177         $OutFileName = "${OutFileRoot}Atoms.pdb";
1178       }
1179       elsif ($Mode =~ /^AtomNums$/i) {
1180         $OutFileName = "${OutFileRoot}AtomNums.pdb";
1181       }
1182       elsif ($Mode =~ /^AtomsRange$/i) {
1183         $OutFileName = "${OutFileRoot}AtomsRange.pdb";
1184       }
1185       elsif ($Mode =~ /^AtomNames$/i) {
1186         $OutFileName = "${OutFileRoot}AtomNames.pdb";
1187       }
1188       elsif ($Mode =~ /^ResidueNums$/i) {
1189         $OutFileName = "${OutFileRoot}ResidueNums.pdb";
1190       }
1191       elsif ($Mode =~ /^ResiduesRange$/i) {
1192         $OutFileName = "${OutFileRoot}ResiduesRange.pdb";
1193       }
1194       elsif ($Mode =~ /^ResidueNames$/i) {
1195         $OutFileName = "${OutFileRoot}ResidueNames.pdb";
1196       }
1197       elsif ($Mode =~ /^NonWater$/i) {
1198         $OutFileName = "${OutFileRoot}NonWater.pdb";
1199       }
1200       elsif ($Mode =~ /^NonHydrogens$/i) {
1201         $OutFileName = "${OutFileRoot}NonHydrogens.pdb";
1202       }
1203       elsif ($Mode =~ /^Distance$/i) {
1204         my($DistanceMode) = '';
1205         if ($OptionsInfo{ExtractionDistanceMode} =~ /^Atom$/i) {
1206           $DistanceMode = 'Atom';
1207         }
1208         elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Hetatm$/i) {
1209           $DistanceMode = 'Hetatm';
1210         }
1211         elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
1212           $DistanceMode = 'Residue';
1213         }
1214         elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
1215           $DistanceMode = 'XYZ';
1216         }
1217         $OutFileName = "${OutFileRoot}DistanceBy${DistanceMode}.pdb";
1218       }
1219       push @OutFileNames, $OutFileName;
1220       if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
1221         warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
1222         next FILELIST;
1223       }
1224     }
1225     elsif ($Mode =~ /^(Chains|Sequences)$/i) {
1226       if ($OptionsInfo{CombineChainSequences}) {
1227         $OutFileName = ($Mode =~ /^Chains$/i) ? "${OutFileRoot}ExtractedChains.pdb" : "${OutFileRoot}SequencesChainsCombined.fasta";
1228         push @OutFileNames, $OutFileName;
1229         if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
1230           warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
1231           next FILELIST;
1232         }
1233       }
1234       else {
1235         for $ChainLabel (@ChainLabels) {
1236           $OutFileName = ($Mode =~ /^Chains$/i) ? "${OutFileRoot}${ChainLabel}.pdb" : "${OutFileRoot}Sequences${ChainLabel}.fasta";
1237           push @OutFileNames, $OutFileName;
1238           if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
1239             warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
1240             next FILELIST;
1241           }
1242         }
1243       }
1244     }
1245     @ChainSequenceIDs = ();
1246     @ChainSequenceIDsPrefix = ();
1247     if ($Mode =~ /^Sequences$/i) {
1248       my($HeaderRecordLine, $Classification, $DepositionDate, $IDCode, $IDPrefix);
1249       ($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
1250 
1251       if ($OptionsInfo{SequenceIDPrefixSource} =~ /^FileName$/i) {
1252         $IDPrefix = $FileName;
1253       }
1254       elsif ($OptionsInfo{SequenceIDPrefixSource} =~ /^HeaderRecord$/i) {
1255         $IDPrefix = IsNotEmpty($IDCode) ? $IDCode : '';
1256       }
1257       else {
1258         $IDPrefix = IsNotEmpty($IDCode) ? $IDCode : $FileName;
1259       }
1260 
1261       for $ChainLabel (@ChainLabels) {
1262         push @ChainSequenceIDsPrefix, $IDPrefix;
1263         push @ChainSequenceIDs, "${IDPrefix}_${ChainLabel}|PDB";
1264       }
1265     }
1266 
1267     $PDBFilesInfo{FileOkay}[$Index] = 1;
1268     $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
1269 
1270     push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
1271     push @{$PDBFilesInfo{ChainLabels}[$Index]}, @ChainLabels;
1272     push @{$PDBFilesInfo{ChainSequenceIDsPrefix}[$Index]}, @ChainSequenceIDsPrefix;
1273     push @{$PDBFilesInfo{ChainSequenceIDs}[$Index]}, @ChainSequenceIDs;
1274     push @{$PDBFilesInfo{SpecifiedChains}[$Index]}, @SpecifiedChains;
1275     push @{$PDBFilesInfo{DistanceOrigin}[$Index]}, @DistanceOrigin;
1276   }
1277 }
1278 
1279 
1280 # Setup script usage  and retrieve command line arguments specified using various options...
1281 sub SetupScriptUsage {
1282 
1283   # Retrieve all the options...
1284   %Options = ();
1285   $Options{chains} = 'First';
1286   $Options{combinechains} = 'no';
1287   $Options{distance} = 10.0;
1288   $Options{distancemode} = 'XYZ';
1289   $Options{distanceselectionmode} = 'ByAtom';
1290   $Options{keepoldrecords} = 'no';
1291   $Options{mode} = 'NonWater';
1292   $Options{modifyheader} = 'yes';
1293   $Options{nonstandardkeep} = 'yes';
1294   $Options{nonstandardcode} = 'X';
1295   $Options{sequencelength} = 80;
1296   $Options{sequenceidprefix} = 'Automatic';
1297   $Options{sequencerecords} = 'Atom';
1298   $Options{waterresiduenames} = 'Automatic';
1299 
1300   if (!GetOptions(\%Options, "atoms|a=s", "chains|c=s", "combinechains=s", "distance|d=f", "distancemode=s", "distanceorigin=s", "distanceselectionmode=s", "help|h", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "nonstandardkeep=s", "nonstandardcode=s", "overwrite|o", "root|r=s", "recordmode=s", "residues=s", "sequencelength=i", "sequenceidprefix=s", "sequencerecords=s", "waterresiduenames=s", "workingdir|w=s")) {
1301     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
1302   }
1303   if ($Options{workingdir}) {
1304     if (! -d $Options{workingdir}) {
1305       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
1306     }
1307     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
1308   }
1309   if ($Options{combinechains} !~ /^(yes|no)$/i) {
1310     die "Error: The value specified, $Options{combinechains}, for option \"--CombineChains\" is not valid. Allowed values: yes or no\n";
1311   }
1312   if ($Options{distancemode} !~ /^(Atom|Hetatm|Residue|XYZ)$/i) {
1313     die "Error: The value specified, $Options{distancemode}, for option \"--DistanceMode\" is not valid. Allowed values: Atom, Hetatm, Residue, or XYZ\n";
1314   }
1315   if ($Options{distanceselectionmode} !~ /^(ByAtom|ByResidue)$/i) {
1316     die "Error: The value specified, $Options{distanceselectionmode}, for option \"--DistanceSelectionMode\" is not valid. Allowed values: ByAtom or ByResidue\n";
1317   }
1318   if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
1319     die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
1320   }
1321   if ($Options{mode} !~ /^(Chains|Sequences|Atoms|CAlphas|AtomNums|AtomsRange|AtomNames|ResidueNums|ResidueNames|ResiduesRange|Distance|NonWater|NonHydrogens)$/i) {
1322     die "Error: The value specified, $Options{mode}, for option \"m, --mode\" is not valid. Allowed values: Chains, Sequences, Atoms, CAlphas, AtomNums, AtomsRange, AtomNames, ResidueNums, ResiduesRange, ResidueNames, Distance, NonWater, NonHydrogens\n";
1323   }
1324   if ($Options{modifyheader} !~ /^(yes|no)$/i) {
1325     die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
1326   }
1327   if ($Options{nonstandardkeep} !~ /^(yes|no)$/i) {
1328     die "Error: The value specified, $Options{nonstandardkeep}, for option \"--NonStandardKeep\" is not valid. Allowed values: yes or no\n";
1329   }
1330   if ($Options{nonstandardcode} !~ /^(\?|\-|X)$/i) {
1331     die "Error: The value specified, $Options{nonstandardcode}, for option \"--NonStandardCode\" is not valid. Allowed values: ?, -, or X\n";
1332   }
1333   if ($Options{recordmode} && $Options{recordmode} !~ /^(Atom|Hetatm|AtomAndHetatm)$/i) {
1334     die "Error: The value specified, $Options{recordmode}, for option \"--RecordMode\" is not valid. Allowed values: Atom, Hetatm, AtomAndHetatm\n";
1335   }
1336   if (!IsPositiveInteger($Options{sequencelength})) {
1337     die "Error: The value specified, $Options{sequencelength}, for option \"--SequenceLength\" is not valid. Allowed values: >0\n";
1338   }
1339   if ($Options{sequencerecords} !~ /^(Atom|SeqRes)$/i) {
1340     die "Error: The value specified, $Options{sequencerecords}, for option \"--SequenceRecords\" is not valid. Allowed values: Atom or SeqRes\n";
1341   }
1342   if ($Options{sequenceidprefix} !~ /^(FileName|HeaderRecord|Automatic)$/i) {
1343     die "Error: The value specified, $Options{sequenceidprefix}, for option \"--SequenceIDPrefix\" is not valid. Allowed values: FileName, HeaderRecord, or AutomaticAtom\n";
1344   }
1345 }
1346