diff lib/AminoAcids.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/AminoAcids.pm	Wed Jan 20 09:23:18 2016 -0500
@@ -0,0 +1,442 @@
+package AminoAcids;
+#
+# $RCSfile: AminoAcids.pm,v $
+# $Date: 2015/02/28 20:47:02 $
+# $Revision: 1.25 $
+#
+# Author: Manish Sud <msud@san.rr.com>
+#
+# Copyright (C) 2015 Manish Sud. All rights reserved.
+#
+# This file is part of MayaChemTools.
+#
+# MayaChemTools is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# MayaChemTools is distributed in the hope that it will be useful, but without
+# any warranty; without even the implied warranty of merchantability of fitness
+# for a particular purpose.  See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
+# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
+# Boston, MA, 02111-1307, USA.
+#
+
+use strict;
+use Carp;
+use Text::ParseWords;
+use TextUtil;
+use FileUtil;
+
+use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+
+@ISA = qw(Exporter);
+@EXPORT = qw();
+@EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);
+
+%EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
+
+#
+# Load amino acids data...
+#
+my(%AminoAcidDataMap, %AminoAcidThreeLetterCodeMap, %AminoAcidOneLetterCodeMap, %AminoAcidNameMap, @AminoAcidPropertyNames, %AminoAcidPropertyNamesMap, );
+_LoadAminoAcidsData();
+
+#
+# Get a list of all known amino acids as one of these values:
+# one letter code, three letter code, or amino acid name...
+#
+sub GetAminoAcids {
+  my($NameType, $ThreeLetterCode, $Name, @AminoAcidNames, %AminoAcidNamesMap);
+
+  $NameType = 'ThreeLetterCode';
+  if (@_ >= 1) {
+    ($NameType) = @_;
+  }
+
+  # Collect names...
+  %AminoAcidNamesMap = ();
+  for $ThreeLetterCode (keys %AminoAcidDataMap) {
+    NAME : {
+      if ($NameType =~ /^OneLetterCode$/i) {$Name = $AminoAcidDataMap{$ThreeLetterCode}{OneLetterCode}; last NAME; }
+      if ($NameType =~ /^AminoAcid$/i) {$Name = $AminoAcidDataMap{$ThreeLetterCode}{AminoAcid}; last NAME; }
+      $Name = $ThreeLetterCode;
+    }
+    $AminoAcidNamesMap{$Name} = $Name;
+  }
+
+  # Sort 'em out
+  @AminoAcidNames = ();
+  for $Name (sort keys %AminoAcidNamesMap) {
+    push @AminoAcidNames, $Name;
+  }
+
+  return (wantarray ? @AminoAcidNames : \@AminoAcidNames);
+}
+
+
+#
+# Get all available properties data for an amino acid using any of these symbols:
+# three letter code; one letter code; name.
+#
+# A reference to a hash array is returned with keys and values representing property
+# name and its values respectively.
+#
+sub GetAminoAcidPropertiesData {
+  my($AminoAcidID) = @_;
+  my($ThreeLetterCode);
+
+  if ($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID)) {
+    return \%{$AminoAcidDataMap{$ThreeLetterCode}};
+  }
+  else {
+    return undef;
+  }
+}
+
+#
+# Get names of all available amino acid properties. A reference to  an array containing
+# names of all available properties is returned.
+#
+sub GetAminoAcidPropertiesNames {
+  my($Mode);
+  my($PropertyName, @PropertyNames);
+
+  $Mode = 'ByGroup';
+  if (@_ == 1) {
+    ($Mode) = @_;
+  }
+
+  @PropertyNames = ();
+  if ($Mode =~ /^Alphabetical$/i) {
+    my($PropertyName);
+    # ThreeLetterCode, OneLetterCode, and AminoAcid are always listed first...
+    push @PropertyNames, qw(ThreeLetterCode OneLetterCode AminoAcid);
+    for $PropertyName (sort keys %AminoAcidPropertyNamesMap) {
+      if ($PropertyName !~ /^(ThreeLetterCode|OneLetterCode|AminoAcid)$/) {
+	push @PropertyNames, $PropertyName;
+      }
+    }
+  }
+  else {
+    push @PropertyNames, @AminoAcidPropertyNames;
+  }
+  return (wantarray ? @PropertyNames : \@PropertyNames);
+}
+
+#
+# Is it a known amino acid? Input is either an one/three letter code or a name.
+#
+sub IsAminoAcid {
+  my($AminoAcidID) = @_;
+  my($Status);
+
+  $Status = (_ValidateAminoAcidID($AminoAcidID)) ? 1 : 0;
+
+  return $Status;
+}
+
+
+#
+# Is it an available amino acid property?
+#
+sub IsAminoAcidProperty {
+  my($PropertyName) = @_;
+  my($Status);
+
+  $Status = (exists($AminoAcidPropertyNamesMap{$PropertyName})) ? 1 : 0;
+
+  return $Status;
+}
+
+#
+# Implents GetAminoAcid<PropertyName> for a valid proprty name.
+#
+sub AUTOLOAD {
+  my($AminoAcidID) = @_;
+  my($FunctionName, $PropertyName, $PropertyValue, $ThreeLetterCode);
+
+  $PropertyValue = undef;
+
+  use vars qw($AUTOLOAD);
+  $FunctionName = $AUTOLOAD;
+  $FunctionName =~ s/.*:://;
+
+  # Only Get<PropertyName> functions are supported...
+  if ($FunctionName !~ /^Get/) {
+    croak "Error: Function, AminoAcid::$FunctionName, is not supported by AUTOLOAD in AminoAcid module: Only Get<PropertyName> functions are implemented...";
+  }
+
+  $PropertyName = $FunctionName;
+  $PropertyName =~  s/^GetAminoAcid//;
+  if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
+    croak "Error: Function, AminoAcid::$FunctionName, is not supported by AUTOLOAD in AminoAcid module: Unknown amino acid property name, $PropertyName, specified...";
+  }
+
+  if (!($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID))) {
+    return undef;
+  }
+  $PropertyValue = $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
+  return $PropertyValue;
+}
+
+
+#
+# Load AminoAcidsData.csv files from <MayaChemTools>/lib directory...
+#
+sub _LoadAminoAcidsData {
+  my($AminoAcidsDataFile, $MayaChemToolsLibDir);
+
+  $MayaChemToolsLibDir = GetMayaChemToolsLibDirName();
+
+  $AminoAcidsDataFile =  "$MayaChemToolsLibDir" . "/data/AminoAcidsData.csv";
+
+  if (! -e "$AminoAcidsDataFile") {
+    croak "Error: MayaChemTools package file, $AminoAcidsDataFile, is missing: Possible installation problems...";
+  }
+
+  _LoadData($AminoAcidsDataFile);
+}
+
+#
+# Load AminoAcidsData.csv file from <MayaChemTools>/lib directory...
+#
+sub _LoadData {
+  my($AminoAcidsDataFile) = @_;
+
+  %AminoAcidDataMap = ();
+  @AminoAcidPropertyNames = ();
+  %AminoAcidPropertyNamesMap = ();
+  %AminoAcidThreeLetterCodeMap = ();
+  %AminoAcidOneLetterCodeMap = ();
+  %AminoAcidNameMap = ();
+
+  # Load property data for all amino acids...
+  #
+  # File Format:
+  #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
+  #
+  #
+  my($ThreeLetterCode, $OneLetterCode, $AminoAcidName, $Line, $NumOfCols, $InDelim, $Index, $Name, $Value, $Units, @LineWords, @ColLabels);
+
+  $InDelim = "\,";
+  open AMINOACIDSDATAFILE, "$AminoAcidsDataFile" or croak "Couldn't open $AminoAcidsDataFile: $! ...";
+
+  # Skip lines up to column labels...
+  LINE: while ($Line = GetTextLine(\*AMINOACIDSDATAFILE)) {
+    if ($Line !~ /^#/) {
+      last LINE;
+    }
+  }
+  @ColLabels= quotewords($InDelim, 0, $Line);
+  $NumOfCols = @ColLabels;
+
+  # Extract property names from column labels...
+  @AminoAcidPropertyNames = ();
+  for $Index (0 .. $#ColLabels) {
+    $Name = $ColLabels[$Index];
+    push @AminoAcidPropertyNames, $Name;
+
+    # Store property names...
+    $AminoAcidPropertyNamesMap{$Name} = $Name;
+  }
+
+  # Process amino acid data...
+  LINE: while ($Line = GetTextLine(\*AMINOACIDSDATAFILE)) {
+    if ($Line =~ /^#/) {
+      next LINE;
+    }
+    @LineWords = ();
+    @LineWords = quotewords($InDelim, 0, $Line);
+    if (@LineWords != $NumOfCols) {
+      croak "Error: The number of data fields, @LineWords, in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
+    }
+    $ThreeLetterCode = $LineWords[0]; $OneLetterCode = $LineWords[1]; $AminoAcidName = $LineWords[3];
+    if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
+      carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
+      next LINE;
+    }
+
+    # Store all the values...
+    %{$AminoAcidDataMap{$ThreeLetterCode}} = ();
+    for $Index (0 .. $#LineWords) {
+      $Name = $AminoAcidPropertyNames[$Index];
+      $Value = $LineWords[$Index];
+      $AminoAcidDataMap{$ThreeLetterCode}{$Name} = $Value;
+    }
+  }
+  close AMINOACIDSDATAFILE;
+
+  # Setup one letter and amino acid name maps...
+  _SetupAminoAcidIDMap();
+}
+
+
+#
+# Setup lowercase three/one letter code and name maps pointing
+# to three letter code as show in data file.
+#
+sub _SetupAminoAcidIDMap {
+  my($ThreeLetterCode, $OneLetterCode, $AminoAcidName);
+
+  %AminoAcidThreeLetterCodeMap = ();
+  %AminoAcidOneLetterCodeMap = ();
+  %AminoAcidNameMap = ();
+
+  for $ThreeLetterCode (keys %AminoAcidDataMap) {
+    $OneLetterCode = $AminoAcidDataMap{$ThreeLetterCode}{OneLetterCode};
+    $AminoAcidName = $AminoAcidDataMap{$ThreeLetterCode}{AminoAcid};
+
+    $AminoAcidThreeLetterCodeMap{lc($ThreeLetterCode)} = $ThreeLetterCode;
+    $AminoAcidOneLetterCodeMap{lc($OneLetterCode)} = $ThreeLetterCode;
+    $AminoAcidNameMap{lc($AminoAcidName)} = $ThreeLetterCode;
+  }
+}
+
+# Validate amino acid ID...
+sub _ValidateAminoAcidID {
+  my($AminoAcidID) = @_;
+  my($ThreeLetterCode);
+
+
+  if (length($AminoAcidID) == 3) {
+    if (! exists $AminoAcidThreeLetterCodeMap{lc($AminoAcidID)}) {
+      return undef;
+    }
+    $ThreeLetterCode = $AminoAcidThreeLetterCodeMap{lc($AminoAcidID)};
+  }
+  elsif (length($AminoAcidID) == 1) {
+    if (! exists $AminoAcidOneLetterCodeMap{lc($AminoAcidID)}) {
+      return undef;
+    }
+    $ThreeLetterCode = $AminoAcidOneLetterCodeMap{lc($AminoAcidID)};
+  }
+  else {
+    if (! exists $AminoAcidNameMap{lc($AminoAcidID)}) {
+      return undef;
+    }
+    $ThreeLetterCode = $AminoAcidNameMap{lc($AminoAcidID)};
+  }
+  return $ThreeLetterCode;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+AminoAcids
+
+=head1 SYNOPSIS
+
+use AminoAcids;
+
+use AminoAcids qw(:all);
+
+=head1 DESCRIPTION
+
+B<AminoAcids> module provides the following functions:
+
+GetAminoAcidPropertiesData, GetAminoAcidPropertiesNames, GetAminoAcid<PropertyName>,
+GetAminoAcids, IsAminoAcid, IsAminoAcidProperty
+
+=head1 FUNCTIONS
+
+=over 4
+
+=item B<GetAminoAcidPropertiesData>
+
+    $DataHashRef = GetAminoAcidPropertiesData($AminoAcidID);
+
+Returns a reference to hash containing property names and values for a specified
+amino acid.
+
+=item B<GetAminoAcidPropertiesNames>
+
+    @Names = GetAminoAcidPropertiesNames([$Mode]);
+    $NamesRef = GetAminoAcidPropertiesNames([$Mode]);
+
+Returns an array or a reference to an array containing names of amino acids
+properties. Order of amino acids properties is controlled by optional parameter
+I<Mode>. Possible values for I<Mode>: I<Alphabetical or  ByGroup>; Default: I<ByGroup>
+
+=item B<GetAminoAcidPropertyName>
+
+    $Value = GetAminoAcid<PropertyName>($AminoAcidID);
+
+Returns amino acid property value for a specified amino acid. These functions are
+not defined in this modules; these are implemented on the fly using Perl's AUTOLOAD
+funcion. Here is the list of known amino acids I<property names>: DNACodons, RNACodons,
+AcidicBasic, PolarNonpolar, Charged, Aromatic, HydrophobicHydophilic, IsoelectricPoint,
+pKCOOH, pKNH3+, ChemicalFormula, MolecularWeight, ExactMass, ChemicalFormulaMinusH2O,
+MolecularWeightMinusH2O(18.01524), ExactMassMinusH2O(18.01056), vanderWaalsVolume,
+%AccessibleResidues, %BuriedResidues, AlphaHelixChouAndFasman,
+AlphaHelixDeleageAndRoux, AlphaHelixLevitt, AminoAcidsComposition,
+AminoAcidsCompositionInSwissProt, AntiparallelBetaStrand, AverageAreaBuried, AverageFlexibility,
+BetaSheetChouAndFasman, BetaSheetDeleageAndRoux, BetaSheetLevitt,
+BetaTurnChouAndFasman, BetaTurnDeleageAndRoux, BetaTurnLevitt, Bulkiness,
+CoilDeleageAndRoux, HPLCHFBARetention, HPLCRetentionAtpH2.1, HPLCRetentionAtpH7.4,
+HPLCTFARetention, HydrophobicityAbrahamAndLeo, HydrophobicityBlack,
+HydrophobicityBullAndBreese, HydrophobicityChothia, HydrophobicityEisenbergAndOthers,
+HydrophobicityFauchereAndOthers, HydrophobicityGuy, HydrophobicityHPLCAtpH3.4Cowan,
+HydrophobicityHPLCAtpH7.5Cowan, HydrophobicityHPLCParkerAndOthers,
+HydrophobicityHPLCWilsonAndOthers, HydrophobicityHoppAndWoods, HydrophobicityJanin,
+HydrophobicityKyteAndDoolittle, HydrophobicityManavalanAndOthers,
+HydrophobicityMiyazawaAndOthers, HydrophobicityOMHSweetAndOthers,
+HydrophobicityRaoAndArgos, HydrophobicityRfMobility, HydrophobicityRoseAndOthers,
+HydrophobicityRoseman, HydrophobicityWellingAndOthers, HydrophobicityWolfendenAndOthers,
+ParallelBetaStrand, PolarityGrantham, PolarityZimmerman, RatioHeteroEndToSide,
+RecognitionFactors, Refractivity, RelativeMutability, TotalBetaStrand, LinearStructure,
+LinearStructureAtpH7.4
+
+=item B<GetAminoAcids>
+
+    $NamesRef = GetAminoAcids([$NameType]);
+    (@Names) = GetAminoAcids([$NameType]);
+
+Returns an array or a reference to an array containing names of amino acids
+as one letter code, three letter code, or amino acid name controlled by optional
+parameter $NameType. By default, amino acids names are returned as three
+letter code. Possible values for I<NameType>: I<ThreeLetterCode, OneLetterCode, or
+AminoAcid>.
+
+=item B<IsAminoAcid>
+
+    $Status = IsAminoAcid($AminoAcidID);
+
+Returns a flag indicating whether or not its a known amino acid ID.
+
+=item B<IsAminoAcidProperty>
+
+    $Status = IsAminoAcid($PropertyName);
+
+Returns a flag indicating whether or not its a known amino acid property name.
+
+=back
+
+=head1 AUTHOR
+
+Manish Sud <msud@san.rr.com>
+
+=head1 SEE ALSO
+
+NucleicAcids.pm, PeriodicTable.pm
+
+=head1 COPYRIGHT
+
+Copyright (C) 2015 Manish Sud. All rights reserved.
+
+This file is part of MayaChemTools.
+
+MayaChemTools is free software; you can redistribute it and/or modify it under
+the terms of the GNU Lesser General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+=cut