MayaChemTools

   1 package AminoAcids;
   2 #
   3 # $RCSfile: AminoAcids.pm,v $
   4 # $Date: 2015/02/28 20:47:02 $
   5 # $Revision: 1.25 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Text::ParseWords;
  32 use TextUtil;
  33 use FileUtil;
  34 
  35 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  36 
  37 @ISA = qw(Exporter);
  38 @EXPORT = qw();
  39 @EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);
  40 
  41 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  42 
  43 #
  44 # Load amino acids data...
  45 #
  46 my(%AminoAcidDataMap, %AminoAcidThreeLetterCodeMap, %AminoAcidOneLetterCodeMap, %AminoAcidNameMap, @AminoAcidPropertyNames, %AminoAcidPropertyNamesMap, );
  47 _LoadAminoAcidsData();
  48 
  49 #
  50 # Get a list of all known amino acids as one of these values:
  51 # one letter code, three letter code, or amino acid name...
  52 #
  53 sub GetAminoAcids {
  54   my($NameType, $ThreeLetterCode, $Name, @AminoAcidNames, %AminoAcidNamesMap);
  55 
  56   $NameType = 'ThreeLetterCode';
  57   if (@_ >= 1) {
  58     ($NameType) = @_;
  59   }
  60 
  61   # Collect names...
  62   %AminoAcidNamesMap = ();
  63   for $ThreeLetterCode (keys %AminoAcidDataMap) {
  64     NAME : {
  65       if ($NameType =~ /^OneLetterCode$/i) {$Name = $AminoAcidDataMap{$ThreeLetterCode}{OneLetterCode}; last NAME; }
  66       if ($NameType =~ /^AminoAcid$/i) {$Name = $AminoAcidDataMap{$ThreeLetterCode}{AminoAcid}; last NAME; }
  67       $Name = $ThreeLetterCode;
  68     }
  69     $AminoAcidNamesMap{$Name} = $Name;
  70   }
  71 
  72   # Sort 'em out
  73   @AminoAcidNames = ();
  74   for $Name (sort keys %AminoAcidNamesMap) {
  75     push @AminoAcidNames, $Name;
  76   }
  77 
  78   return (wantarray ? @AminoAcidNames : \@AminoAcidNames);
  79 }
  80 
  81 
  82 #
  83 # Get all available properties data for an amino acid using any of these symbols:
  84 # three letter code; one letter code; name.
  85 #
  86 # A reference to a hash array is returned with keys and values representing property
  87 # name and its values respectively.
  88 #
  89 sub GetAminoAcidPropertiesData {
  90   my($AminoAcidID) = @_;
  91   my($ThreeLetterCode);
  92 
  93   if ($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID)) {
  94     return \%{$AminoAcidDataMap{$ThreeLetterCode}};
  95   }
  96   else {
  97     return undef;
  98   }
  99 }
 100 
 101 #
 102 # Get names of all available amino acid properties. A reference to  an array containing
 103 # names of all available properties is returned.
 104 #
 105 sub GetAminoAcidPropertiesNames {
 106   my($Mode);
 107   my($PropertyName, @PropertyNames);
 108 
 109   $Mode = 'ByGroup';
 110   if (@_ == 1) {
 111     ($Mode) = @_;
 112   }
 113 
 114   @PropertyNames = ();
 115   if ($Mode =~ /^Alphabetical$/i) {
 116     my($PropertyName);
 117     # ThreeLetterCode, OneLetterCode, and AminoAcid are always listed first...
 118     push @PropertyNames, qw(ThreeLetterCode OneLetterCode AminoAcid);
 119     for $PropertyName (sort keys %AminoAcidPropertyNamesMap) {
 120       if ($PropertyName !~ /^(ThreeLetterCode|OneLetterCode|AminoAcid)$/) {
 121         push @PropertyNames, $PropertyName;
 122       }
 123     }
 124   }
 125   else {
 126     push @PropertyNames, @AminoAcidPropertyNames;
 127   }
 128   return (wantarray ? @PropertyNames : \@PropertyNames);
 129 }
 130 
 131 #
 132 # Is it a known amino acid? Input is either an one/three letter code or a name.
 133 #
 134 sub IsAminoAcid {
 135   my($AminoAcidID) = @_;
 136   my($Status);
 137 
 138   $Status = (_ValidateAminoAcidID($AminoAcidID)) ? 1 : 0;
 139 
 140   return $Status;
 141 }
 142 
 143 
 144 #
 145 # Is it an available amino acid property?
 146 #
 147 sub IsAminoAcidProperty {
 148   my($PropertyName) = @_;
 149   my($Status);
 150 
 151   $Status = (exists($AminoAcidPropertyNamesMap{$PropertyName})) ? 1 : 0;
 152 
 153   return $Status;
 154 }
 155 
 156 #
 157 # Implents GetAminoAcid<PropertyName> for a valid proprty name.
 158 #
 159 sub AUTOLOAD {
 160   my($AminoAcidID) = @_;
 161   my($FunctionName, $PropertyName, $PropertyValue, $ThreeLetterCode);
 162 
 163   $PropertyValue = undef;
 164 
 165   use vars qw($AUTOLOAD);
 166   $FunctionName = $AUTOLOAD;
 167   $FunctionName =~ s/.*:://;
 168 
 169   # Only Get<PropertyName> functions are supported...
 170   if ($FunctionName !~ /^Get/) {
 171     croak "Error: Function, AminoAcid::$FunctionName, is not supported by AUTOLOAD in AminoAcid module: Only Get<PropertyName> functions are implemented...";
 172   }
 173 
 174   $PropertyName = $FunctionName;
 175   $PropertyName =~  s/^GetAminoAcid//;
 176   if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
 177     croak "Error: Function, AminoAcid::$FunctionName, is not supported by AUTOLOAD in AminoAcid module: Unknown amino acid property name, $PropertyName, specified...";
 178   }
 179 
 180   if (!($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID))) {
 181     return undef;
 182   }
 183   $PropertyValue = $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
 184   return $PropertyValue;
 185 }
 186 
 187 
 188 #
 189 # Load AminoAcidsData.csv files from <MayaChemTools>/lib directory...
 190 #
 191 sub _LoadAminoAcidsData {
 192   my($AminoAcidsDataFile, $MayaChemToolsLibDir);
 193 
 194   $MayaChemToolsLibDir = GetMayaChemToolsLibDirName();
 195 
 196   $AminoAcidsDataFile =  "$MayaChemToolsLibDir" . "/data/AminoAcidsData.csv";
 197 
 198   if (! -e "$AminoAcidsDataFile") {
 199     croak "Error: MayaChemTools package file, $AminoAcidsDataFile, is missing: Possible installation problems...";
 200   }
 201 
 202   _LoadData($AminoAcidsDataFile);
 203 }
 204 
 205 #
 206 # Load AminoAcidsData.csv file from <MayaChemTools>/lib directory...
 207 #
 208 sub _LoadData {
 209   my($AminoAcidsDataFile) = @_;
 210 
 211   %AminoAcidDataMap = ();
 212   @AminoAcidPropertyNames = ();
 213   %AminoAcidPropertyNamesMap = ();
 214   %AminoAcidThreeLetterCodeMap = ();
 215   %AminoAcidOneLetterCodeMap = ();
 216   %AminoAcidNameMap = ();
 217 
 218   # Load property data for all amino acids...
 219   #
 220   # File Format:
 221   #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
 222   #
 223   #
 224   my($ThreeLetterCode, $OneLetterCode, $AminoAcidName, $Line, $NumOfCols, $InDelim, $Index, $Name, $Value, $Units, @LineWords, @ColLabels);
 225 
 226   $InDelim = "\,";
 227   open AMINOACIDSDATAFILE, "$AminoAcidsDataFile" or croak "Couldn't open $AminoAcidsDataFile: $! ...";
 228 
 229   # Skip lines up to column labels...
 230   LINE: while ($Line = GetTextLine(\*AMINOACIDSDATAFILE)) {
 231     if ($Line !~ /^#/) {
 232       last LINE;
 233     }
 234   }
 235   @ColLabels= quotewords($InDelim, 0, $Line);
 236   $NumOfCols = @ColLabels;
 237 
 238   # Extract property names from column labels...
 239   @AminoAcidPropertyNames = ();
 240   for $Index (0 .. $#ColLabels) {
 241     $Name = $ColLabels[$Index];
 242     push @AminoAcidPropertyNames, $Name;
 243 
 244     # Store property names...
 245     $AminoAcidPropertyNamesMap{$Name} = $Name;
 246   }
 247 
 248   # Process amino acid data...
 249   LINE: while ($Line = GetTextLine(\*AMINOACIDSDATAFILE)) {
 250     if ($Line =~ /^#/) {
 251       next LINE;
 252     }
 253     @LineWords = ();
 254     @LineWords = quotewords($InDelim, 0, $Line);
 255     if (@LineWords != $NumOfCols) {
 256       croak "Error: The number of data fields, @LineWords, in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
 257     }
 258     $ThreeLetterCode = $LineWords[0]; $OneLetterCode = $LineWords[1]; $AminoAcidName = $LineWords[3];
 259     if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
 260       carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
 261       next LINE;
 262     }
 263 
 264     # Store all the values...
 265     %{$AminoAcidDataMap{$ThreeLetterCode}} = ();
 266     for $Index (0 .. $#LineWords) {
 267       $Name = $AminoAcidPropertyNames[$Index];
 268       $Value = $LineWords[$Index];
 269       $AminoAcidDataMap{$ThreeLetterCode}{$Name} = $Value;
 270     }
 271   }
 272   close AMINOACIDSDATAFILE;
 273 
 274   # Setup one letter and amino acid name maps...
 275   _SetupAminoAcidIDMap();
 276 }
 277 
 278 
 279 #
 280 # Setup lowercase three/one letter code and name maps pointing
 281 # to three letter code as show in data file.
 282 #
 283 sub _SetupAminoAcidIDMap {
 284   my($ThreeLetterCode, $OneLetterCode, $AminoAcidName);
 285 
 286   %AminoAcidThreeLetterCodeMap = ();
 287   %AminoAcidOneLetterCodeMap = ();
 288   %AminoAcidNameMap = ();
 289 
 290   for $ThreeLetterCode (keys %AminoAcidDataMap) {
 291     $OneLetterCode = $AminoAcidDataMap{$ThreeLetterCode}{OneLetterCode};
 292     $AminoAcidName = $AminoAcidDataMap{$ThreeLetterCode}{AminoAcid};
 293 
 294     $AminoAcidThreeLetterCodeMap{lc($ThreeLetterCode)} = $ThreeLetterCode;
 295     $AminoAcidOneLetterCodeMap{lc($OneLetterCode)} = $ThreeLetterCode;
 296     $AminoAcidNameMap{lc($AminoAcidName)} = $ThreeLetterCode;
 297   }
 298 }
 299 
 300 # Validate amino acid ID...
 301 sub _ValidateAminoAcidID {
 302   my($AminoAcidID) = @_;
 303   my($ThreeLetterCode);
 304 
 305 
 306   if (length($AminoAcidID) == 3) {
 307     if (! exists $AminoAcidThreeLetterCodeMap{lc($AminoAcidID)}) {
 308       return undef;
 309     }
 310     $ThreeLetterCode = $AminoAcidThreeLetterCodeMap{lc($AminoAcidID)};
 311   }
 312   elsif (length($AminoAcidID) == 1) {
 313     if (! exists $AminoAcidOneLetterCodeMap{lc($AminoAcidID)}) {
 314       return undef;
 315     }
 316     $ThreeLetterCode = $AminoAcidOneLetterCodeMap{lc($AminoAcidID)};
 317   }
 318   else {
 319     if (! exists $AminoAcidNameMap{lc($AminoAcidID)}) {
 320       return undef;
 321     }
 322     $ThreeLetterCode = $AminoAcidNameMap{lc($AminoAcidID)};
 323   }
 324   return $ThreeLetterCode;
 325 }
 326 
 327