package AminoAcids;
#
# $RCSfile: AminoAcids.pm,v $
# $Date: 2015/02/28 20:47:02 $
# $Revision: 1.25 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use warnings;
use Carp;
# Loaded explicitly: @ISA names Exporter below, and without this line importing
# only works when some other module happens to have loaded Exporter already.
use Exporter;
use Text::ParseWords;
use TextUtil;
use FileUtil;

our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

#
# Load amino acids data...
#
# Module-wide state, filled in once when the module is loaded:
#
#   %AminoAcidDataMap            - three letter code => { property name => value }
#   %AminoAcidThreeLetterCodeMap - lowercase three letter code => three letter code
#   %AminoAcidOneLetterCodeMap   - lowercase one letter code => three letter code
#   %AminoAcidNameMap            - lowercase amino acid name => three letter code
#   @AminoAcidPropertyNames      - property names in data file column order
#   %AminoAcidPropertyNamesMap   - property name => property name (existence lookup)
#
my(
  %AminoAcidDataMap,
  %AminoAcidThreeLetterCodeMap,
  %AminoAcidOneLetterCodeMap,
  %AminoAcidNameMap,
  @AminoAcidPropertyNames,
  %AminoAcidPropertyNamesMap,
);
_LoadAminoAcidsData();

#
# Get a list of all known amino acids as one of these values:
# one letter code, three letter code, or amino acid name...
#
# The optional argument selects the kind of name returned: 'ThreeLetterCode'
# (the default), 'OneLetterCode', or 'AminoAcid'; it is matched without regard
# to case and any other value falls back to three letter codes.
#
# Returns the sorted, de-duplicated names as a list in list context or as an
# array reference in scalar context.
#
sub GetAminoAcids {
  # An explicitly passed undef behaves like the default instead of warning.
  my $NameType = (@_ >= 1 && defined $_[0]) ? $_[0] : 'ThreeLetterCode';

  # Decide once which value identifies each amino acid; an empty key means
  # "use the three letter code the data map is keyed by".
  my $PropertyKey = '';
  if ($NameType =~ /^OneLetterCode$/i) {
    $PropertyKey = 'OneLetterCode';
  }
  elsif ($NameType =~ /^AminoAcid$/i) {
    $PropertyKey = 'AminoAcid';
  }

  # Collect names; the hash removes duplicates...
  my %AminoAcidNamesMap = ();
  for my $ThreeLetterCode (keys %AminoAcidDataMap) {
    my $Name = $PropertyKey ? $AminoAcidDataMap{$ThreeLetterCode}{$PropertyKey} : $ThreeLetterCode;
    $AminoAcidNamesMap{$Name} = $Name;
  }

  # Sort 'em out
  my @AminoAcidNames = sort keys %AminoAcidNamesMap;

  return (wantarray ? @AminoAcidNames : \@AminoAcidNames);
}


#
# Get all available properties data for an amino acid using any of these symbols:
# three letter code; one letter code; name.
#
# A reference to a hash array is returned with keys and values representing property
# name and its values respectively. The reference points at the module's own data
# for that amino acid, not at a copy, so callers should treat it as read-only.
#
# undef is returned for an unknown amino acid.
#
sub GetAminoAcidPropertiesData {
  my($AminoAcidID) = @_;

  my $ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID);
  if (!$ThreeLetterCode) {
    # Explicit undef (rather than a bare return) keeps the list-context
    # result a one-element list, as callers have always received.
    return undef;
  }
  return $AminoAcidDataMap{$ThreeLetterCode};
}

#
# Get names of all available amino acid properties.
#
# With no argument, or any mode other than 'Alphabetical', names are returned in
# data file column order. With 'Alphabetical' (case-insensitive), ThreeLetterCode,
# OneLetterCode, and AminoAcid come first, followed by the rest in sorted order.
# The mode is only honored when exactly one argument is passed.
#
# Returns a list in list context or an array reference in scalar context.
#
sub GetAminoAcidPropertiesNames {
  my $Mode = (@_ == 1 && defined $_[0]) ? $_[0] : 'ByGroup';

  my @PropertyNames = ();
  if ($Mode =~ /^Alphabetical$/i) {
    # ThreeLetterCode, OneLetterCode, and AminoAcid are always listed first...
    push @PropertyNames, qw(ThreeLetterCode OneLetterCode AminoAcid);
    push @PropertyNames,
      grep { !/^(?:ThreeLetterCode|OneLetterCode|AminoAcid)$/ }
      sort keys %AminoAcidPropertyNamesMap;
  }
  else {
    push @PropertyNames, @AminoAcidPropertyNames;
  }
  return (wantarray ? @PropertyNames : \@PropertyNames);
}

#
# Is it a known amino acid? Input is either a one/three letter code or a name.
# Returns 1 or 0.
#
sub IsAminoAcid {
  my($AminoAcidID) = @_;

  return _ValidateAminoAcidID($AminoAcidID) ? 1 : 0;
}


#
# Is it an available amino acid property? The name must match a data file
# column label exactly. Returns 1 or 0.
#
sub IsAminoAcidProperty {
  my($PropertyName) = @_;

  return (defined $PropertyName && exists $AminoAcidPropertyNamesMap{$PropertyName}) ? 1 : 0;
}

#
# Implements GetAminoAcid<PropertyName> for a valid property name.
#
# The single argument is an amino acid ID (three letter code, one letter code,
# or name). Returns the property value, or undef for an unknown amino acid.
# Croaks when the function name does not start with Get or when <PropertyName>
# is not a known property.
#
sub AUTOLOAD {
  my($AminoAcidID) = @_;

  our $AUTOLOAD;
  my $FunctionName = $AUTOLOAD;
  $FunctionName =~ s/.*:://;

  # Object destruction is routed through AUTOLOAD as well; never treat it
  # as a property request.
  return if $FunctionName eq 'DESTROY';

  # Only Get<PropertyName> functions are supported...
  if ($FunctionName !~ /^Get/) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Only Get<PropertyName> functions are implemented...";
  }

  my $PropertyName = $FunctionName;
  $PropertyName =~ s/^GetAminoAcid//;
  if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Unknown amino acid property name, $PropertyName, specified...";
  }

  my $ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID);
  if (!$ThreeLetterCode) {
    return undef;
  }
  return $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
}


#
# Load AminoAcidsData.csv file from <MayaChemTools>/lib directory...
#
# Croaks when the data file does not exist.
#
sub _LoadAminoAcidsData {
  my $MayaChemToolsLibDir = GetMayaChemToolsLibDirName();
  my $AminoAcidsDataFile = "$MayaChemToolsLibDir/data/AminoAcidsData.csv";

  if (! -e $AminoAcidsDataFile) {
    croak "Error: MayaChemTools package file, $AminoAcidsDataFile, is missing: Possible installation problems...";
  }

  _LoadData($AminoAcidsDataFile);
}

#
# Load the specified AminoAcidsData.csv file...
#
# Resets all module data, reads the column labels from the first line that
# does not start with '#', then stores one hash of property values per amino
# acid keyed by the value in the first column. Lines starting with '#' are
# skipped. A repeated three letter code is ignored with a warning.
#
# Croaks when the file cannot be opened, when no column label line is found,
# or when a data line has a different number of fields than the label line.
#
sub _LoadData {
  my($AminoAcidsDataFile) = @_;

  %AminoAcidDataMap = ();
  @AminoAcidPropertyNames = ();
  %AminoAcidPropertyNamesMap = ();
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  # Load property data for all amino acids...
  #
  # File Format:
  #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
  #
  #
  my $InDelim = "\,";

  # Three-argument open on a lexical handle: the file name can never be
  # misread as an open mode, and the handle is released on every exit path.
  open my $DataFileHandle, '<', $AminoAcidsDataFile or croak "Couldn't open $AminoAcidsDataFile: $! ...";

  # Skip lines up to column labels...
  my $Line;
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }
  if (!$Line) {
    # The loop ran out of input without reaching a label line.
    croak "Error: No column labels found in MayaChemTools package file, $AminoAcidsDataFile: Possible installation problems...";
  }
  my @ColLabels = quotewords($InDelim, 0, $Line);
  my $NumOfCols = @ColLabels;

  # Extract property names from column labels...
  @AminoAcidPropertyNames = @ColLabels;
  @AminoAcidPropertyNamesMap{@ColLabels} = @ColLabels;

  # Process amino acid data...
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    my @LineWords = quotewords($InDelim, 0, $Line);
    if (@LineWords != $NumOfCols) {
      # Report the field count, not the field values.
      my $NumOfFields = @LineWords;
      croak "Error: The number of data fields, $NumOfFields, in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
    }
    my $ThreeLetterCode = $LineWords[0];
    if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
      carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    # Store all the values; when a column label is repeated, the value from
    # the later column is the one kept...
    my %PropertyData = ();
    @PropertyData{@AminoAcidPropertyNames} = @LineWords;
    $AminoAcidDataMap{$ThreeLetterCode} = \%PropertyData;
  }
  close $DataFileHandle;

  # Setup one letter and amino acid name maps...
  _SetupAminoAcidIDMap();
}


#
# Setup lowercase three/one letter code and name maps pointing
# to three letter code as shown in data file.
#
# All three maps are emptied and rebuilt from %AminoAcidDataMap, using the
# OneLetterCode and AminoAcid property values of each entry.
#
sub _SetupAminoAcidIDMap {
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  for my $ThreeLetterCode (keys %AminoAcidDataMap) {
    my $PropertyData = $AminoAcidDataMap{$ThreeLetterCode};

    $AminoAcidThreeLetterCodeMap{lc($ThreeLetterCode)} = $ThreeLetterCode;
    $AminoAcidOneLetterCodeMap{lc($PropertyData->{OneLetterCode})} = $ThreeLetterCode;
    $AminoAcidNameMap{lc($PropertyData->{AminoAcid})} = $ThreeLetterCode;
  }
}

#
# Validate amino acid ID...
#
# The ID is looked up without regard to case. Its length decides which map is
# consulted: three characters means a three letter code, one character means
# a one letter code, and anything else is treated as an amino acid name.
#
# Returns the three letter code as it appears in the data file, or undef when
# the ID is undefined or unknown.
#
sub _ValidateAminoAcidID {
  my($AminoAcidID) = @_;

  # An undefined ID can never match; answering here avoids uninitialized-value
  # warnings from length() and lc() below.
  if (!defined $AminoAcidID) {
    return undef;
  }

  my $IDLength = length($AminoAcidID);
  my $IDMapRef = ($IDLength == 3) ? \%AminoAcidThreeLetterCodeMap
               : ($IDLength == 1) ? \%AminoAcidOneLetterCodeMap
               :                    \%AminoAcidNameMap;

  my $LookupKey = lc($AminoAcidID);
  if (!exists $IDMapRef->{$LookupKey}) {
    return undef;
  }
  return $IDMapRef->{$LookupKey};
}
