0
|
1 package AminoAcids;
|
|
2 #
|
|
3 # $RCSfile: AminoAcids.pm,v $
|
|
4 # $Date: 2015/02/28 20:47:02 $
|
|
5 # $Revision: 1.25 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Text::ParseWords;
|
|
32 use TextUtil;
|
|
33 use FileUtil;
|
|
34
|
|
# Exporter is loaded explicitly: inheriting from it through @ISA only works
# when the module has actually been loaded, and relying on another module to
# have loaded it first is fragile.
use Exporter;

our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Nothing is exported by default; all public functions are available on
# request, individually or through the ':all' tag.
@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

#
# Load amino acids data...
#
# These file-scoped variables hold all data read from the amino acids data
# file. They are filled once, when the module is loaded, by the call below
# and are shared by every function in this file.
#
my(%AminoAcidDataMap, %AminoAcidThreeLetterCodeMap, %AminoAcidOneLetterCodeMap, %AminoAcidNameMap, @AminoAcidPropertyNames, %AminoAcidPropertyNamesMap, );
_LoadAminoAcidsData();
|
|
48
|
|
#
# Get a sorted list of all known amino acids, identified by one of these values:
# one letter code, three letter code, or amino acid name.
#
# Parameters:
#   $NameType - Optional. 'OneLetterCode' or 'AminoAcid' (case insensitive);
#               any other value, or no value, selects three letter codes.
#
# Returns the list in list context and a reference to it in scalar context.
#
sub GetAminoAcids {
  my $NameType = (@_ >= 1) ? $_[0] : 'ThreeLetterCode';

  # Work out once which data column supplies the names; undef means that the
  # three letter code itself, which is the key of the data map, is used.
  my $NameColumn = ($NameType =~ /^OneLetterCode$/i) ? 'OneLetterCode'
                 : ($NameType =~ /^AminoAcid$/i) ? 'AminoAcid'
                 : undef;

  # Collect the names in a hash so that each one appears only once...
  my %UniqueNames = ();
  for my $Code (keys %AminoAcidDataMap) {
    my $Name = defined($NameColumn) ? $AminoAcidDataMap{$Code}{$NameColumn} : $Code;
    $UniqueNames{$Name} = $Name;
  }

  my @SortedNames = sort keys %UniqueNames;

  return (wantarray ? @SortedNames : \@SortedNames);
}
|
|
80
|
|
81
|
|
#
# Get all available properties data for an amino acid using any of these symbols:
# three letter code; one letter code; name.
#
# Parameters:
#   $AminoAcidID - Three letter code, one letter code, or amino acid name.
#
# Returns a reference to a hash whose keys are property names and whose values
# are the corresponding property values, or undef for an unknown amino acid ID.
# The reference points at the module's own data for that amino acid, not at a
# copy of it.
#
sub GetAminoAcidPropertiesData {
  my($AminoAcidID) = @_;

  my $Code = _ValidateAminoAcidID($AminoAcidID);
  if (!$Code) {
    return undef;
  }

  return $AminoAcidDataMap{$Code};
}
|
|
100
|
|
#
# Get names of all available amino acid properties.
#
# Parameters:
#   $Mode - Optional. 'Alphabetical' (case insensitive) sorts the names; any
#           other value keeps the order of the column labels in the data file.
#           The value is used only when exactly one argument is passed.
#
# Returns the names as a list in list context and as a reference to an array
# in scalar context.
#
sub GetAminoAcidPropertiesNames {
  my $Mode = (@_ == 1) ? $_[0] : 'ByGroup';

  my @Names = ();
  if ($Mode =~ /^Alphabetical$/i) {
    # ThreeLetterCode, OneLetterCode, and AminoAcid are always listed first,
    # followed by all the remaining names in sorted order...
    @Names = qw(ThreeLetterCode OneLetterCode AminoAcid);
    push @Names, grep { !/^(ThreeLetterCode|OneLetterCode|AminoAcid)$/ } sort keys %AminoAcidPropertyNamesMap;
  }
  else {
    @Names = @AminoAcidPropertyNames;
  }

  return (wantarray ? @Names : \@Names);
}
|
|
130
|
|
#
# Is it a known amino acid? Input is either a one/three letter code or a name.
#
# Returns 1 for a known amino acid ID and 0 otherwise.
#
sub IsAminoAcid {
  my($AminoAcidID) = @_;

  return _ValidateAminoAcidID($AminoAcidID) ? 1 : 0;
}
|
|
142
|
|
143
|
|
#
# Is it an available amino acid property?
#
# The name must match a column label of the data file exactly. Returns 1 for
# a known property name and 0 otherwise.
#
sub IsAminoAcidProperty {
  my($PropertyName) = @_;

  if (exists $AminoAcidPropertyNamesMap{$PropertyName}) {
    return 1;
  }
  return 0;
}
|
|
155
|
|
#
# Implements GetAminoAcid<PropertyName> for a valid property name.
#
# Perl calls this function for any function of this package which is not
# defined. The called name is taken from $AUTOLOAD; after removing the package
# qualifier and the GetAminoAcid prefix, what is left must be a known
# property name.
#
# Parameters:
#   $AminoAcidID - Three letter code, one letter code, or amino acid name.
#
# Returns the property value for the amino acid, or undef for an unknown
# amino acid ID. Croaks for a function name which does not start with Get or
# which does not correspond to a known property name.
#
sub AUTOLOAD {
  my($AminoAcidID) = @_;
  my($FunctionName, $PropertyName, $ThreeLetterCode);

  our $AUTOLOAD;
  $FunctionName = $AUTOLOAD;
  $FunctionName =~ s/.*:://;

  # Only Get<PropertyName> functions are supported...
  if ($FunctionName !~ /^Get/) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Only Get<PropertyName> functions are implemented...";
  }

  $PropertyName = $FunctionName;
  $PropertyName =~ s/^GetAminoAcid//;
  if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Unknown amino acid property name, $PropertyName, specified...";
  }

  if (!($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID))) {
    return undef;
  }

  return $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
}
|
|
186
|
|
187
|
|
#
# Load AminoAcidsData.csv file from <MayaChemTools>/lib directory...
#
# The location of the lib directory is supplied by GetMayaChemToolsLibDirName.
# Croaks when the data file does not exist; otherwise the file is handed to
# _LoadData.
#
sub _LoadAminoAcidsData {
  my $DataFile = GetMayaChemToolsLibDirName() . "/data/AminoAcidsData.csv";

  unless (-e $DataFile) {
    croak "Error: MayaChemTools package file, $DataFile, is missing: Possible installation problems...";
  }

  _LoadData($DataFile);
}
|
|
204
|
|
#
# Load AminoAcidsData.csv file from <MayaChemTools>/lib directory...
#
# Resets all the file-scoped data maps and fills them from the specified
# data file: the first line not starting with '#' supplies the property names
# (column labels); each following line not starting with '#' supplies the
# property values for one amino acid, keyed by the value in its first column.
#
# Parameters:
#   $AminoAcidsDataFile - Name of the data file to read.
#
# Croaks when the file can't be opened or when a data line doesn't have the
# same number of fields as the column labels line. A line whose three letter
# code has already been loaded is skipped with a warning.
#
sub _LoadData {
  my($AminoAcidsDataFile) = @_;

  %AminoAcidDataMap = ();
  @AminoAcidPropertyNames = ();
  %AminoAcidPropertyNamesMap = ();
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  # Load property data for all amino acids...
  #
  # File Format:
  #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
  #
  # NOTE(review): MolecularWeight is listed twice in the format above. If the
  # data file really contains the label twice, the value from the later column
  # is the one kept for each amino acid - confirm against the data file.
  #
  my($ThreeLetterCode, $Line, $NumOfCols, $NumOfFields, $InDelim, $DataFileHandle, @LineWords, @ColLabels);

  $InDelim = "\,";
  open $DataFileHandle, "<", $AminoAcidsDataFile or croak "Couldn't open $AminoAcidsDataFile: $! ...";

  # Skip lines up to column labels...
  #
  # NOTE(review): this loop and the data loop below stop at the first false
  # value returned by GetTextLine; presumably that happens only at the end of
  # the file - confirm against TextUtil.
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }
  @ColLabels = quotewords($InDelim, 0, $Line);
  $NumOfCols = @ColLabels;

  # Column labels are the property names: keep them in file order and also in
  # a map for quick lookup...
  @AminoAcidPropertyNames = @ColLabels;
  @AminoAcidPropertyNamesMap{@ColLabels} = @ColLabels;

  # Process amino acid data...
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    @LineWords = quotewords($InDelim, 0, $Line);

    # Report the number of fields found, not the fields themselves...
    $NumOfFields = @LineWords;
    if ($NumOfFields != $NumOfCols) {
      croak "Error: The number of data fields, $NumOfFields, in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
    }

    $ThreeLetterCode = $LineWords[0];
    if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
      carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    # Store all the values: the Nth field goes with the Nth property name...
    %{$AminoAcidDataMap{$ThreeLetterCode}} = ();
    @{$AminoAcidDataMap{$ThreeLetterCode}}{@AminoAcidPropertyNames} = @LineWords;
  }
  close $DataFileHandle;

  # Setup one letter and amino acid name maps...
  _SetupAminoAcidIDMap();
}
|
|
277
|
|
278
|
|
#
# Setup lowercase three/one letter code and name maps pointing
# to three letter code as shown in data file.
#
# All three maps are rebuilt from scratch using the amino acids data loaded
# so far; these are the maps used to look up an amino acid ID without regard
# to case.
#
sub _SetupAminoAcidIDMap {
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  for my $Code (keys %AminoAcidDataMap) {
    my $Properties = $AminoAcidDataMap{$Code};

    $AminoAcidThreeLetterCodeMap{lc($Code)} = $Code;
    $AminoAcidOneLetterCodeMap{lc($Properties->{OneLetterCode})} = $Code;
    $AminoAcidNameMap{lc($Properties->{AminoAcid})} = $Code;
  }
}
|
|
299
|
|
# Validate amino acid ID...
#
# The length of the ID decides how it is looked up: an ID of length three is
# treated as a three letter code, an ID of length one as a one letter code,
# and anything else as an amino acid name. The lookup ignores case.
#
# Returns the three letter code, as shown in the data file, for a known ID and
# undef otherwise.
#
sub _ValidateAminoAcidID {
  my($AminoAcidID) = @_;

  my $IDLength = length($AminoAcidID);
  my $LookupMapRef = ($IDLength == 3) ? \%AminoAcidThreeLetterCodeMap
                   : ($IDLength == 1) ? \%AminoAcidOneLetterCodeMap
                   : \%AminoAcidNameMap;

  my $LookupKey = lc($AminoAcidID);
  if (! exists $LookupMapRef->{$LookupKey}) {
    return undef;
  }

  return $LookupMapRef->{$LookupKey};
}
|
|
326
|
|
327
|
|
328 1;
|
|
329
|
|
330 __END__
|
|
331
|
|
332 =head1 NAME
|
|
333
|
|
334 AminoAcids
|
|
335
|
|
336 =head1 SYNOPSIS
|
|
337
|
|
338 use AminoAcids;
|
|
339
|
|
340 use AminoAcids qw(:all);
|
|
341
|
|
342 =head1 DESCRIPTION
|
|
343
|
|
344 B<AminoAcids> module provides the following functions:
|
|
345
|
|
346 GetAminoAcidPropertiesData, GetAminoAcidPropertiesNames, GetAminoAcid<PropertyName>,
|
|
347 GetAminoAcids, IsAminoAcid, IsAminoAcidProperty
|
|
348
|
|
349 =head1 FUNCTIONS
|
|
350
|
|
351 =over 4
|
|
352
|
|
353 =item B<GetAminoAcidPropertiesData>
|
|
354
|
|
355 $DataHashRef = GetAminoAcidPropertiesData($AminoAcidID);
|
|
356
|
|
357 Returns a reference to hash containing property names and values for a specified
|
|
358 amino acid.
|
|
359
|
|
360 =item B<GetAminoAcidPropertiesNames>
|
|
361
|
|
362 @Names = GetAminoAcidPropertiesNames([$Mode]);
|
|
363 $NamesRef = GetAminoAcidPropertiesNames([$Mode]);
|
|
364
|
|
365 Returns an array or a reference to an array containing names of amino acids
|
|
366 properties. Order of amino acids properties is controlled by optional parameter
|
|
367 I<Mode>. Possible values for I<Mode>: I<Alphabetical or ByGroup>; Default: I<ByGroup>
|
|
368
|
|
369 =item B<GetAminoAcidPropertyName>
|
|
370
|
|
371 $Value = GetAminoAcid<PropertyName>($AminoAcidID);
|
|
372
|
|
373 Returns amino acid property value for a specified amino acid. These functions are
|
|
374 not defined in this module; they are implemented on the fly using Perl's AUTOLOAD
|
|
375 function. Here is the list of known amino acids I<property names>: DNACodons, RNACodons,
|
|
376 AcidicBasic, PolarNonpolar, Charged, Aromatic, HydrophobicHydophilic, IsoelectricPoint,
|
|
377 pKCOOH, pKNH3+, ChemicalFormula, MolecularWeight, ExactMass, ChemicalFormulaMinusH2O,
|
|
378 MolecularWeightMinusH2O(18.01524), ExactMassMinusH2O(18.01056), vanderWaalsVolume,
|
|
379 %AccessibleResidues, %BuriedResidues, AlphaHelixChouAndFasman,
|
|
380 AlphaHelixDeleageAndRoux, AlphaHelixLevitt, AminoAcidsComposition,
|
|
381 AminoAcidsCompositionInSwissProt, AntiparallelBetaStrand, AverageAreaBuried, AverageFlexibility,
|
|
382 BetaSheetChouAndFasman, BetaSheetDeleageAndRoux, BetaSheetLevitt,
|
|
383 BetaTurnChouAndFasman, BetaTurnDeleageAndRoux, BetaTurnLevitt, Bulkiness,
|
|
384 CoilDeleageAndRoux, HPLCHFBARetention, HPLCRetentionAtpH2.1, HPLCRetentionAtpH7.4,
|
|
385 HPLCTFARetention, HydrophobicityAbrahamAndLeo, HydrophobicityBlack,
|
|
386 HydrophobicityBullAndBreese, HydrophobicityChothia, HydrophobicityEisenbergAndOthers,
|
|
387 HydrophobicityFauchereAndOthers, HydrophobicityGuy, HydrophobicityHPLCAtpH3.4Cowan,
|
|
388 HydrophobicityHPLCAtpH7.5Cowan, HydrophobicityHPLCParkerAndOthers,
|
|
389 HydrophobicityHPLCWilsonAndOthers, HydrophobicityHoppAndWoods, HydrophobicityJanin,
|
|
390 HydrophobicityKyteAndDoolittle, HydrophobicityManavalanAndOthers,
|
|
391 HydrophobicityMiyazawaAndOthers, HydrophobicityOMHSweetAndOthers,
|
|
392 HydrophobicityRaoAndArgos, HydrophobicityRfMobility, HydrophobicityRoseAndOthers,
|
|
393 HydrophobicityRoseman, HydrophobicityWellingAndOthers, HydrophobicityWolfendenAndOthers,
|
|
394 ParallelBetaStrand, PolarityGrantham, PolarityZimmerman, RatioHeteroEndToSide,
|
|
395 RecognitionFactors, Refractivity, RelativeMutability, TotalBetaStrand, LinearStructure,
|
|
396 LinearStructureAtpH7.4
|
|
397
|
|
398 =item B<GetAminoAcids>
|
|
399
|
|
400 $NamesRef = GetAminoAcids([$NameType]);
|
|
401 (@Names) = GetAminoAcids([$NameType]);
|
|
402
|
|
403 Returns an array or a reference to an array containing names of amino acids
|
|
404 as one letter code, three letter code, or amino acid name controlled by optional
|
|
405 parameter $NameType. By default, amino acids names are returned as three
|
|
406 letter code. Possible values for I<NameType>: I<ThreeLetterCode, OneLetterCode, or
|
|
407 AminoAcid>.
|
|
408
|
|
409 =item B<IsAminoAcid>
|
|
410
|
|
411 $Status = IsAminoAcid($AminoAcidID);
|
|
412
|
|
413 Returns a flag indicating whether or not it is a known amino acid ID.
|
|
414
|
|
415 =item B<IsAminoAcidProperty>
|
|
416
|
|
417 $Status = IsAminoAcidProperty($PropertyName);
|
|
418
|
|
419 Returns a flag indicating whether or not it is a known amino acid property name.
|
|
420
|
|
421 =back
|
|
422
|
|
423 =head1 AUTHOR
|
|
424
|
|
425 Manish Sud <msud@san.rr.com>
|
|
426
|
|
427 =head1 SEE ALSO
|
|
428
|
|
429 NucleicAcids.pm, PeriodicTable.pm
|
|
430
|
|
431 =head1 COPYRIGHT
|
|
432
|
|
433 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
434
|
|
435 This file is part of MayaChemTools.
|
|
436
|
|
437 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
438 the terms of the GNU Lesser General Public License as published by the Free
|
|
439 Software Foundation; either version 3 of the License, or (at your option)
|
|
440 any later version.
|
|
441
|
|
442 =cut
|