comparison mayachemtools/lib/AminoAcids.pm @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 package AminoAcids;
2 #
3 # $RCSfile: AminoAcids.pm,v $
4 # $Date: 2015/02/28 20:47:02 $
5 # $Revision: 1.25 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Text::ParseWords;
32 use TextUtil;
33 use FileUtil;
34
# @ISA below names Exporter, so load it explicitly instead of depending on some
# other module having loaded it already...
require Exporter;

# Exporter interface: nothing is exported by default; public functions are
# exported on request, individually or all together using the ':all' tag...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

#
# Load amino acids data...
#
# File scoped storage filled in once, at module load time, by _LoadAminoAcidsData:
#   %AminoAcidDataMap            - three letter code => {property name => value}
#   %AminoAcidThreeLetterCodeMap - lowercase three letter code => three letter code
#   %AminoAcidOneLetterCodeMap   - lowercase one letter code => three letter code
#   %AminoAcidNameMap            - lowercase amino acid name => three letter code
#   @AminoAcidPropertyNames      - property names in data file column order
#   %AminoAcidPropertyNamesMap   - property name => property name
#
my(%AminoAcidDataMap, %AminoAcidThreeLetterCodeMap, %AminoAcidOneLetterCodeMap, %AminoAcidNameMap, @AminoAcidPropertyNames, %AminoAcidPropertyNamesMap);
_LoadAminoAcidsData();
48
#
# Get a sorted list of all known amino acids identified by one of these values:
# one letter code, three letter code, or amino acid name...
#
# Optional NameType parameter (matched case insensitively): OneLetterCode or
# AminoAcid; any other value, or no value at all, selects three letter codes.
#
# An array is returned in list context; otherwise, a reference to an array.
#
sub GetAminoAcids {
  my $NameType = @_ ? $_[0] : 'ThreeLetterCode';

  # Property supplying the requested names; undef means the three letter
  # code, which is the key of the data map itself...
  my $NameProperty = ($NameType =~ /^OneLetterCode$/i) ? 'OneLetterCode'
                   : ($NameType =~ /^AminoAcid$/i)     ? 'AminoAcid'
                   :                                     undef;

  # Collect names, dropping any duplicates...
  my %UniqueNames;
  for my $Code (keys %AminoAcidDataMap) {
    my $Name = defined($NameProperty) ? $AminoAcidDataMap{$Code}{$NameProperty} : $Code;
    $UniqueNames{$Name} = $Name;
  }

  # Sort 'em out
  my @SortedNames = sort keys %UniqueNames;

  return (wantarray ? @SortedNames : \@SortedNames);
}
80
81
#
# Get all available properties data for an amino acid specified using any of
# these IDs: three letter code; one letter code; name.
#
# A reference to the hash holding property names and values for the amino acid
# is returned; undef is returned for an unknown amino acid ID.
#
sub GetAminoAcidPropertiesData {
  my($AminoAcidID) = @_;

  my $Code = _ValidateAminoAcidID($AminoAcidID);
  return undef unless $Code;

  return \%{ $AminoAcidDataMap{$Code} };
}
100
#
# Get names of all available amino acid properties. An array is returned in list
# context; otherwise, a reference to an array containing the names is returned.
#
# Optional Mode parameter (matched case insensitively) controls the order of names:
#   Alphabetical - ThreeLetterCode, OneLetterCode, and AminoAcid, followed by all
#                  other property names in sorted order
#   ByGroup      - order of the columns in the data file; default, and also used
#                  for any other value
#
sub GetAminoAcidPropertiesNames {
  my($Mode) = @_;
  my(@PropertyNames);

  # Mode is taken whenever it's supplied, just like NameType in GetAminoAcids...
  if (!defined $Mode) {
    $Mode = 'ByGroup';
  }

  @PropertyNames = ();
  if ($Mode =~ /^Alphabetical$/i) {
    # ThreeLetterCode, OneLetterCode, and AminoAcid are always listed first...
    push @PropertyNames, qw(ThreeLetterCode OneLetterCode AminoAcid);
    for my $PropertyName (sort keys %AminoAcidPropertyNamesMap) {
      if ($PropertyName !~ /^(?:ThreeLetterCode|OneLetterCode|AminoAcid)$/) {
        push @PropertyNames, $PropertyName;
      }
    }
  }
  else {
    push @PropertyNames, @AminoAcidPropertyNames;
  }
  return (wantarray ? @PropertyNames : \@PropertyNames);
}
130
#
# Is it a known amino acid? Input is either a one/three letter code or a name.
#
# Returns 1 for a known amino acid ID; otherwise, 0.
#
sub IsAminoAcid {
  my($AminoAcidID) = @_;

  return _ValidateAminoAcidID($AminoAcidID) ? 1 : 0;
}
142
143
#
# Is it an available amino acid property?
#
# Returns 1 when the name, spelled exactly as in the data file column labels,
# is a known property name; otherwise, 0.
#
sub IsAminoAcidProperty {
  my($PropertyName) = @_;

  return 1 if exists $AminoAcidPropertyNamesMap{$PropertyName};

  return 0;
}
155
#
# Implements GetAminoAcid<PropertyName> for a valid property name.
#
# The first argument is an amino acid ID: three letter code, one letter code, or
# name. The value of the property is returned for a known amino acid; undef is
# returned for an unknown amino acid ID.
#
# Croaks when the name of the function doesn't start with Get or the rest of the
# name, after removing GetAminoAcid, isn't a known property name.
#
sub AUTOLOAD {
  my($AminoAcidID) = @_;
  my($FunctionName, $PropertyName, $ThreeLetterCode);

  # Perl sets $AUTOLOAD to the fully qualified name of the function being called...
  our $AUTOLOAD;
  $FunctionName = $AUTOLOAD;
  $FunctionName =~ s/.*:://;

  # Only Get<PropertyName> functions are supported...
  if ($FunctionName !~ /^Get/) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Only Get<PropertyName> functions are implemented...";
  }

  $PropertyName = $FunctionName;
  $PropertyName =~ s/^GetAminoAcid//;
  if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Unknown amino acid property name, $PropertyName, specified...";
  }

  if (!($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID))) {
    return undef;
  }
  return $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
}
186
187
#
# Locate data/AminoAcidsData.csv under the MayaChemTools lib directory and load it.
#
# Croaks when the data file doesn't exist.
#
sub _LoadAminoAcidsData {
  my $LibDir = GetMayaChemToolsLibDirName();
  my $DataFile = "$LibDir/data/AminoAcidsData.csv";

  croak "Error: MayaChemTools package file, $DataFile, is missing: Possible installation problems..."
    unless -e $DataFile;

  _LoadData($DataFile);
}
204
#
# Load amino acids data from the specified AminoAcidsData.csv file...
#
# All module level data is reset and filled in again. Lines starting with # are
# skipped; the first other line supplies the column labels, which become the
# property names; each following line supplies the property values for one amino
# acid, keyed by the three letter code in its first column.
#
# Croaks when the file can't be opened, contains no column labels line, or contains
# a data line whose number of fields doesn't match the number of column labels.
# Data for an already loaded three letter code is ignored with a warning.
#
sub _LoadData {
  my($AminoAcidsDataFile) = @_;

  %AminoAcidDataMap = ();
  @AminoAcidPropertyNames = ();
  %AminoAcidPropertyNamesMap = ();
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  # Load property data for all amino acids...
  #
  # File Format:
  #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
  #
  #
  my($ThreeLetterCode, $Line, $NumOfCols, $NumOfFields, $InDelim, $DataFileHandle, @LineWords, @ColLabels);

  $InDelim = "\,";

  # Three argument open: the file name is never interpreted as an open mode...
  open $DataFileHandle, "<", $AminoAcidsDataFile or croak "Couldn't open $AminoAcidsDataFile: $! ...";

  # Skip lines up to column labels...
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }
  if (!$Line) {
    # Ran out of lines without finding the column labels...
    close $DataFileHandle;
    croak "Error: No column labels found in $AminoAcidsDataFile: Possible installation problems...";
  }
  @ColLabels = quotewords($InDelim, 0, $Line);
  $NumOfCols = @ColLabels;

  # Column labels are the property names...
  @AminoAcidPropertyNames = @ColLabels;
  %AminoAcidPropertyNamesMap = map { $_ => $_ } @ColLabels;

  # Process amino acid data...
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    @LineWords = quotewords($InDelim, 0, $Line);
    $NumOfFields = @LineWords;
    if ($NumOfFields != $NumOfCols) {
      croak "Error: The number of data fields, $NumOfFields, in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
    }
    $ThreeLetterCode = $LineWords[0];
    if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
      carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    # Store all the values: the field at each position goes with the property
    # name at the same position...
    %{$AminoAcidDataMap{$ThreeLetterCode}} = ();
    @{$AminoAcidDataMap{$ThreeLetterCode}}{@AminoAcidPropertyNames} = @LineWords;
  }
  close $DataFileHandle;

  # Setup one letter and amino acid name maps...
  _SetupAminoAcidIDMap();
}
277
278
#
# Setup maps from lowercase three letter code, one letter code, and amino acid
# name to the three letter code as shown in the data file.
#
sub _SetupAminoAcidIDMap {
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  for my $Code (keys %AminoAcidDataMap) {
    my $PropertiesRef = $AminoAcidDataMap{$Code};

    $AminoAcidThreeLetterCodeMap{lc($Code)} = $Code;
    $AminoAcidOneLetterCodeMap{lc($PropertiesRef->{OneLetterCode})} = $Code;
    $AminoAcidNameMap{lc($PropertiesRef->{AminoAcid})} = $Code;
  }
}
299
# Validate amino acid ID...
#
# Length of the ID decides how it's looked up, ignoring case: an ID of length 3
# as a three letter code; an ID of length 1 as a one letter code; anything else
# as an amino acid name.
#
# Returns the three letter code as stored in the data map for a known ID;
# otherwise, undef.
#
sub _ValidateAminoAcidID {
  my($AminoAcidID) = @_;

  my $IDLength = length($AminoAcidID);
  my $IDMapRef = ($IDLength == 3) ? \%AminoAcidThreeLetterCodeMap
               : ($IDLength == 1) ? \%AminoAcidOneLetterCodeMap
               :                    \%AminoAcidNameMap;

  my $IDKey = lc($AminoAcidID);

  return exists $IDMapRef->{$IDKey} ? $IDMapRef->{$IDKey} : undef;
}
326
327
328 1;
329
330 __END__
331
332 =head1 NAME
333
334 AminoAcids
335
336 =head1 SYNOPSIS
337
338 use AminoAcids;
339
340 use AminoAcids qw(:all);
341
342 =head1 DESCRIPTION
343
344 B<AminoAcids> module provides the following functions:
345
346 GetAminoAcidPropertiesData, GetAminoAcidPropertiesNames, GetAminoAcid<PropertyName>,
347 GetAminoAcids, IsAminoAcid, IsAminoAcidProperty
348
349 =head1 FUNCTIONS
350
351 =over 4
352
353 =item B<GetAminoAcidPropertiesData>
354
355 $DataHashRef = GetAminoAcidPropertiesData($AminoAcidID);
356
357 Returns a reference to hash containing property names and values for a specified
358 amino acid.
359
360 =item B<GetAminoAcidPropertiesNames>
361
362 @Names = GetAminoAcidPropertiesNames([$Mode]);
363 $NamesRef = GetAminoAcidPropertiesNames([$Mode]);
364
365 Returns an array or a reference to an array containing names of amino acids
366 properties. Order of amino acids properties is controlled by optional parameter
367 I<Mode>. Possible values for I<Mode>: I<Alphabetical or ByGroup>; Default: I<ByGroup>
368
369 =item B<GetAminoAcidPropertyName>
370
371 $Value = GetAminoAcid<PropertyName>($AminoAcidID);
372
373 Returns amino acid property value for a specified amino acid. These functions are
374 not defined in this module; these are implemented on the fly using Perl's AUTOLOAD
375 function. Here is the list of known amino acids I<property names>: DNACodons, RNACodons,
376 AcidicBasic, PolarNonpolar, Charged, Aromatic, HydrophobicHydophilic, IsoelectricPoint,
377 pKCOOH, pKNH3+, ChemicalFormula, MolecularWeight, ExactMass, ChemicalFormulaMinusH2O,
378 MolecularWeightMinusH2O(18.01524), ExactMassMinusH2O(18.01056), vanderWaalsVolume,
379 %AccessibleResidues, %BuriedResidues, AlphaHelixChouAndFasman,
380 AlphaHelixDeleageAndRoux, AlphaHelixLevitt, AminoAcidsComposition,
381 AminoAcidsCompositionInSwissProt, AntiparallelBetaStrand, AverageAreaBuried, AverageFlexibility,
382 BetaSheetChouAndFasman, BetaSheetDeleageAndRoux, BetaSheetLevitt,
383 BetaTurnChouAndFasman, BetaTurnDeleageAndRoux, BetaTurnLevitt, Bulkiness,
384 CoilDeleageAndRoux, HPLCHFBARetention, HPLCRetentionAtpH2.1, HPLCRetentionAtpH7.4,
385 HPLCTFARetention, HydrophobicityAbrahamAndLeo, HydrophobicityBlack,
386 HydrophobicityBullAndBreese, HydrophobicityChothia, HydrophobicityEisenbergAndOthers,
387 HydrophobicityFauchereAndOthers, HydrophobicityGuy, HydrophobicityHPLCAtpH3.4Cowan,
388 HydrophobicityHPLCAtpH7.5Cowan, HydrophobicityHPLCParkerAndOthers,
389 HydrophobicityHPLCWilsonAndOthers, HydrophobicityHoppAndWoods, HydrophobicityJanin,
390 HydrophobicityKyteAndDoolittle, HydrophobicityManavalanAndOthers,
391 HydrophobicityMiyazawaAndOthers, HydrophobicityOMHSweetAndOthers,
392 HydrophobicityRaoAndArgos, HydrophobicityRfMobility, HydrophobicityRoseAndOthers,
393 HydrophobicityRoseman, HydrophobicityWellingAndOthers, HydrophobicityWolfendenAndOthers,
394 ParallelBetaStrand, PolarityGrantham, PolarityZimmerman, RatioHeteroEndToSide,
395 RecognitionFactors, Refractivity, RelativeMutability, TotalBetaStrand, LinearStructure,
396 LinearStructureAtpH7.4
397
398 =item B<GetAminoAcids>
399
400 $NamesRef = GetAminoAcids([$NameType]);
401 (@Names) = GetAminoAcids([$NameType]);
402
403 Returns an array or a reference to an array containing names of amino acids
404 as one letter code, three letter code, or amino acid name controlled by optional
405 parameter $NameType. By default, amino acids names are returned as three
406 letter code. Possible values for I<NameType>: I<ThreeLetterCode, OneLetterCode, or
407 AminoAcid>.
408
409 =item B<IsAminoAcid>
410
411 $Status = IsAminoAcid($AminoAcidID);
412
413 Returns a flag indicating whether or not it's a known amino acid ID.
414
415 =item B<IsAminoAcidProperty>
416
417 $Status = IsAminoAcidProperty($PropertyName);
418
419 Returns a flag indicating whether or not it's a known amino acid property name.
420
421 =back
422
423 =head1 AUTHOR
424
425 Manish Sud <msud@san.rr.com>
426
427 =head1 SEE ALSO
428
429 NucleicAcids.pm, PeriodicTable.pm
430
431 =head1 COPYRIGHT
432
433 Copyright (C) 2015 Manish Sud. All rights reserved.
434
435 This file is part of MayaChemTools.
436
437 MayaChemTools is free software; you can redistribute it and/or modify it under
438 the terms of the GNU Lesser General Public License as published by the Free
439 Software Foundation; either version 3 of the License, or (at your option)
440 any later version.
441
442 =cut