Mercurial > repos > deepakjadmin > mayatool3_test3
comparison mayachemtools/lib/AminoAcids.pm @ 0:73ae111cf86f draft
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 11:55:01 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:73ae111cf86f |
---|---|
1 package AminoAcids; | |
2 # | |
3 # $RCSfile: AminoAcids.pm,v $ | |
4 # $Date: 2015/02/28 20:47:02 $ | |
5 # $Revision: 1.25 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability or fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
29 use strict; | |
30 use Carp; | |
31 use Text::ParseWords; | |
32 use TextUtil; | |
33 use FileUtil; | |
34 | |
# Exporter provides the import() method that honors the export lists below.
# Load it explicitly here rather than depending on some other module having
# already loaded it.
require Exporter;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Exporter);

# Nothing is exported by default; functions are available by name or via the 'all' tag...
@EXPORT = qw();
@EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

#
# Load amino acids data...
#
# File-level tables shared by all functions in this module:
#
#   %AminoAcidDataMap            - three letter code => hash of property name/value pairs
#   %AminoAcidThreeLetterCodeMap - lowercase three letter code => three letter code
#   %AminoAcidOneLetterCodeMap   - lowercase one letter code => three letter code
#   %AminoAcidNameMap            - lowercase amino acid name => three letter code
#   @AminoAcidPropertyNames      - property names in data file column order
#   %AminoAcidPropertyNamesMap   - property name => property name
#
# The tables are filled in once, at the time the module is loaded.
#
my(%AminoAcidDataMap, %AminoAcidThreeLetterCodeMap, %AminoAcidOneLetterCodeMap, %AminoAcidNameMap, @AminoAcidPropertyNames, %AminoAcidPropertyNamesMap, );
_LoadAminoAcidsData();
48 | |
#
# Return the sorted list of all known amino acids.
#
# The optional first argument selects how each amino acid is labeled:
# OneLetterCode, AminoAcid (name), or ThreeLetterCode. Matching is case
# insensitive; any other value, or no argument at all, yields three letter codes.
#
# An array is returned in list context; otherwise, a reference to an array.
#
sub GetAminoAcids {
  my $NameType = 'ThreeLetterCode';
  ($NameType) = @_ if (@_ >= 1);

  # Collect labels into a hash so that repeated labels collapse into one entry...
  my %UniqueLabels;
  for my $Code (keys %AminoAcidDataMap) {
    my $Label;
    if ($NameType =~ /^OneLetterCode$/i) {
      $Label = $AminoAcidDataMap{$Code}{OneLetterCode};
    }
    elsif ($NameType =~ /^AminoAcid$/i) {
      $Label = $AminoAcidDataMap{$Code}{AminoAcid};
    }
    else {
      $Label = $Code;
    }
    $UniqueLabels{$Label} = $Label;
  }

  my @SortedLabels = sort keys %UniqueLabels;

  return (wantarray ? @SortedLabels : \@SortedLabels);
}
80 | |
81 | |
#
# Return all available properties data for one amino acid. The amino acid may be
# specified by its three letter code, one letter code, or name.
#
# The return value is a reference to the hash of property name/value pairs stored
# for the amino acid in this module's data table (not a copy); undef is returned
# for an unknown amino acid ID.
#
sub GetAminoAcidPropertiesData {
  my($AminoAcidID) = @_;

  my $Code = _ValidateAminoAcidID($AminoAcidID);
  if (!$Code) {
    return undef;
  }

  return \%{$AminoAcidDataMap{$Code}};
}
100 | |
#
# Return names of all available amino acid properties.
#
# When called with exactly one argument, it is taken as the ordering mode:
#
#   Alphabetical - ThreeLetterCode, OneLetterCode, and AminoAcid first, followed by
#                  the remaining property names in sorted order
#   anything else (default: ByGroup) - the order of columns in the data file
#
# An array is returned in list context; otherwise, a reference to an array.
#
sub GetAminoAcidPropertiesNames {
  my $Mode = (@_ == 1) ? $_[0] : 'ByGroup';
  my @Names;

  if ($Mode =~ /^Alphabetical$/i) {
    # The three identifier columns are always listed first...
    @Names = qw(ThreeLetterCode OneLetterCode AminoAcid);
    push @Names, grep { $_ !~ /^(ThreeLetterCode|OneLetterCode|AminoAcid)$/ } sort keys %AminoAcidPropertyNamesMap;
  }
  else {
    @Names = @AminoAcidPropertyNames;
  }

  return (wantarray ? @Names : \@Names);
}
130 | |
#
# Check whether the specified ID - a one letter code, a three letter code, or
# a name - corresponds to a known amino acid. Returns 1 or 0.
#
sub IsAminoAcid {
  my($AminoAcidID) = @_;

  return 1 if (_ValidateAminoAcidID($AminoAcidID));

  return 0;
}
142 | |
143 | |
#
# Check whether the specified name is one of the property names loaded from
# the data file. The comparison is an exact, case sensitive match. Returns 1 or 0.
#
sub IsAminoAcidProperty {
  my($PropertyName) = @_;

  if (exists $AminoAcidPropertyNamesMap{$PropertyName}) {
    return 1;
  }

  return 0;
}
155 | |
#
# Implements GetAminoAcid<PropertyName> for a valid property name.
#
# Perl invokes AUTOLOAD for calls to functions that are not defined in this package.
# The name of the requested function is examined: it must start with "Get", and what
# remains after removing the "GetAminoAcid" prefix must be a known property name.
#
# The first argument is an amino acid ID: three letter code, one letter code, or name.
#
# Returns the property value for the specified amino acid, or undef for an unknown
# amino acid ID. Croaks for unsupported function names and unknown property names.
#
sub AUTOLOAD {
  my($AminoAcidID) = @_;
  my($FunctionName, $PropertyName, $PropertyValue, $ThreeLetterCode);

  $PropertyValue = undef;

  # $AUTOLOAD holds the fully qualified name of the function being called...
  use vars qw($AUTOLOAD);
  $FunctionName = $AUTOLOAD;
  $FunctionName =~ s/.*:://;

  # Only Get<PropertyName> functions are supported...
  if ($FunctionName !~ /^Get/) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Only Get<PropertyName> functions are implemented...";
  }

  $PropertyName = $FunctionName;
  $PropertyName =~ s/^GetAminoAcid//;
  if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Unknown amino acid property name, $PropertyName, specified...";
  }

  if (!($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID))) {
    return undef;
  }
  $PropertyValue = $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
  return $PropertyValue;
}
186 | |
187 | |
#
# Locate AminoAcidsData.csv under the data directory of the MayaChemTools lib
# directory and load it. Croaks when the data file doesn't exist.
#
sub _LoadAminoAcidsData {
  my $LibDir = GetMayaChemToolsLibDirName();
  my $DataFile = $LibDir . "/data/AminoAcidsData.csv";

  croak "Error: MayaChemTools package file, $DataFile, is missing: Possible installation problems..."
    unless (-e $DataFile);

  _LoadData($DataFile);
}
204 | |
#
# Load the specified AminoAcidsData.csv file into the file-level data tables...
#
# File Format:
#
#   - Lines starting with # are comments.
#   - The first non-comment line contains quoted, comma delimited column labels; these
#     become the property names:
#
#       "ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar",
#       "Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH",
#       "pKNH3+","MolecularWeight", ... ,"LinearStructure","LinearStructureAtpH7.4"
#
#   - Each following non-comment line holds the data for one amino acid and must
#     contain exactly as many fields as there are column labels.
#
# All tables are reset before loading. Croaks when the file can't be opened, has no
# column labels line, or contains a data line with a wrong number of fields. A data
# line for an amino acid that has already been loaded is ignored with a warning.
#
sub _LoadData {
  my($AminoAcidsDataFile) = @_;

  %AminoAcidDataMap = ();
  @AminoAcidPropertyNames = ();
  %AminoAcidPropertyNamesMap = ();
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  my($ThreeLetterCode, $Line, $NumOfCols, $InDelim, $Name, @LineWords, @ColLabels);

  $InDelim = "\,";

  # Three argument open on a lexical file handle: the file name is never interpreted
  # as a mode, and the handle is closed automatically should a croak leave this function...
  open(my $DataFileHandle, '<', $AminoAcidsDataFile) or croak "Couldn't open $AminoAcidsDataFile: $! ...";

  # Skip lines up to column labels...
  # NOTE(review): the loops rely on GetTextLine returning a false value once no more
  # lines are available - confirm against TextUtil.
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }
  if (!$Line) {
    croak "Error: No column labels found in MayaChemTools package file, $AminoAcidsDataFile: Possible installation problems...";
  }
  @ColLabels = quotewords($InDelim, 0, $Line);
  $NumOfCols = @ColLabels;

  # Extract property names from column labels...
  for $Name (@ColLabels) {
    push @AminoAcidPropertyNames, $Name;

    # Store property names...
    $AminoAcidPropertyNamesMap{$Name} = $Name;
  }

  # Process amino acid data...
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    @LineWords = quotewords($InDelim, 0, $Line);
    if (@LineWords != $NumOfCols) {
      # Report the number of fields found, not the fields themselves...
      croak "Error: The number of data fields, " . scalar(@LineWords) . ", in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
    }

    # The first column is the key for the data table...
    $ThreeLetterCode = $LineWords[0];
    if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
      carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    # Store all the values; for a repeated column label, the value from the last column wins...
    %{$AminoAcidDataMap{$ThreeLetterCode}} = ();
    for my $Index (0 .. $#LineWords) {
      $AminoAcidDataMap{$ThreeLetterCode}{$AminoAcidPropertyNames[$Index]} = $LineWords[$Index];
    }
  }
  close $DataFileHandle;

  # Setup one letter and amino acid name maps...
  _SetupAminoAcidIDMap();
}
277 | |
278 | |
#
# Rebuild the three ID maps - lowercase three letter code, lowercase one letter
# code, and lowercase name - each pointing to the three letter code as shown
# in the data file.
#
sub _SetupAminoAcidIDMap {
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  for my $Code (keys %AminoAcidDataMap) {
    my $Properties = $AminoAcidDataMap{$Code};

    $AminoAcidThreeLetterCodeMap{lc($Code)} = $Code;
    $AminoAcidOneLetterCodeMap{lc($Properties->{OneLetterCode})} = $Code;
    $AminoAcidNameMap{lc($Properties->{AminoAcid})} = $Code;
  }
}
299 | |
#
# Validate amino acid ID...
#
# The length of the ID decides which map is consulted: three characters for a three
# letter code; one character for a one letter code; anything else for a name. The
# lookup is case insensitive.
#
# Returns the three letter code as shown in the data file, or undef for an unknown ID.
#
sub _ValidateAminoAcidID {
  my($AminoAcidID) = @_;

  my $IDLength = length($AminoAcidID);
  my $LookupKey = lc($AminoAcidID);

  my $LookupMapRef = ($IDLength == 3) ? \%AminoAcidThreeLetterCodeMap
                   : ($IDLength == 1) ? \%AminoAcidOneLetterCodeMap
                   :                    \%AminoAcidNameMap;

  if (!exists $LookupMapRef->{$LookupKey}) {
    return undef;
  }

  return $LookupMapRef->{$LookupKey};
}
326 | |
327 | |
328 1; | |
329 | |
330 __END__ | |
331 | |
332 =head1 NAME | |
333 | |
334 AminoAcids | |
335 | |
336 =head1 SYNOPSIS | |
337 | |
338 use AminoAcids; | |
339 | |
340 use AminoAcids qw(:all); | |
341 | |
342 =head1 DESCRIPTION | |
343 | |
344 B<AminoAcids> module provides the following functions: | |
345 | |
346 GetAminoAcidPropertiesData, GetAminoAcidPropertiesNames, GetAminoAcid<PropertyName>, | |
347 GetAminoAcids, IsAminoAcid, IsAminoAcidProperty | |
348 | |
349 =head1 FUNCTIONS | |
350 | |
351 =over 4 | |
352 | |
353 =item B<GetAminoAcidPropertiesData> | |
354 | |
355 $DataHashRef = GetAminoAcidPropertiesData($AminoAcidID); | |
356 | |
357 Returns a reference to hash containing property names and values for a specified | |
358 amino acid. | |
359 | |
360 =item B<GetAminoAcidPropertiesNames> | |
361 | |
362 @Names = GetAminoAcidPropertiesNames([$Mode]); | |
363 $NamesRef = GetAminoAcidPropertiesNames([$Mode]); | |
364 | |
365 Returns an array or a reference to an array containing names of amino acids | |
366 properties. Order of amino acids properties is controlled by optional parameter | |
367 I<Mode>. Possible values for I<Mode>: I<Alphabetical or ByGroup>; Default: I<ByGroup> | |
368 | |
369 =item B<GetAminoAcidPropertyName> | |
370 | |
371 $Value = GetAminoAcid<PropertyName>($AminoAcidID); | |
372 | |
373 Returns amino acid property value for a specified amino acid. These functions are | |
374 not defined in this module; these are implemented on the fly using Perl's AUTOLOAD | |
375 function. Here is the list of known amino acids I<property names>: DNACodons, RNACodons, | |
376 AcidicBasic, PolarNonpolar, Charged, Aromatic, HydrophobicHydophilic, IsoelectricPoint, | |
377 pKCOOH, pKNH3+, ChemicalFormula, MolecularWeight, ExactMass, ChemicalFormulaMinusH2O, | |
378 MolecularWeightMinusH2O(18.01524), ExactMassMinusH2O(18.01056), vanderWaalsVolume, | |
379 %AccessibleResidues, %BuriedResidues, AlphaHelixChouAndFasman, | |
380 AlphaHelixDeleageAndRoux, AlphaHelixLevitt, AminoAcidsComposition, | |
381 AminoAcidsCompositionInSwissProt, AntiparallelBetaStrand, AverageAreaBuried, AverageFlexibility, | |
382 BetaSheetChouAndFasman, BetaSheetDeleageAndRoux, BetaSheetLevitt, | |
383 BetaTurnChouAndFasman, BetaTurnDeleageAndRoux, BetaTurnLevitt, Bulkiness, | |
384 CoilDeleageAndRoux, HPLCHFBARetention, HPLCRetentionAtpH2.1, HPLCRetentionAtpH7.4, | |
385 HPLCTFARetention, HydrophobicityAbrahamAndLeo, HydrophobicityBlack, | |
386 HydrophobicityBullAndBreese, HydrophobicityChothia, HydrophobicityEisenbergAndOthers, | |
387 HydrophobicityFauchereAndOthers, HydrophobicityGuy, HydrophobicityHPLCAtpH3.4Cowan, | |
388 HydrophobicityHPLCAtpH7.5Cowan, HydrophobicityHPLCParkerAndOthers, | |
389 HydrophobicityHPLCWilsonAndOthers, HydrophobicityHoppAndWoods, HydrophobicityJanin, | |
390 HydrophobicityKyteAndDoolittle, HydrophobicityManavalanAndOthers, | |
391 HydrophobicityMiyazawaAndOthers, HydrophobicityOMHSweetAndOthers, | |
392 HydrophobicityRaoAndArgos, HydrophobicityRfMobility, HydrophobicityRoseAndOthers, | |
393 HydrophobicityRoseman, HydrophobicityWellingAndOthers, HydrophobicityWolfendenAndOthers, | |
394 ParallelBetaStrand, PolarityGrantham, PolarityZimmerman, RatioHeteroEndToSide, | |
395 RecognitionFactors, Refractivity, RelativeMutability, TotalBetaStrand, LinearStructure, | |
396 LinearStructureAtpH7.4 | |
397 | |
398 =item B<GetAminoAcids> | |
399 | |
400 $NamesRef = GetAminoAcids([$NameType]); | |
401 (@Names) = GetAminoAcids([$NameType]); | |
402 | |
403 Returns an array or a reference to an array containing names of amino acids | |
404 as one letter code, three letter code, or amino acid name controlled by optional | |
405 parameter $NameType. By default, amino acids names are returned as three | |
406 letter code. Possible values for I<NameType>: I<ThreeLetterCode, OneLetterCode, or | |
407 AminoAcid>. | |
408 | |
409 =item B<IsAminoAcid> | |
410 | |
411 $Status = IsAminoAcid($AminoAcidID); | |
412 | |
413 Returns a flag indicating whether or not it's a known amino acid ID. | |
414 | |
415 =item B<IsAminoAcidProperty> | |
416 | |
417 $Status = IsAminoAcidProperty($PropertyName); | |
418 | |
419 Returns a flag indicating whether or not it's a known amino acid property name. | |
420 | |
421 =back | |
422 | |
423 =head1 AUTHOR | |
424 | |
425 Manish Sud <msud@san.rr.com> | |
426 | |
427 =head1 SEE ALSO | |
428 | |
429 NucleicAcids.pm, PeriodicTable.pm | |
430 | |
431 =head1 COPYRIGHT | |
432 | |
433 Copyright (C) 2015 Manish Sud. All rights reserved. | |
434 | |
435 This file is part of MayaChemTools. | |
436 | |
437 MayaChemTools is free software; you can redistribute it and/or modify it under | |
438 the terms of the GNU Lesser General Public License as published by the Free | |
439 Software Foundation; either version 3 of the License, or (at your option) | |
440 any later version. | |
441 | |
442 =cut |