Mercurial > repos > deepakjadmin > mayatool3_test2
view lib/AminoAcids.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line source
package AminoAcids;
#
# $RCSfile: AminoAcids.pm,v $
# $Date: 2015/02/28 20:47:02 $
# $Revision: 1.25 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Text::ParseWords;
use TextUtil;
use FileUtil;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

#
# Load amino acids data...
#
# Module-level tables filled in by _LoadData and _SetupAminoAcidIDMap:
#   %AminoAcidDataMap            three letter code => { property name => value }
#   %AminoAcid*Map               lowercase ID => three letter code
#   @AminoAcidPropertyNames      property names in data file column order
#   %AminoAcidPropertyNamesMap   property name => property name
#
my(%AminoAcidDataMap, %AminoAcidThreeLetterCodeMap, %AminoAcidOneLetterCodeMap, %AminoAcidNameMap, @AminoAcidPropertyNames, %AminoAcidPropertyNamesMap, );
_LoadAminoAcidsData();

#
# Get a list of all known amino acids as one of these values:
# one letter code, three letter code, or amino acid name...
#
# Parameters:
#   $NameType - optional; ThreeLetterCode (default), OneLetterCode, or AminoAcid
#               (case insensitive). Any other value is treated as ThreeLetterCode.
#
# Returns a sorted list of unique names in list context, or a reference to
# that list in scalar context.
#
sub GetAminoAcids {
  # An explicit undef is treated like an omitted argument: both end up as
  # three letter codes, but this way no "uninitialized" warning is triggered.
  my $NameType = (@_ >= 1 && defined $_[0]) ? $_[0] : 'ThreeLetterCode';

  # Collect names; the hash removes duplicates...
  my %AminoAcidNamesMap = ();
  for my $ThreeLetterCode (keys %AminoAcidDataMap) {
    my $Name;
    if ($NameType =~ /^OneLetterCode$/i) {
      $Name = $AminoAcidDataMap{$ThreeLetterCode}{OneLetterCode};
    }
    elsif ($NameType =~ /^AminoAcid$/i) {
      $Name = $AminoAcidDataMap{$ThreeLetterCode}{AminoAcid};
    }
    else {
      $Name = $ThreeLetterCode;
    }
    $AminoAcidNamesMap{$Name} = $Name;
  }

  # Sort 'em out
  my @AminoAcidNames = sort keys %AminoAcidNamesMap;

  return (wantarray ? @AminoAcidNames : \@AminoAcidNames);
}

#
# Get all available properties data for an amino acid using any of these symbols:
# three letter code; one letter code; name.
#
# A reference to a hash array is returned with keys and values representing property
# name and its values respectively. The reference points at the hash stored in this
# module's data table, not at a copy. undef is returned for an unknown amino acid ID.
#
sub GetAminoAcidPropertiesData {
  my($AminoAcidID) = @_;
  my($ThreeLetterCode);

  if ($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID)) {
    return \%{$AminoAcidDataMap{$ThreeLetterCode}};
  }
  else {
    return undef;
  }
}

#
# Get names of all available amino acid properties.
#
# Parameters:
#   $Mode - optional; Alphabetical or ByGroup (default). It is honored only when
#           exactly one argument is passed.
#
# Returns the property names as a list in list context, or as a reference to
# an array in scalar context. ByGroup corresponds to data file column order.
#
sub GetAminoAcidPropertiesNames {
  my($Mode, @PropertyNames);

  $Mode = 'ByGroup';
  if (@_ == 1) {
    ($Mode) = @_;
  }

  @PropertyNames = ();
  if (defined($Mode) && $Mode =~ /^Alphabetical$/i) {
    # ThreeLetterCode, OneLetterCode, and AminoAcid are always listed first...
    push @PropertyNames, qw(ThreeLetterCode OneLetterCode AminoAcid);
    push @PropertyNames, grep { !/^(ThreeLetterCode|OneLetterCode|AminoAcid)$/ } sort keys %AminoAcidPropertyNamesMap;
  }
  else {
    push @PropertyNames, @AminoAcidPropertyNames;
  }
  return (wantarray ? @PropertyNames : \@PropertyNames);
}

#
# Is it a known amino acid? Input is either an one/three letter code or a name.
#
# Returns 1 for a known amino acid ID and 0 otherwise.
#
sub IsAminoAcid {
  my($AminoAcidID) = @_;

  return _ValidateAminoAcidID($AminoAcidID) ? 1 : 0;
}

#
# Is it an available amino acid property?
#
# Returns 1 when the name matches a data file column label exactly (the
# comparison is case sensitive) and 0 otherwise.
#
sub IsAminoAcidProperty {
  my($PropertyName) = @_;

  return (defined($PropertyName) && exists($AminoAcidPropertyNamesMap{$PropertyName})) ? 1 : 0;
}

#
# Implements GetAminoAcid<PropertyName> for a valid property name.
#
# Parameters:
#   $AminoAcidID - three letter code, one letter code, or amino acid name.
#
# Returns the property value, or undef for an unknown amino acid ID. Croaks when
# the function name does not start with Get or does not map to a known property.
#
sub AUTOLOAD {
  my($AminoAcidID) = @_;
  my($FunctionName, $PropertyName, $PropertyValue, $ThreeLetterCode);

  $PropertyValue = undef;

  use vars qw($AUTOLOAD);
  $FunctionName = $AUTOLOAD;
  $FunctionName =~ s/.*:://;

  # Perl routes an undefined DESTROY through AUTOLOAD as well; it is never a
  # property accessor, so ignore it instead of croaking during cleanup...
  return if $FunctionName eq 'DESTROY';

  # Only Get<PropertyName> functions are supported...
  if ($FunctionName !~ /^Get/) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Only Get<PropertyName> functions are implemented...";
  }

  $PropertyName = $FunctionName;
  $PropertyName =~  s/^GetAminoAcid//;
  if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
    croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Unknown amino acid property name, $PropertyName, specified...";
  }

  if (!($ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID))) {
    return undef;
  }
  $PropertyValue = $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
  return $PropertyValue;
}

#
# Load AminoAcidsData.csv files from <MayaChemTools>/lib directory...
#
# Croaks when the data file does not exist.
#
sub _LoadAminoAcidsData {
  my($AminoAcidsDataFile, $MayaChemToolsLibDir);

  $MayaChemToolsLibDir = GetMayaChemToolsLibDirName();

  $AminoAcidsDataFile =  "$MayaChemToolsLibDir" . "/data/AminoAcidsData.csv";

  if (! -e "$AminoAcidsDataFile") {
    croak "Error: MayaChemTools package file, $AminoAcidsDataFile, is missing: Possible installation problems...";
  }

  _LoadData($AminoAcidsDataFile);
}

#
# Load AminoAcidsData.csv file from <MayaChemTools>/lib directory...
#
# Parameters:
#   $AminoAcidsDataFile - path of the comma delimited data file.
#
# Resets and refills all module-level tables. Croaks when the file cannot be
# opened, has no column labels line, or contains a data line whose number of
# fields differs from the number of column labels. A repeated three letter code
# is skipped with a warning.
#
sub _LoadData {
  my($AminoAcidsDataFile) = @_;

  %AminoAcidDataMap = ();
  @AminoAcidPropertyNames = ();
  %AminoAcidPropertyNamesMap = ();
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  # Load property data for all amino acids...
  #
  # File Format:
  #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
  #
  #

  my($ThreeLetterCode, $Line, $NumOfCols, $InDelim, $Name, @LineWords, @ColLabels);

  $InDelim = ",";

  # Three-argument open with a lexical handle: the file name is never parsed
  # for a mode or pipe character, and the handle cannot clash with a global...
  open(my $AminoAcidsDataFH, '<', $AminoAcidsDataFile) or croak "Couldn't open $AminoAcidsDataFile: $! ...";

  # Skip lines up to column labels...
  LINE: while ($Line = GetTextLine($AminoAcidsDataFH)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }

  # The loop above ends with a false $Line when the file holds nothing except
  # comment lines; without labels every table would silently stay empty...
  if (!defined($Line) || $Line eq '') {
    close $AminoAcidsDataFH;
    croak "Error: MayaChemTools package file, $AminoAcidsDataFile, doesn't contain a column labels line: Possible installation problems...";
  }

  @ColLabels= quotewords($InDelim, 0, $Line);
  $NumOfCols = @ColLabels;

  # Extract property names from column labels...
  @AminoAcidPropertyNames = ();
  for $Name (@ColLabels) {
    push @AminoAcidPropertyNames, $Name;

    # Store property names...
    $AminoAcidPropertyNamesMap{$Name} = $Name;
  }

  # Process amino acid data...
  LINE: while ($Line = GetTextLine($AminoAcidsDataFH)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    @LineWords = quotewords($InDelim, 0, $Line);
    if (@LineWords != $NumOfCols) {
      # Report the number of fields, not the fields themselves...
      croak "Error: The number of data fields, " . scalar(@LineWords) . ", in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
    }
    $ThreeLetterCode = $LineWords[0];
    if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
      carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    # Store all the values; both lists have $NumOfCols entries, so the hash
    # slice pairs each column label with the value in the same position...
    %{$AminoAcidDataMap{$ThreeLetterCode}} = ();
    @{$AminoAcidDataMap{$ThreeLetterCode}}{@AminoAcidPropertyNames} = @LineWords;
  }
  close $AminoAcidsDataFH;

  # Setup one letter and amino acid name maps...
  _SetupAminoAcidIDMap();
}

#
# Setup lowercase three/one letter code and name maps pointing
# to three letter code as shown in data file.
#
# Rebuilds the three lookup tables from %AminoAcidDataMap. Keys are lowercased so
# that lookups are case insensitive; values are three letter codes spelled as in
# the data file.
#
sub _SetupAminoAcidIDMap {
  %AminoAcidThreeLetterCodeMap = ();
  %AminoAcidOneLetterCodeMap = ();
  %AminoAcidNameMap = ();

  for my $ThreeLetterCode (keys %AminoAcidDataMap) {
    my $PropertiesRef = $AminoAcidDataMap{$ThreeLetterCode};

    $AminoAcidThreeLetterCodeMap{lc($ThreeLetterCode)} = $ThreeLetterCode;
    $AminoAcidOneLetterCodeMap{lc($PropertiesRef->{OneLetterCode})} = $ThreeLetterCode;
    $AminoAcidNameMap{lc($PropertiesRef->{AminoAcid})} = $ThreeLetterCode;
  }
}

# Validate amino acid ID...
#
# Parameters:
#   $AminoAcidID - three letter code, one letter code, or amino acid name in
#                  any letter case.
#
# The lookup table is picked by the length of the ID: 3 characters means a three
# letter code, 1 character a one letter code, and anything else an amino acid name.
#
# Returns the three letter code as spelled in the data file, or undef when the
# ID is undefined or unknown.
#
sub _ValidateAminoAcidID {
  my($AminoAcidID) = @_;

  # An undefined ID can never match; bail out before length() and lc() touch
  # it so that no "uninitialized value" warning shows up under perl -w...
  if (!defined $AminoAcidID) {
    return undef;
  }

  my $IDLength = length($AminoAcidID);
  my $IDMapRef = ($IDLength == 3) ? \%AminoAcidThreeLetterCodeMap
               : ($IDLength == 1) ? \%AminoAcidOneLetterCodeMap
               :                    \%AminoAcidNameMap;

  my $LookupKey = lc($AminoAcidID);

  return exists $IDMapRef->{$LookupKey} ? $IDMapRef->{$LookupKey} : undef;
}

1;

__END__

=head1 NAME

AminoAcids

=head1 SYNOPSIS

use AminoAcids;

use AminoAcids qw(:all);

=head1 DESCRIPTION

B<AminoAcids> module provides the following functions:

GetAminoAcidPropertiesData, GetAminoAcidPropertiesNames, GetAminoAcid<PropertyName>,
GetAminoAcids, IsAminoAcid, IsAminoAcidProperty

=head1 FUNCTIONS

=over 4

=item B<GetAminoAcidPropertiesData>

    $DataHashRef = GetAminoAcidPropertiesData($AminoAcidID);

Returns a reference to hash containing property names and values for a specified
amino acid.

=item B<GetAminoAcidPropertiesNames>

    @Names = GetAminoAcidPropertiesNames([$Mode]);
    $NamesRef = GetAminoAcidPropertiesNames([$Mode]);

Returns an array or a reference to an array containing names of amino acids
properties. Order of amino acids properties is controlled by optional parameter I<Mode>.
Possible values for I<Mode>: I<Alphabetical or ByGroup>; Default: I<ByGroup>

=item B<GetAminoAcidPropertyName>

    $Value = GetAminoAcid<PropertyName>($AminoAcidID);

Returns amino acid property value for a specified amino acid. These functions are
not defined in this module; these are implemented on the fly using Perl's AUTOLOAD
function. Here is the list of known amino acids I<property names>: DNACodons, RNACodons,
AcidicBasic, PolarNonpolar, Charged, Aromatic, HydrophobicHydophilic, IsoelectricPoint,
pKCOOH, pKNH3+, ChemicalFormula, MolecularWeight, ExactMass, ChemicalFormulaMinusH2O,
MolecularWeightMinusH2O(18.01524), ExactMassMinusH2O(18.01056), vanderWaalsVolume,
%AccessibleResidues, %BuriedResidues, AlphaHelixChouAndFasman,
AlphaHelixDeleageAndRoux, AlphaHelixLevitt, AminoAcidsComposition,
AminoAcidsCompositionInSwissProt, AntiparallelBetaStrand, AverageAreaBuried,
AverageFlexibility, BetaSheetChouAndFasman, BetaSheetDeleageAndRoux, BetaSheetLevitt,
BetaTurnChouAndFasman, BetaTurnDeleageAndRoux, BetaTurnLevitt, Bulkiness,
CoilDeleageAndRoux, HPLCHFBARetention, HPLCRetentionAtpH2.1, HPLCRetentionAtpH7.4,
HPLCTFARetention, HydrophobicityAbrahamAndLeo, HydrophobicityBlack,
HydrophobicityBullAndBreese, HydrophobicityChothia, HydrophobicityEisenbergAndOthers,
HydrophobicityFauchereAndOthers, HydrophobicityGuy, HydrophobicityHPLCAtpH3.4Cowan,
HydrophobicityHPLCAtpH7.5Cowan, HydrophobicityHPLCParkerAndOthers,
HydrophobicityHPLCWilsonAndOthers, HydrophobicityHoppAndWoods, HydrophobicityJanin,
HydrophobicityKyteAndDoolittle, HydrophobicityManavalanAndOthers,
HydrophobicityMiyazawaAndOthers, HydrophobicityOMHSweetAndOthers,
HydrophobicityRaoAndArgos, HydrophobicityRfMobility, HydrophobicityRoseAndOthers,
HydrophobicityRoseman, HydrophobicityWellingAndOthers, HydrophobicityWolfendenAndOthers,
ParallelBetaStrand, PolarityGrantham, PolarityZimmerman, RatioHeteroEndToSide,
RecognitionFactors, Refractivity, RelativeMutability, TotalBetaStrand, LinearStructure,
LinearStructureAtpH7.4

=item B<GetAminoAcids>

    $NamesRef = GetAminoAcids([$NameType]);
    (@Names) = GetAminoAcids([$NameType]);

Returns an array or a reference to an array containing names of amino acids
as one letter code, three letter code, or amino acid name controlled by optional
parameter $NameType. By default, amino acids names are returned as three letter code.
Possible values for I<NameType>: I<ThreeLetterCode, OneLetterCode, or AminoAcid>.

=item B<IsAminoAcid>

    $Status = IsAminoAcid($AminoAcidID);

Returns a flag indicating whether or not it's a known amino acid ID.

=item B<IsAminoAcidProperty>

    $Status = IsAminoAcidProperty($PropertyName);

Returns a flag indicating whether or not it's a known amino acid property name.

=back

=head1 AUTHOR

Manish Sud <msud@san.rr.com>

=head1 SEE ALSO

NucleicAcids.pm, PeriodicTable.pm

=head1 COPYRIGHT

Copyright (C) 2015 Manish Sud. All rights reserved.

This file is part of MayaChemTools.

MayaChemTools is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

=cut