Mercurial > repos > deepakjadmin > mayatool3_test2
diff lib/NucleicAcids.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/NucleicAcids.pm Wed Jan 20 09:23:18 2016 -0500 @@ -0,0 +1,475 @@ +package NucleicAcids; +# +# $RCSfile: NucleicAcids.pm,v $ +# $Date: 2015/02/28 20:47:18 $ +# $Revision: 1.25 $ +# +# Author: Manish Sud <msud@san.rr.com> +# +# Copyright (C) 2015 Manish Sud. All rights reserved. +# +# This file is part of MayaChemTools. +# +# MayaChemTools is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation; either version 3 of the License, or (at your option) any +# later version. +# +# MayaChemTools is distributed in the hope that it will be useful, but without +# any warranty; without even the implied warranty of merchantability of fitness +# for a particular purpose. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or +# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, +# Boston, MA, 02111-1307, USA. +# + +use strict; +use Carp; +use Text::ParseWords; +use TextUtil; +use FileUtil; + +use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + +@ISA = qw(Exporter); +@EXPORT = qw(); +@EXPORT_OK = qw(GetNucleicAcids GetNucleicAcidsByType GetNucleicAcidPropertiesData GetNucleicAcidPropertiesNames IsNucleicAcid IsNucleicAcidProperty IsNucleicAcidType); + +%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); + +# +# Load nucleic acids data... +# +my(%NucleicAcidDataMap, %NucleicAcidCodeMap, %NucleicAcidOtherCodeMap, %NucleicAcidNameMap, @NucleicAcidCodes, @NucleicAcidPropertyNames, %NucleicAcidPropertyNamesMap, %NucleicAcidTypesMap); +_LoadNucleicAcidsData(); + +# +# Get a list of all known nucleic acids as one of these values: +# code or nucleic acid name... +# +sub GetNucleicAcids { + my($NameType, $Code, $Name, @NucleicAcidNames); + + $NameType = 'Code'; + if (@_ >= 1) { + ($NameType) = @_; + } + + # Collect names... + @NucleicAcidNames = (); + for $Code (@NucleicAcidCodes) { + NAME : { + if ($NameType =~ /^Name$/i) {$Name = $NucleicAcidDataMap{$Code}{Name}; last NAME; } + $Name = $Code; + } + push @NucleicAcidNames, $Name; + } + + return (wantarray ? @NucleicAcidNames : \@NucleicAcidNames); +} + +# +# Get a list of all known nucleic acids by one of these specified types: +# Nucleobase, Nucleoside, Deoxynucleoside, Nucleotide, Deoxynucleotide. Default: Nucleoside +# +sub GetNucleicAcidsByType { + my($NameType, $Type, $Code, $Name, @NucleicAcidNames); + + $Type = 'Nucleoside'; + $NameType = 'Code'; + if (@_ == 2) { + ($Type, $NameType) = @_; + } + elsif (@_ == 1) { + ($Type) = @_; + } + + # Collect names... + @NucleicAcidNames = (); + CODE: for $Code (@NucleicAcidCodes) { + if ($NucleicAcidDataMap{$Code}{Type} !~ /^$Type$/i ) { + next CODE; + } + NAME : { + if ($NameType =~ /^Name$/i) {$Name = $NucleicAcidDataMap{$Code}{Name}; last NAME; } + $Name = $Code; + } + push @NucleicAcidNames, $Name; + } + + return (wantarray ? @NucleicAcidNames : \@NucleicAcidNames); +} + +# +# Get all available properties data for an nucleic acid using any of these symbols: +# code, other code or name. +# +# A reference to a hash array is returned with keys and values representing property +# name and its values respectively. +# +sub GetNucleicAcidPropertiesData { + my($NucleicAcidID) = @_; + my($Code); + + if ($Code = _ValidateNucleicAcidID($NucleicAcidID)) { + return \%{$NucleicAcidDataMap{$Code}}; + } + else { + return undef; + } +} + +# +# Get names of all available nucleic acid properties. A reference to an array containing +# names of all available properties is returned. +# +sub GetNucleicAcidPropertiesNames { + my($Mode); + my($PropertyName, @PropertyNames); + + $Mode = 'ByGroup'; + if (@_ == 1) { + ($Mode) = @_; + } + + @PropertyNames = (); + if ($Mode =~ /^Alphabetical$/i) { + my($PropertyName); + # Code, OtherCodes and Name are always listed first... + push @PropertyNames, qw(Code OtherCodes Name); + for $PropertyName (sort keys %NucleicAcidPropertyNamesMap) { + if ($PropertyName !~ /^(Code|OtherCodes|Name)$/) { + push @PropertyNames, $PropertyName; + } + } + } + else { + push @PropertyNames, @NucleicAcidPropertyNames; + } + return (wantarray ? @PropertyNames : \@PropertyNames); +} + +# +# Is it a known nucleic acid? Input is either a code or a name +# +sub IsNucleicAcid { + my($NucleicAcidID) = @_; + my($Status); + + $Status = (_ValidateNucleicAcidID($NucleicAcidID)) ? 1 : 0; + + return $Status; +} + +# +# Is it an available nucleic acid property? +# +sub IsNucleicAcidProperty { + my($PropertyName) = @_; + my($Status); + + $Status = (exists($NucleicAcidPropertyNamesMap{$PropertyName})) ? 1 : 0; + + return $Status; +} + +# +# Is it an available nucleic acid type? +# +sub IsNucleicAcidType { + my($Type) = @_; + my($Status); + + $Status = (exists($NucleicAcidTypesMap{lc($Type)})) ? 1 : 0; + + return $Status; +} + +# +# Implents GetNucleicAcid<PropertyName> for a valid proprty name. +# +sub AUTOLOAD { + my($NucleicAcidID) = @_; + my($FunctionName, $PropertyName, $PropertyValue, $Code); + + $PropertyValue = undef; + + use vars qw($AUTOLOAD); + $FunctionName = $AUTOLOAD; + $FunctionName =~ s/.*:://; + + # Only Get<PropertyName> functions are supported... + if ($FunctionName !~ /^Get/) { + croak "Error: Function, NucleicAcid::$FunctionName, is not supported by AUTOLOAD in NucleicAcid module: Only Get<PropertyName> functions are implemented..."; + } + + $PropertyName = $FunctionName; + $PropertyName =~ s/^GetNucleicAcid//; + if (!exists $NucleicAcidPropertyNamesMap{$PropertyName}) { + croak "Error: Function, NucleicAcid::$FunctionName, is not supported by AUTOLOAD in NucleicAcid module: Unknown nucleic acid property name, $PropertyName, specified..."; + } + + if (!($Code = _ValidateNucleicAcidID($NucleicAcidID))) { + return undef; + } + $PropertyValue = $NucleicAcidDataMap{$Code}{$PropertyName}; + return $PropertyValue; +} + +# +# Load NucleicAcidsData.csv files from <MayaChemTools>/lib directory... +# +sub _LoadNucleicAcidsData { + my($NucleicAcidsDataFile, $MayaChemToolsLibDir); + + $MayaChemToolsLibDir = GetMayaChemToolsLibDirName(); + + $NucleicAcidsDataFile = "$MayaChemToolsLibDir" . "/data/NucleicAcidsData.csv"; + + if (! -e "$NucleicAcidsDataFile") { + croak "Error: MayaChemTools package file, $NucleicAcidsDataFile, is missing: Possible installation problems..."; + } + + _LoadData($NucleicAcidsDataFile); +} + +# +# Load NucleicAcidsData.csv file from <MayaChemTools>/lib directory... +# +sub _LoadData { + my($NucleicAcidsDataFile) = @_; + + %NucleicAcidDataMap = (); + @NucleicAcidCodes = (); + @NucleicAcidPropertyNames = (); + %NucleicAcidPropertyNamesMap = (); + %NucleicAcidCodeMap = (); + %NucleicAcidOtherCodeMap = (); + %NucleicAcidNameMap = (); + %NucleicAcidTypesMap = (); + + # Load property data for all nucleic acids... + # + # File Format: + # "Code","OtherCodes","BasePair","Name","Type","ChemicalFormula","ChemicalFormulaAtpH7.5","MolecularWeight","ExactMass","ElementalComposition" + # + my($Code, $OtherCodes, $NucleicAcidName, $Line, $NumOfCols, $InDelim, $Index, $Name, $Value, $Units, @LineWords, @ColLabels); + + $InDelim = "\,"; + open NUCLEICACIDSDATAFILE, "$NucleicAcidsDataFile" or croak "Couldn't open $NucleicAcidsDataFile: $! ..."; + + # Skip lines up to column labels... + LINE: while ($Line = GetTextLine(\*NUCLEICACIDSDATAFILE)) { + if ($Line !~ /^#/) { + last LINE; + } + } + @ColLabels= quotewords($InDelim, 0, $Line); + $NumOfCols = @ColLabels; + + # Extract property names from column labels... + @NucleicAcidPropertyNames = (); + for $Index (0 .. $#ColLabels) { + $Name = $ColLabels[$Index]; + push @NucleicAcidPropertyNames, $Name; + + # Store property names... + $NucleicAcidPropertyNamesMap{$Name} = $Name; + } + + # Process nucleic acid data... + LINE: while ($Line = GetTextLine(\*NUCLEICACIDSDATAFILE)) { + if ($Line =~ /^#/) { + next LINE; + } + @LineWords = (); + @LineWords = quotewords($InDelim, 0, $Line); + if (@LineWords != $NumOfCols) { + croak "Error: The number of data fields, @LineWords, in $NucleicAcidsDataFile must be $NumOfCols.\nLine: $Line..."; + } + $Code = $LineWords[0]; $OtherCodes = $LineWords[1]; $NucleicAcidName = $LineWords[3]; + if (exists $NucleicAcidDataMap{$Code}) { + carp "Warning: Ignoring data for nucleic acid $Code: It has already been loaded.\nLine: $Line...."; + next LINE; + } + + # Store all the values... + push @NucleicAcidCodes, $Code; + %{$NucleicAcidDataMap{$Code}} = (); + for $Index (0 .. $#LineWords) { + $Name = $NucleicAcidPropertyNames[$Index]; + $Value = $LineWords[$Index]; + $NucleicAcidDataMap{$Code}{$Name} = $Value; + } + } + close NUCLEICACIDSDATAFILE; + + # Setup one letter and nucleic acid name maps... + _SetupNucleicAcidIDMap(); +} + +# +# Setup lowercase other codes and name maps pointing +# to code as show in data file. +# +sub _SetupNucleicAcidIDMap { + my($Code, @OtherCodes, $OtherCode, $NucleicAcidName, $NucleicAcidType); + + %NucleicAcidCodeMap = (); + %NucleicAcidOtherCodeMap = (); + %NucleicAcidNameMap = (); + %NucleicAcidTypesMap = (); + + for $Code (keys %NucleicAcidDataMap) { + $NucleicAcidCodeMap{lc($Code)} = $Code; + + $NucleicAcidName = $NucleicAcidDataMap{$Code}{Name}; + $NucleicAcidNameMap{lc($NucleicAcidName)} = $Code; + + $NucleicAcidType = $NucleicAcidDataMap{$Code}{Type}; + if (! exists $NucleicAcidTypesMap{$NucleicAcidType}) { + $NucleicAcidTypesMap{lc($NucleicAcidType)} = $NucleicAcidType; + } + + @OtherCodes = split /\,/, $NucleicAcidDataMap{$Code}{OtherCodes}; + OTHERCODE: for $OtherCode (@OtherCodes) { + if (!$OtherCode) { + next OTHERCODE; + } + $OtherCode = RemoveLeadingAndTrailingWhiteSpaces($OtherCode); + $NucleicAcidOtherCodeMap{lc($OtherCode)} = $Code; + } + } +} + +# Validate Nucleic acid ID... +sub _ValidateNucleicAcidID { + my($NucleicAcidID) = @_; + my($Code) = undef; + + if (exists $NucleicAcidCodeMap{lc($NucleicAcidID)}) { + $Code = $NucleicAcidCodeMap{lc($NucleicAcidID)}; + } + elsif (exists $NucleicAcidOtherCodeMap{lc($NucleicAcidID)}) { + $Code = $NucleicAcidOtherCodeMap{lc($NucleicAcidID)}; + } + elsif (exists $NucleicAcidNameMap{lc($NucleicAcidID)}) { + $Code = $NucleicAcidNameMap{lc($NucleicAcidID)}; + } + return $Code; +} + + +1; + +__END__ + +=head1 NAME + +NucleicAcids + +=head1 SYNOPSIS + +use NucleicAcids; + +use NucleicAcids qw(:all); + +=head1 DESCRIPTION + +B<NucleicAcids> module the provides the following functions: + +GetNucleicAcidPropertiesData, GetNucleicAcidPropertiesNames, +GetNucleicAcids, GetNucleicAcidsByType, IsNucleicAcid, IsNucleicAcidProperty, +IsNucleicAcidType + +=head1 Functions + +=over 4 + +=item B<GetNucleicAcids> + + (@Names) = GetNucleicAcids([$NameType]); + $NamesRef = GetNucleicAcids([$NameType]); + +Returns an array or a reference to an array containing names of nucleic acids +as a code or nucleic acid name controlled by optional parameter I<NameType>. By +default, nucleic acids names are returned as the code. Possible values for +I<NameType>: I<Code or Name>. + +=item B<GetNucleicAcidsByType> + + (@Names) = GetNucleicAcidsByType([$Type, $NameType]); + $NamesRef = GetNucleicAcidsByType([$Type, $NameType]); + +Returns an array or a reference to an array containing names of nucleic acids +specified by parameter I<Type> as a code or name controlled by optional +parameter I<NameType>. Default values for I<Type>: I<Nucleoside>. Default value for +I<NameType>: I<Code>. Possible values for I<Type>: I<Nucleobase, Nucleoside, Deoxynucleoside, +Nucleotide, Deoxynucleotide>. Possible values for I<NameType>: I<Code or Name>. + +=item B<GetNucleicAcidPropertiesData> + + $DataHashRef = GetNucleicAcidPropertiesData($NucleicAcidID); + +Returns a reference to hash containing property names and values for a specified +I<NucleicAcidID>. + +=item B<GetNucleicAcidPropertyName> + + $Value = GetNucleicAcid<PropertyName>($NucleicAcidID); + +Returns nucleic acid property value for a specified I<NucleicAcidID>. This function is +implemented on-the-fly using Perl's AUTOLOAD functionality. + +=item B<GetNucleicAcidPropertiesNames> + + @Names = GetNucleicAcidPropertiesNames([$Mode]); + $NamesRef = GetNucleicAcidPropertiesNames([$Mode]); + +Returns an array or a reference to an array containing names of properties for +nucleic acids. Order of nucleic acids properties is controlled by optional parameter +I<Mode>. Possible values for I<Mode>: I<Alphabetical or ByGroup>; Default: I<ByGroup>. + +=item B<IsNucleicAcid> + + $Status = IsNucleicAcid($NucleicAcidID); + +Returns 1 or 0 based on whether it's a known nucleic acid ID. + +=item B<IsNucleicAcidProperty> + + $Status = IsNucleicAcid($PropertyName); + +Returns 1 or 0 based on whether it's a known nucleic acid property name. + +=item B<IsNucleicAcidType> + + $Status = IsNucleicAcidType(); + +Returns 1 or 0 based on whether it's a known nucleic acid type. + +=back + +=head1 AUTHOR + +Manish Sud <msud@san.rr.com> + +=head1 SEE ALSO + +AminoAcids.pm, PeriodicTable.pm + +=head1 COPYRIGHT + +Copyright (C) 2015 Manish Sud. All rights reserved. + +This file is part of MayaChemTools. + +MayaChemTools is free software; you can redistribute it and/or modify it under +the terms of the GNU Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. + +=cut