Mercurial > repos > deepakjadmin > mayatool3_test2
diff lib/MolecularFormula.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/MolecularFormula.pm Wed Jan 20 09:23:18 2016 -0500 @@ -0,0 +1,438 @@ +package MolecularFormula; +# +# $RCSfile: MolecularFormula.pm,v $ +# $Date: 2015/02/28 20:47:18 $ +# $Revision: 1.25 $ +# +# Author: Manish Sud <msud@san.rr.com> +# +# Copyright (C) 2015 Manish Sud. All rights reserved. +# +# This file is part of MayaChemTools. +# +# MayaChemTools is free software; you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the Free +# Software Foundation; either version 3 of the License, or (at your option) any +# later version. +# +# MayaChemTools is distributed in the hope that it will be useful, but without +# any warranty; without even the implied warranty of merchantability of fitness +# for a particular purpose. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or +# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, +# Boston, MA, 02111-1307, USA. +# + +use strict; +use Carp; +use Text::ParseWords; +use TextUtil; +use PeriodicTable; + +use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + +@ISA = qw(Exporter); +@EXPORT = qw(); +@EXPORT_OK = qw(CalculateMolecularWeight CalculateExactMass CalculateElementalComposition FormatCompositionInfomation GetElementsAndCount IsMolecularFormula); + +%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); + +# +# Calculate molecular weight assuming its a valid molecular formula... 
#
# Arguments: $MolecularFormula - formula string handed to _ProcessMolecularFormula.
# Returns: sum of (atomic weight * element count) over all elements in the formula;
# undef when _ProcessMolecularFormula doesn't return element and count references.
#
sub CalculateMolecularWeight {
  my($MolecularFormula) = @_;
  my($Index, $MolecularWeight, $ElementSymbol, $ElementCount, $AtomicWeight, $FormulaElementsRef, $FormulaElementCountRef);

  ($FormulaElementsRef, $FormulaElementCountRef) = _ProcessMolecularFormula($MolecularFormula);
  if (!(defined($FormulaElementsRef) && defined($FormulaElementCountRef))) {
    return undef;
  }

  $MolecularWeight = 0;

  for $Index (0 .. $#{$FormulaElementsRef}) {
    $ElementSymbol = $FormulaElementsRef->[$Index];
    $ElementCount = $FormulaElementCountRef->[$Index];
    $AtomicWeight = PeriodicTable::GetElementAtomicWeight($ElementSymbol);
    $MolecularWeight += $AtomicWeight * $ElementCount;
  }
  return $MolecularWeight;
}

#
# Calculate exact mass assuming it's a valid formula...
#
# Arguments: $MolecularFormula - formula string handed to _ProcessMolecularFormula.
# Returns: sum of (most abundant natural isotope mass * element count); undef when
# _ProcessMolecularFormula doesn't return element and count references.
#
# Elements for which PeriodicTable doesn't report an isotope mass are skipped and
# don't contribute to the exact mass.
#
sub CalculateExactMass {
  my($MolecularFormula) = @_;
  my($Index, $ElementSymbol, $ElementCount, $ExactMass, $RelativeAtomicMass, $FormulaElementsRef, $FormulaElementCountRef);

  ($FormulaElementsRef, $FormulaElementCountRef) = _ProcessMolecularFormula($MolecularFormula);
  if (!(defined($FormulaElementsRef) && defined($FormulaElementCountRef))) {
    return undef;
  }
  $ExactMass = 0;

  # The loop must carry the ELEMENT label: "next ELEMENT" without a matching label
  # is a fatal "Label not found" error at run time...
  ELEMENT: for $Index (0 .. $#{$FormulaElementsRef}) {
    $ElementSymbol = $FormulaElementsRef->[$Index];
    $ElementCount = $FormulaElementCountRef->[$Index];
    $RelativeAtomicMass = PeriodicTable::GetElementMostAbundantNaturalIsotopeMass($ElementSymbol);
    if (!defined($RelativeAtomicMass)) {
      next ELEMENT;
    }
    $ExactMass += $RelativeAtomicMass * $ElementCount;
  }
  return $ExactMass;
}


#
# Calculate elemental composition and return reference to arrays
# containing elements and their percent composition...
#
# Arguments: $MolecularFormula - formula string handed to _ProcessMolecularFormula.
# Returns: (\@Elements, \@PercentComposition), elements in order of first appearance;
# (undef, undef) for a formula which can't be parsed; references to two empty arrays
# when the molecular weight is zero.
#
sub CalculateElementalComposition {
  my($MolecularFormula) = @_;
  my($Index, $MolecularWeight, $ElementMass, $CompositionMultiplier, $FormulaElementsRef, $FormulaElementCountRef, @ElementMasses, @FormulaElements, @FormulaElementComposition);

  # Parse the formula once; molecular weight and individual element masses are
  # both derived from the same element and count arrays...
  ($FormulaElementsRef, $FormulaElementCountRef) = _ProcessMolecularFormula($MolecularFormula);
  if (!(defined($FormulaElementsRef) && defined($FormulaElementCountRef))) {
    return (undef, undef);
  }

  $MolecularWeight = 0;
  @ElementMasses = ();
  for $Index (0 .. $#{$FormulaElementsRef}) {
    $ElementMass = PeriodicTable::GetElementAtomicWeight($FormulaElementsRef->[$Index]) * $FormulaElementCountRef->[$Index];
    push @ElementMasses, $ElementMass;
    $MolecularWeight += $ElementMass;
  }

  @FormulaElements = ();
  @FormulaElementComposition = ();

  # Nothing to divide by...
  if (!$MolecularWeight) {
    return ( \@FormulaElements, \@FormulaElementComposition);
  }

  $CompositionMultiplier = 100 / $MolecularWeight;

  for $Index (0 .. $#{$FormulaElementsRef}) {
    push @FormulaElements, $FormulaElementsRef->[$Index];
    push @FormulaElementComposition, $ElementMasses[$Index] * $CompositionMultiplier;
  }

  return ( \@FormulaElements, \@FormulaElementComposition);
}

#
# Using reference to element and its composition arrays, format composition information
# as: Element: Composition%; Element: Composition%...
#
# Arguments: $ElementsRef, $ElementCompositionRef and optional $Precision, the number of
# digits after the decimal point; a missing or undefined precision defaults to 2.
#
sub FormatCompositionInfomation {
  my($ElementsRef, $ElementCompositionRef, $Precision) = @_;
  my($Index, @CompositionInfo);

  if (!defined $Precision) {
    $Precision = 2;
  }

  @CompositionInfo = ();
  for $Index (0 .. $#{$ElementsRef}) {
    push @CompositionInfo, $ElementsRef->[$Index] . ': ' . sprintf("%.${Precision}f", $ElementCompositionRef->[$Index]) . '%';
  }

  return join('; ', @CompositionInfo);
}

#
# Get elements and their count...
#
# Returns references to element and count arrays; both are undef for a formula
# which can't be parsed.
#
sub GetElementsAndCount {
  my($MolecularFormula) = @_;
  my($FormulaElementsRef, $FormulaElementCountRef);

  ($FormulaElementsRef, $FormulaElementCountRef) = _ProcessMolecularFormula($MolecularFormula);

  return ($FormulaElementsRef, $FormulaElementCountRef);
}

#
# Is it a valid molecular formula?
#
# Returns 1 or 0 in scalar context and (Status, ErrorMsg) in list context.
#
sub IsMolecularFormula {
  my($MolecularFormula) = @_;
  my($Status, $FormulaElementsRef, $FormulaElementCountRef, $ErrorMsg);

  ($FormulaElementsRef, $FormulaElementCountRef, $ErrorMsg) = _ProcessMolecularFormula($MolecularFormula);
  $Status = (defined($FormulaElementsRef) && defined($FormulaElementCountRef)) ? 1 : 0;

  return (wantarray ? ($Status, $ErrorMsg) : $Status);
}

#
# Process molecular formula. For a valid formula, return references to arrays containing elements
# and element count along with an empty error message; otherwise, return undef for both
# references along with an error message.
#
# Only one level of () or [] grouping is supported. Unbalanced grouping is reported as
# an error: accepting it would silently drop any elements collected inside an unclosed group.
#
sub _ProcessMolecularFormula {
  my($MolecularFormula) = @_;
  my($ErrorMsg) = '';

  $MolecularFormula = _CleanUpFormula($MolecularFormula);

  # Make sure it only contains numbers, letters and grouping characters...
  if ($MolecularFormula =~ /[^a-zA-Z0-9\(\)\[\]]/) {
    $ErrorMsg = 'Molecular formula contains characters other than a-zA-Z0-9';
    return (undef, undef, $ErrorMsg);
  }

  # Parse the formula...
  my($FormulaElementSpec, $Spec, $ElementSymbol, $ElementCount, @FormulaElements, @ElementCount, %FormulaElementsToCountMap, @SubFormulaElements, %SubFormulaElementsToCountMap);

  @FormulaElements = (); @ElementCount = ();
  %FormulaElementsToCountMap = ();
  @SubFormulaElements = ();
  %SubFormulaElementsToCountMap = ();

  # Setup element symbol and count regular expression...
  # IUPAC: http://www.iupac.org/reports/provisional/abstract04/RB-prs310804/Chap4-3.04.pdf
  #
  $FormulaElementSpec = qr/
    \G(                   # Whole token; capture 1
      (?:
        ([A-Z][a-z]?)     # Two or one letter element symbol; capture 2
        ([0-9]*)          # Optionally followed by element count; capture 3
      )
      | \( | \[
      | \)[0-9]* | \][0-9]*
      | .
    )
  /x;

  my($ProcessingParenthesis);
  $ProcessingParenthesis = 0;

  # Go over the formula...
  FORMULA: while ($MolecularFormula =~ /$FormulaElementSpec/gx) {
    ($Spec, $ElementSymbol, $ElementCount) = ($1, $2, $3);

    # Handle parenthesis in formula to indicate repeating units...
    if ($Spec =~ /^(\(|\[)/) {
      if ($ProcessingParenthesis) {
        $ErrorMsg = "Molecular formula contains multiple level of () or []";
        return (undef, undef, $ErrorMsg);
      }
      $ProcessingParenthesis = 1;
      @SubFormulaElements = ();
      %SubFormulaElementsToCountMap = ();
      next FORMULA;
    }
    elsif ($Spec =~ /^(\)|\])/) {
      if (!$ProcessingParenthesis) {
        # Closing a group which was never opened...
        $ErrorMsg = "Molecular formula contains unbalanced () or []";
        return (undef, undef, $ErrorMsg);
      }
      $ProcessingParenthesis = 0;

      # Retrieve repeat count and move data to @FormulaElements and %FormulaElementsToCountMap;
      # a missing or zero repeat count is treated as 1...
      my($RepeatCount, $Symbol, $Count);
      $RepeatCount = $Spec;
      $RepeatCount =~ s/(\)|\])//g;
      if (!$RepeatCount) {
        $RepeatCount = 1;
      }

      # Copy data...
      for $Symbol (@SubFormulaElements) {
        $Count = $SubFormulaElementsToCountMap{$Symbol} * $RepeatCount;
        _SetupFormulaElementData(\@FormulaElements, \%FormulaElementsToCountMap, $Symbol, $Count);
      }

      # Get ready again...
      @SubFormulaElements = ();
      %SubFormulaElementsToCountMap = ();

      next FORMULA;
    }

    # Retrieve element symbol and count; a token matched by the catch-all branch has no
    # element symbol capture and is checked as is, which makes it fail the element test...
    $ElementSymbol = ($Spec && !$ElementSymbol) ? $Spec : ($ElementSymbol ? $ElementSymbol : '');
    $ElementCount = $ElementCount ? $ElementCount : 1;
    if (!PeriodicTable::IsElement($ElementSymbol)) {
      $ErrorMsg = "Molecular formula contains unknown elemental symbol $ElementSymbol";
      return (undef, undef, $ErrorMsg);
    }

    if ($ProcessingParenthesis) {
      _SetupFormulaElementData(\@SubFormulaElements, \%SubFormulaElementsToCountMap, $ElementSymbol, $ElementCount);
    }
    else {
      _SetupFormulaElementData(\@FormulaElements, \%FormulaElementsToCountMap, $ElementSymbol, $ElementCount);
    }
  }

  if ($ProcessingParenthesis) {
    # A group was opened but never closed; its elements haven't been counted...
    $ErrorMsg = "Molecular formula contains unbalanced () or []";
    return (undef, undef, $ErrorMsg);
  }

  # Setup element count array in the same order as elements...
  for $ElementSymbol (@FormulaElements) {
    push @ElementCount, $FormulaElementsToCountMap{$ElementSymbol};
  }

  return (\@FormulaElements, \@ElementCount, $ErrorMsg);
}

#
# Clean it up: take out spaces, charge specifications and grouping characters not
# followed by a count; rewrite adducts using parenthesis format...
#
sub _CleanUpFormula {
  my($MolecularFormula) = @_;

  # Treat a missing formula as an empty one...
  if (!defined $MolecularFormula) {
    $MolecularFormula = '';
  }

  # Take out any spaces...
  $MolecularFormula =~ s/ //g;

  # Eliminate any charge specifications: +, - or [1-9]+[+-]
  # e.g NO+ [Al(H2O)6]3+ [H2NO3]+
  if ($MolecularFormula =~ /[\+\-]/) {
    if ($MolecularFormula =~ /\][0-9]+[\+\-]/) {
      # Bracket followed by number and then, +/- ...
      # [Al(H2O)6]3+ ...
      $MolecularFormula =~ s/\][0-9]+[\+\-]/\]/g;
    }
    elsif ($MolecularFormula =~ /[\+\-][0-9]*/) {
      # +/- followed optionally by a number...
      # C37H42N2O6+2, Cu+
      $MolecularFormula =~ s/[\+\-][0-9]*//g;
    }
  }

  # Eliminate any brackets - ] or ) - not followed by numbers:
  # e.g. Li[H2PO4]
  if ($MolecularFormula !~ /\][0-9]+/) {
    $MolecularFormula =~ s/[\[\]]//g;
  }
  if ($MolecularFormula !~ /\)[0-9]+/) {
    $MolecularFormula =~ s/[\(\)]//g;
  }

  # Change adducts to parenthesis format...
  # Na2CO3.10H2O -> Na2CO3(H2O)10
  # 3CdSO4.8H2O -> (CdSO4)3(H2O)8
  if ($MolecularFormula =~ /\./) {
    my($SubFormula, $Count, $Spec);
    my(@MolecularFormulaSplits) = split /\./, $MolecularFormula;
    $MolecularFormula = '';
    for $SubFormula (@MolecularFormulaSplits) {
      ($Count, $Spec) = $SubFormula =~ /^([0-9]*)(.*?)$/;
      if ($Count) {
        $MolecularFormula .= "(${Spec})${Count}";
      }
      else {
        $MolecularFormula .= $Spec;
      }
    }
  }

  return $MolecularFormula;
}

#
# Store the element and count: add count to an element which has already been seen;
# otherwise, append the element to preserve order of first appearance...
#
sub _SetupFormulaElementData {
  my($ElementsRef, $ElementsToCountMapRef, $Element, $Count) = @_;

  if (exists $ElementsToCountMapRef->{$Element}) {
    $ElementsToCountMapRef->{$Element} += $Count;
  }
  else {
    push @{$ElementsRef}, $Element;
    $ElementsToCountMapRef->{$Element} = $Count;
  }
}

1;

__END__

=head1 NAME

MolecularFormula

=head1 SYNOPSIS

use MolecularFormula;

use MolecularFormula qw(:all);

=head1 DESCRIPTION

B<MolecularFormula> module provides the following functions:

CalculateElementalComposition, CalculateExactMass, CalculateMolecularWeight,
FormatCompositionInfomation, GetElementsAndCount, IsMolecularFormula

=head1 FUNCTIONS

=over 4

=item B<CalculateMolecularWeight>

    $MolecularWeight = CalculateMolecularWeight($MolecularFormula);

Calculates and returns the molecular weight for a specified I<MolecularFormula>.

=item B<CalculateElementalComposition>

    ($ElementsRef, $ElementCompositionRef) =
       CalculateElementalComposition($MolecularFormula);

Calculates the percent composition in a specified I<MolecularFormula> and returns references
to arrays containing elements and their percent composition.

=item B<CalculateExactMass>

    $ExactMass = CalculateExactMass($MolecularFormula);

Calculates and returns the exact mass for a specified I<MolecularFormula>.


=item B<FormatCompositionInfomation>

    $FormattedString = FormatCompositionInfomation($ElementsRef,
                       $ElementCompositionRef, [$Precision]);

Returns a formatted elemental composition string using references to elements and elemental
composition arrays. Precision is an optional parameter; its default value is I<2>.

=item B<GetElementsAndCount>

    ($ElementsRef, $ElementCountRef) = GetElementsAndCount(
                                       $MolecularFormula);

Retrieves elements and their count composition in a specified I<MolecularFormula> and
returns references to arrays containing elements and their count.

=item B<IsMolecularFormula>

    $Status = IsMolecularFormula($MolecularFormula);
    ($Status, $ErrorMsg) = IsMolecularFormula($MolecularFormula);

Returns 1 or 0 based on whether it's a valid I<MolecularFormula>.

=back

=head1 AUTHOR

Manish Sud <msud@san.rr.com>

=head1 SEE ALSO

Molecule.pm

=head1 COPYRIGHT

Copyright (C) 2015 Manish Sud. All rights reserved.

This file is part of MayaChemTools.

MayaChemTools is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.

=cut