Mercurial > repos > deepakjadmin > mayatool3_test3
view mayachemtools/lib/MolecularFormula.pm @ 2:dfff2614510e draft
Deleted selected files
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 12:15:15 -0500 |
parents | 73ae111cf86f |
children |
line wrap: on
line source
package MolecularFormula; # # $RCSfile: MolecularFormula.pm,v $ # $Date: 2015/02/28 20:47:18 $ # $Revision: 1.25 $ # # Author: Manish Sud <msud@san.rr.com> # # Copyright (C) 2015 Manish Sud. All rights reserved. # # This file is part of MayaChemTools. # # MayaChemTools is free software; you can redistribute it and/or modify it under # the terms of the GNU Lesser General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. # # MayaChemTools is distributed in the hope that it will be useful, but without # any warranty; without even the implied warranty of merchantability of fitness # for a particular purpose. See the GNU Lesser General Public License for more # details. # # You should have received a copy of the GNU Lesser General Public License # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, # Boston, MA, 02111-1307, USA. # use strict; use Carp; use Text::ParseWords; use TextUtil; use PeriodicTable; use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); @ISA = qw(Exporter); @EXPORT = qw(); @EXPORT_OK = qw(CalculateMolecularWeight CalculateExactMass CalculateElementalComposition FormatCompositionInfomation GetElementsAndCount IsMolecularFormula); %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); # # Calculate molecular weight assuming its a valid molecular formula... # sub CalculateMolecularWeight { my($MolecularFormula) = @_; my($Index, $MolecularWeight, $ElementSymbol, $ElementCount, $AtomicWeight, $FormulaElementsRef, $FormulaElementCountRef); ($FormulaElementsRef, $FormulaElementCountRef) = _ProcessMolecularFormula($MolecularFormula); if (!(defined($FormulaElementsRef) && defined($FormulaElementCountRef))) { return undef; } $MolecularWeight = 0; for $Index (0 .. $#{$FormulaElementsRef}) { $ElementSymbol = $FormulaElementsRef->[$Index]; $ElementCount = $FormulaElementCountRef->[$Index]; $AtomicWeight = PeriodicTable::GetElementAtomicWeight($ElementSymbol); $MolecularWeight += $AtomicWeight * $ElementCount; } return $MolecularWeight; } # # Calculate exact mass assuming it's a valid formula... # sub CalculateExactMass { my($MolecularFormula) = @_; my($Index, $ElementSymbol, $ElementCount, $ExactMass, $RelativeAtomicMass, $FormulaElementsRef, $FormulaElementCountRef); ($FormulaElementsRef, $FormulaElementCountRef) = _ProcessMolecularFormula($MolecularFormula); if (!(defined($FormulaElementsRef) && defined($FormulaElementCountRef))) { return undef; } $ExactMass = 0; for $Index (0 .. $#{$FormulaElementsRef}) { $ElementSymbol = $FormulaElementsRef->[$Index]; $ElementCount = $FormulaElementCountRef->[$Index]; $RelativeAtomicMass = PeriodicTable::GetElementMostAbundantNaturalIsotopeMass($ElementSymbol); if (!defined($RelativeAtomicMass)) { next ELEMENT; } $ExactMass += $RelativeAtomicMass * $ElementCount; } return $ExactMass; } # # Calculate elemental composition and return reference to arrays # containing elements and their percent composition... # sub CalculateElementalComposition { my($MolecularFormula) = @_; my($Index, $MolecularWeight, $ElementSymbol, $ElementCount, $AtomicWeight, $Composition, $CompositionMultiplier, $FormulaElementsRef, $FormulaElementCountRef, @FormulaElements, @FormulaElementComposition); $MolecularWeight = CalculateMolecularWeight($MolecularFormula); if (! defined $MolecularWeight) { return (undef, undef); } ($FormulaElementsRef, $FormulaElementCountRef) = _ProcessMolecularFormula($MolecularFormula); @FormulaElements = (); @FormulaElementComposition = (); if (!$MolecularWeight) { return ( \@FormulaElements, \@FormulaElementComposition); } $CompositionMultiplier = 100 / $MolecularWeight; for $Index (0 .. $#{$FormulaElementsRef}) { $ElementSymbol = $FormulaElementsRef->[$Index]; $ElementCount = $FormulaElementCountRef->[$Index]; $AtomicWeight = PeriodicTable::GetElementAtomicWeight($ElementSymbol); $Composition = ($AtomicWeight * $ElementCount) * $CompositionMultiplier; push @FormulaElements, $ElementSymbol; push @FormulaElementComposition, $Composition; } return ( \@FormulaElements, \@FormulaElementComposition); } # Using refernece to element and its composition arrays, format composition information # as: Element: Composition;... # sub FormatCompositionInfomation { my($Index, $ElementSymbol, $ElementComposition, $ElementsRef, $ElementCompositionRef, $Precision, $Composition); $Precision = 2; if (@_ == 3) { ($ElementsRef, $ElementCompositionRef, $Precision) = @_; } else { ($ElementsRef, $ElementCompositionRef) = @_; } $Composition = ''; for $Index (0 .. $#{$ElementsRef}) { $ElementSymbol = $ElementsRef->[$Index]; $ElementComposition = $ElementCompositionRef->[$Index]; $ElementComposition = sprintf("%.${Precision}f", $ElementComposition); $Composition .= ($Composition) ? '; ' : ''; $Composition .= "${ElementSymbol}: ${ElementComposition}%"; } return $Composition; } # # Get elements and their count... # sub GetElementsAndCount { my($MolecularFormula) = @_; my($FormulaElementsRef, $FormulaElementCountRef, $ErrorMsg); ($FormulaElementsRef, $FormulaElementCountRef, $ErrorMsg) = _ProcessMolecularFormula($MolecularFormula); return ($FormulaElementsRef, $FormulaElementCountRef); } # # Is it a valid molecular formula? # sub IsMolecularFormula { my($MolecularFormula, $PrintErrorMsg, $Status, $FormulaElementsRef, $FormulaElementCountRef, $ErrorMsg); ($MolecularFormula) = @_; ($FormulaElementsRef, $FormulaElementCountRef, $ErrorMsg) = _ProcessMolecularFormula($MolecularFormula); $Status = (defined($FormulaElementsRef) && defined($FormulaElementCountRef)) ? 1 : 0; return (wantarray ? ($Status, $ErrorMsg) : $Status); } # # Process molecular formula. For a valid formula, return references to arrays conatining elements # and element count; otherwsie, return undef. # sub _ProcessMolecularFormula { my($MolecularFormula) = @_; my($ErrorMsg) = ''; $MolecularFormula = _CleanUpFormula($MolecularFormula); # Make sure it only contains numbers and letters... if ($MolecularFormula =~ /[^a-zA-Z0-9\(\)\[\]]/) { $ErrorMsg = 'Molecular formula contains characters other than a-zA-Z0-9'; return (undef, undef, $ErrorMsg); } # Parse the formula... my($ElementSpec, $FormulaElementSpec, $Spec, $ElementSymbol, $ElementCount, @FormulaElements, @ElementCount, %FormulaElementsToCountMap, @SubFormulaElements, %SubFormulaElementsToCountMap); @FormulaElements = (); @ElementCount = (); %FormulaElementsToCountMap = (); # Setup element symbol and count regular expression... # IUPAC: http://www.iupac.org/reports/provisional/abstract04/RB-prs310804/Chap4-3.04.pdf # $FormulaElementSpec = qr/ \G( # $1 (?: ([A-Z][a-z]?) # Two or one letter element symbol; $2 ([0-9]*) # Optionally followed by element count; $3 ) | \( | \[ | \)[0-9]* | \][0-9]* | . ) /x; my($ProcessingParenthesis); $ProcessingParenthesis = 0; # Go over the formula... FORMULA: while ($MolecularFormula =~ /$FormulaElementSpec/gx) { ($Spec, $ElementSymbol, $ElementCount) = ($1, $2, $3); # Handle parenthesis in formula to indicate repeating units... if ($Spec =~ /^(\(|\[)/) { if ($ProcessingParenthesis) { $ErrorMsg = "Molecular formula contains multiple level of () or []"; return (undef, undef, $ErrorMsg); } $ProcessingParenthesis = 1; @SubFormulaElements = (); %SubFormulaElementsToCountMap = (); next FORMULA; } elsif ($Spec =~ /^(\)|\])/) { $ProcessingParenthesis = 0; # Retrieve repeat count and move data to @FormulaElements and %FormulaElementsToCountMap; my($RepeatCount, $Symbol, $Count); $RepeatCount = $Spec; $RepeatCount =~ s/(\)|\])//g; if (!$RepeatCount) { $RepeatCount = 1; } # Copy data... for $Symbol (@SubFormulaElements) { $Count = $SubFormulaElementsToCountMap{$Symbol} * $RepeatCount; _SetupFormulaElementData(\@FormulaElements, \%FormulaElementsToCountMap, $Symbol, $Count); } # Get ready again... @SubFormulaElements = (); %SubFormulaElementsToCountMap = (); next FORMULA; } # Retrieve element symbol and count... $ElementSymbol = ($Spec && !$ElementSymbol) ? $Spec : ($ElementSymbol ? $ElementSymbol : ''); $ElementCount = $ElementCount ? $ElementCount : 1; if (!PeriodicTable::IsElement($ElementSymbol)) { $ErrorMsg = "Molecular formula contains unknown elemental symbol $ElementSymbol"; return (undef, undef, $ErrorMsg); } if ($ProcessingParenthesis) { _SetupFormulaElementData(\@SubFormulaElements, \%SubFormulaElementsToCountMap, $ElementSymbol, $ElementCount); } else { _SetupFormulaElementData(\@FormulaElements, \%FormulaElementsToCountMap, $ElementSymbol, $ElementCount); } } # Setup element count array... for $ElementSymbol (@FormulaElements) { $ElementCount = $FormulaElementsToCountMap{$ElementSymbol}; push @ElementCount, $ElementCount; } # Make sure it all adds up to 100%; otherwise, adjust the last value.. return (\@FormulaElements, \@ElementCount, $ErrorMsg); } # Clean it up... sub _CleanUpFormula { my($MolecularFormula) = @_; #Take out any spaces... $MolecularFormula =~ s/ //g; # Eliminate any charge specifications: +, - or [1-9]+[+-] # e.g NO+ [Al(H2O)6]3+ [H2NO3]+ if ($MolecularFormula =~ /[\+\-]/) { if ($MolecularFormula =~ /\][0-9]+[\+\-]/) { # Bracket followed optionally by number and then, +/- ... # [Al(H2O)6]3+ ... $MolecularFormula =~ s/\][0-9]+[\+\-]/\]/g; } elsif ($MolecularFormula =~ /[\+\-][0-9]*/) { # +/- followed optionally by a number... # C37H42N2O6+2, Cu+ $MolecularFormula =~ s/[\+\-][0-9]*//g; } } # Eliminate any brackets - ] or ) - not followed by numbers: # e.g. Li[H2PO4] if ($MolecularFormula !~ /\][0-9]+/) { $MolecularFormula =~ s/[\[\]]//g; } if ($MolecularFormula !~ /\)[0-9]+/) { $MolecularFormula =~ s/[\(\)]//g; } # Change adducts to parenthesis format... # Na2CO3.10H2O -> Na2CO3(H2O)10 # 3CdSO4.8H2O -> (CdSO4)3(H2O)8 if ($MolecularFormula =~ /\./) { my($SubFormula, $Count, $Spec); my(@MolecularFormulaSplits) = split /\./, $MolecularFormula; $MolecularFormula = ''; for $SubFormula (@MolecularFormulaSplits) { ($Count, $Spec) = $SubFormula =~ /^([0-9]*)(.*?)$/; if ($Count) { $MolecularFormula .= "(${Spec})${Count}"; } else { $MolecularFormula .= $Spec; } } } return $MolecularFormula; } # Store the element and count... sub _SetupFormulaElementData { my($ElementsRef, $ElementsToCountMapRef, $Element, $Count) = @_; if (exists $ElementsToCountMapRef->{$Element}) { $ElementsToCountMapRef->{$Element} += $Count; } else { push @{$ElementsRef}, $Element; $ElementsToCountMapRef->{$Element} = $Count; } } 1; __END__ =head1 NAME MolecularFormula =head1 SYNOPSIS use MolecularFormula; use MolecularFormula qw(:all); =head1 DESCRIPTION B<MolecularFormula> module provides the following functions: CalculateElementalComposition, CalculateExactMass, CalculateMolecularWeight, FormatCompositionInfomation, GetElementsAndCount, IsMolecularFormula =head1 FUNCTIONS =over 4 =item B<CalculateMolecularWeight> $MolecularWeight = CalculateMolecularWeight($MolecularFormula); Calculates and returns the molecular weight for a specified I<MolecularFormula>. =item B<CalculateElementalComposition> ($ElementsRef, $ElementCompositionRef) = CalculateElementalComposition($MolecularFormula); Calculates the percent composition in a specified I<MolecularFormula> and returns references to arrays containing elements and their percent composition. =item B<CalculateExactMass> $ExactMass = CalculateMolecularWeight($MolecularFormula); Calculates and returns the exact mass for a specified I<MolecularFormula>. =item B<FormatCompositionInfomation> $FormattedString = FormatCompositionInfomation($ElementsRef, $ElementCompositionRef, [$Precision]); Returns a formatted elemental composition string using references to elements and elemental composition arrays. Precision is an optional parameter; its default value is I<2>. =item B<GetElementsAndCount> ($ElementsRef, $ElementCountRef) = GetElementsAndCount( $MolecularFormula); Retrieves elements and their count composition in a specified I<MolecularFormula> and returns references to arrays containing elements and their count. =item B<IsMolecularFormula> $Status = IsMolecularFormula($MolecularFormula); ($Status, $ErrorMsg) = IsMolecularFormula($MolecularFormula); Returns 1 or 0 a based on whether it's a valid I<MolecularFormula>. =back =head1 AUTHOR Manish Sud <msud@san.rr.com> =head1 SEE ALSO Molecule.pm =head1 COPYRIGHT Copyright (C) 2015 Manish Sud. All rights reserved. This file is part of MayaChemTools. MayaChemTools is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. =cut