annotate mayachemtool/mayachemtools/lib/MolecularFormula.pm @ 0:68300206e90d draft default tip

Uploaded
author deepakjadmin
date Thu, 05 Nov 2015 02:41:30 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
1 package MolecularFormula;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
2 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: MolecularFormula.pm,v $
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:47:18 $
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.25 $
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
6 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
8 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
10 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
12 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
17 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability or fitness
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
22 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
27 #
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
28
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
30 use Carp;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
31 use Text::ParseWords;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
32 use TextUtil;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
33 use PeriodicTable;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
34
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
# Exporter is inherited from below, so load it explicitly instead of relying on
# some other module having loaded it first.
require Exporter;

our @ISA = qw(Exporter);

# Nothing is exported by default; each public function may be imported by name
# or all of them at once through the ':all' tag.
our @EXPORT = qw();
our @EXPORT_OK = qw(CalculateMolecularWeight CalculateExactMass CalculateElementalComposition FormatCompositionInfomation GetElementsAndCount IsMolecularFormula);

our %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
42
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Calculate molecular weight of a formula as the sum, over all its elements, of
# atomic weight times element count. Returns undef when the formula could not
# be processed into element and count arrays.
#
sub CalculateMolecularWeight {
  my($MolecularFormula) = @_;

  my($SymbolsRef, $CountsRef) = _ProcessMolecularFormula($MolecularFormula);
  if (!defined($SymbolsRef) || !defined($CountsRef)) {
    return undef;
  }

  # Symbols and counts are parallel arrays; walk them in step...
  my $TotalWeight = 0;
  my $Position = 0;
  for my $Symbol (@{$SymbolsRef}) {
    $TotalWeight += PeriodicTable::GetElementAtomicWeight($Symbol) * $CountsRef->[$Position];
    $Position++;
  }

  return $TotalWeight;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
65
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Calculate exact mass of a formula as the sum, over all its elements, of the
# value returned by PeriodicTable::GetElementMostAbundantNaturalIsotopeMass
# times element count. Elements for which no mass value is available are
# skipped. Returns undef when the formula could not be processed into element
# and count arrays.
#
sub CalculateExactMass {
  my($MolecularFormula) = @_;
  my($Index, $ElementSymbol, $ElementCount, $ExactMass, $RelativeAtomicMass, $FormulaElementsRef, $FormulaElementCountRef);

  ($FormulaElementsRef, $FormulaElementCountRef) = _ProcessMolecularFormula($MolecularFormula);
  if (!(defined($FormulaElementsRef) && defined($FormulaElementCountRef))) {
    return undef;
  }
  $ExactMass = 0;

  # The loop must carry the ELEMENT label: "next ELEMENT" without a matching
  # label dies at run time with a "Label not found" error...
  ELEMENT: for $Index (0 .. $#{$FormulaElementsRef}) {
    $ElementSymbol = $FormulaElementsRef->[$Index];
    $ElementCount = $FormulaElementCountRef->[$Index];
    $RelativeAtomicMass = PeriodicTable::GetElementMostAbundantNaturalIsotopeMass($ElementSymbol);
    if (!defined($RelativeAtomicMass)) {
      # No mass available for this element; leave it out of the total...
      next ELEMENT;
    }
    $ExactMass += $RelativeAtomicMass * $ElementCount;
  }
  return $ExactMass;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
90
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
91
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Calculate elemental composition of a formula and return references to two
# parallel arrays: element symbols and their percent contribution to the
# molecular weight.
#
# Returns (undef, undef) when the molecular weight can not be calculated, and
# references to two empty arrays when the molecular weight is zero.
#
sub CalculateElementalComposition {
  my($MolecularFormula) = @_;

  my $TotalWeight = CalculateMolecularWeight($MolecularFormula);
  return (undef, undef) if !defined $TotalWeight;

  my($SymbolsRef, $CountsRef) = _ProcessMolecularFormula($MolecularFormula);

  my(@Symbols, @Percentages);

  # A zero weight would lead to division by zero; report nothing instead...
  if ($TotalWeight) {
    my $PercentPerUnitWeight = 100 / $TotalWeight;

    for my $Position (0 .. $#{$SymbolsRef}) {
      my $Symbol = $SymbolsRef->[$Position];
      my $ElementWeight = PeriodicTable::GetElementAtomicWeight($Symbol) * $CountsRef->[$Position];

      push @Symbols, $Symbol;
      push @Percentages, $ElementWeight * $PercentPerUnitWeight;
    }
  }

  return (\@Symbols, \@Percentages);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
127
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Using references to parallel arrays of element symbols and their composition
# values, format composition information as: Element: Composition%; ...
#
# An optional third argument sets the number of digits after the decimal point.
# It is honored only when exactly three arguments are passed; otherwise, a
# precision of 2 is used.
#
sub FormatCompositionInfomation {
  my($ElementsRef, $ElementCompositionRef) = @_;

  my $Precision = (@_ == 3) ? $_[2] : 2;
  my $NumberFormat = "%.${Precision}f";

  # Format each element on its own and glue the pieces together at the end...
  my @Entries = ();
  for my $Position (0 .. $#{$ElementsRef}) {
    my $Percent = sprintf($NumberFormat, $ElementCompositionRef->[$Position]);
    push @Entries, $ElementsRef->[$Position] . ': ' . $Percent . '%';
  }

  return join('; ', @Entries);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
154
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Get references to parallel arrays holding the elements of a formula and their
# counts. Both values are whatever _ProcessMolecularFormula returns for the
# formula; its error message is not passed along.
#
sub GetElementsAndCount {
  my($MolecularFormula) = @_;

  # Keep only the first two of the three values returned...
  my($SymbolsRef, $CountsRef) = _ProcessMolecularFormula($MolecularFormula);

  return ($SymbolsRef, $CountsRef);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
166
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Is it a valid molecular formula?
#
# Status is 1 when the formula could be processed into element and count
# arrays; otherwise, 0. In list context, the return value is the list
# (Status, ErrorMsg); in any other context, it is just Status.
#
sub IsMolecularFormula {
  my($MolecularFormula) = @_;

  my($SymbolsRef, $CountsRef, $ErrorMsg) = _ProcessMolecularFormula($MolecularFormula);

  my $IsValid = 0;
  if (defined($SymbolsRef) && defined($CountsRef)) {
    $IsValid = 1;
  }

  return $IsValid unless wantarray;
  return ($IsValid, $ErrorMsg);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
180
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Process molecular formula. The formula is first normalized by _CleanUpFormula
# and then scanned from left to right. A single level of (...)n or [...]n
# grouping is supported: counts of the elements inside a group are multiplied
# by the repeat count n. A missing or zero element count or repeat count is
# treated as 1.
#
# Returns a list of three values:
#
#   . Reference to an array of element symbols in order of first appearance
#   . Reference to an array of total element counts, parallel to the symbols
#   . Error message; an empty string for a valid formula
#
# For an invalid formula - unsupported characters, unknown element symbols,
# nested groups, or unbalanced group brackets - both references are undef and
# the error message describes the problem.
#
sub _ProcessMolecularFormula {
  my($MolecularFormula) = @_;
  my($ErrorMsg) = '';

  $MolecularFormula = _CleanUpFormula($MolecularFormula);

  # Make sure it only contains letters, numbers, and grouping brackets...
  if ($MolecularFormula =~ /[^a-zA-Z0-9\(\)\[\]]/) {
    $ErrorMsg = 'Molecular formula contains characters other than a-zA-Z0-9';
    return (undef, undef, $ErrorMsg);
  }

  # Parse the formula...
  my($FormulaElementSpec, $Spec, $ElementSymbol, $ElementCount, @FormulaElements, @ElementCount, %FormulaElementsToCountMap, @SubFormulaElements, %SubFormulaElementsToCountMap);

  @FormulaElements = (); @ElementCount = ();
  %FormulaElementsToCountMap = ();

  # Setup element symbol and count regular expression...
  # IUPAC: http://www.iupac.org/reports/provisional/abstract04/RB-prs310804/Chap4-3.04.pdf
  #
  # Capture variables are deliberately not named inside the comments below:
  # variables are interpolated into a pattern even when they appear in comments.
  #
  $FormulaElementSpec = qr/
    \G(                      # Whole token; first capture
      (?:
        ([A-Z][a-z]?)        # Two or one letter element symbol; second capture
        ([0-9]*)             # Optionally followed by element count; third capture
      )
      | \( | \[              # Start of a group
      | \)[0-9]* | \][0-9]*  # End of a group with optional repeat count
      | .                    # Anything else; rejected below as unknown element
    )
    /x;

  my($ProcessingParenthesis);
  $ProcessingParenthesis = 0;

  # Go over the formula...
  FORMULA: while ($MolecularFormula =~ /$FormulaElementSpec/gx) {
    # Copy captures right away; matches further down would overwrite them...
    ($Spec, $ElementSymbol, $ElementCount) = ($1, $2, $3);

    # Handle parenthesis in formula to indicate repeating units...
    if ($Spec =~ /^(\(|\[)/) {
      if ($ProcessingParenthesis) {
        $ErrorMsg = "Molecular formula contains multiple level of () or []";
        return (undef, undef, $ErrorMsg);
      }
      $ProcessingParenthesis = 1;
      @SubFormulaElements = ();
      %SubFormulaElementsToCountMap = ();
      next FORMULA;
    }
    elsif ($Spec =~ /^(\)|\])/) {
      # A closing bracket without a matching opening one is a malformed formula...
      if (!$ProcessingParenthesis) {
        $ErrorMsg = "Molecular formula contains unbalanced () or []";
        return (undef, undef, $ErrorMsg);
      }
      $ProcessingParenthesis = 0;

      # Retrieve repeat count and move data to @FormulaElements and %FormulaElementsToCountMap;
      my($RepeatCount, $Symbol, $Count);
      $RepeatCount = $Spec;
      $RepeatCount =~ s/(\)|\])//g;
      if (!$RepeatCount) {
        $RepeatCount = 1;
      }
      # Copy data...
      for $Symbol (@SubFormulaElements) {
        $Count = $SubFormulaElementsToCountMap{$Symbol} * $RepeatCount;
        _SetupFormulaElementData(\@FormulaElements, \%FormulaElementsToCountMap, $Symbol, $Count);
      }

      # Get ready again...
      @SubFormulaElements = ();
      %SubFormulaElementsToCountMap = ();

      next FORMULA;
    }

    # Retrieve element symbol and count. When the token is not an element
    # symbol, the token itself is used so that it shows up in the error message...
    $ElementSymbol = ($Spec && !$ElementSymbol) ? $Spec : ($ElementSymbol ? $ElementSymbol : '');
    $ElementCount = $ElementCount ? $ElementCount : 1;
    if (!PeriodicTable::IsElement($ElementSymbol)) {
      $ErrorMsg = "Molecular formula contains unknown elemental symbol $ElementSymbol";
      return (undef, undef, $ErrorMsg);
    }

    if ($ProcessingParenthesis) {
      _SetupFormulaElementData(\@SubFormulaElements, \%SubFormulaElementsToCountMap, $ElementSymbol, $ElementCount);
    }
    else {
      _SetupFormulaElementData(\@FormulaElements, \%FormulaElementsToCountMap, $ElementSymbol, $ElementCount);
    }
  }

  # A group which is still open at the end of the formula would otherwise lose
  # all the elements collected for it...
  if ($ProcessingParenthesis) {
    $ErrorMsg = "Molecular formula contains unbalanced () or []";
    return (undef, undef, $ErrorMsg);
  }

  # Setup element count array...
  for my $Symbol (@FormulaElements) {
    push @ElementCount, $FormulaElementsToCountMap{$Symbol};
  }

  return (\@FormulaElements, \@ElementCount, $ErrorMsg);
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
285
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Clean up a molecular formula before it is parsed:
#
#   . Take out spaces
#   . Take out charge specifications
#   . Take out square brackets unless some ] is followed by a number, and
#     parentheses unless some ) is followed by a number
#   . Change adducts separated by periods to parenthesis format
#
# Returns the cleaned up formula.
#
sub _CleanUpFormula {
  my($Formula) = @_;

  # Take out any spaces...
  $Formula =~ s/ //g;

  # Eliminate any charge specifications: +, - or [1-9]+[+-]
  # e.g NO+ [Al(H2O)6]3+ [H2NO3]+
  #
  # Bracket followed by a number and then +/-, as in [Al(H2O)6]3+, is tried
  # first; the number and charge are dropped and the bracket is kept...
  my $BracketChargeCount = ($Formula =~ s/\][0-9]+[\+\-]/\]/g);
  if (!$BracketChargeCount) {
    # Otherwise, drop any +/- optionally followed by a number:
    # C37H42N2O6+2, Cu+
    $Formula =~ s/[\+\-][0-9]*//g;
  }

  # Eliminate brackets when no closing bracket is followed by a number:
  # e.g. Li[H2PO4]
  if ($Formula !~ /\][0-9]+/) {
    $Formula =~ tr/[]//d;
  }
  if ($Formula !~ /\)[0-9]+/) {
    $Formula =~ tr/()//d;
  }

  # Change adducts to parenthesis format...
  # Na2CO3.10H2O -> Na2CO3(H2O)10
  # 3CdSO4.8H2O -> (CdSO4)3(H2O)8
  if (index($Formula, '.') >= 0) {
    my @Parts = ();
    for my $Component (split /\./, $Formula) {
      # Leading number, if any, becomes repeat count of the parenthesized rest...
      my($Multiplier, $Body) = $Component =~ /^([0-9]*)(.*?)$/;
      push @Parts, ($Multiplier ? "(${Body})${Multiplier}" : $Body);
    }
    $Formula = join('', @Parts);
  }

  return $Formula;
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
335
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
#
# Store an element and its count. An element seen for the first time is
# appended to the elements array and its count is recorded in the count map;
# for an element already in the map, the count is added to the recorded value.
#
sub _SetupFormulaElementData {
  my($SymbolsRef, $CountOfSymbolRef, $Symbol, $Amount) = @_;

  if (!exists $CountOfSymbolRef->{$Symbol}) {
    # First time around: remember position of the element as well...
    push @{$SymbolsRef}, $Symbol;
    $CountOfSymbolRef->{$Symbol} = $Amount;
  }
  else {
    $CountOfSymbolRef->{$Symbol} += $Amount;
  }
}
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
348
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
349 1;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
350
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
351 __END__
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
352
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
353 =head1 NAME
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
354
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
355 MolecularFormula
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
356
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
357 =head1 SYNOPSIS
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
358
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
359 use MolecularFormula;
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
360
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
361 use MolecularFormula qw(:all);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
362
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
363 =head1 DESCRIPTION
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
364
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
365 B<MolecularFormula> module provides the following functions:
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
366
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
367 CalculateElementalComposition, CalculateExactMass, CalculateMolecularWeight,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
368 FormatCompositionInfomation, GetElementsAndCount, IsMolecularFormula
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
369
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
370 =head1 FUNCTIONS
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
371
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
372 =over 4
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
373
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
374 =item B<CalculateMolecularWeight>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
375
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
376 $MolecularWeight = CalculateMolecularWeight($MolecularFormula);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
377
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
378 Calculates and returns the molecular weight for a specified I<MolecularFormula>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
379
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
380 =item B<CalculateElementalComposition>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
381
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
382 ($ElementsRef, $ElementCompositionRef) =
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
383 CalculateElementalComposition($MolecularFormula);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
384
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
385 Calculates the percent composition in a specified I<MolecularFormula> and returns references
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
386 to arrays containing elements and their percent composition.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
387
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
388 =item B<CalculateExactMass>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
389
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
390 $ExactMass = CalculateExactMass($MolecularFormula);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
391
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
392 Calculates and returns the exact mass for a specified I<MolecularFormula>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
393
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
394 =item B<FormatCompositionInfomation>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
395
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
396 $FormattedString = FormatCompositionInfomation($ElementsRef,
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
397 $ElementCompositionRef, [$Precision]);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
398
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
399 Returns a formatted elemental composition string using references to elements and elemental
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
400 composition arrays. Precision is an optional parameter; its default value is I<2>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
401
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
402 =item B<GetElementsAndCount>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
403
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
404 ($ElementsRef, $ElementCountRef) = GetElementsAndCount(
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
405 $MolecularFormula);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
406
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
407 Retrieves the elements and their counts in a specified I<MolecularFormula> and
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
408 returns references to arrays containing elements and their count.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
409
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
410 =item B<IsMolecularFormula>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
411
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
412 $Status = IsMolecularFormula($MolecularFormula);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
413 ($Status, $ErrorMsg) = IsMolecularFormula($MolecularFormula);
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
414
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
415 Returns 1 or 0 based on whether it's a valid I<MolecularFormula>.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
416
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
417 =back
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
418
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
419 =head1 AUTHOR
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
420
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
421 Manish Sud <msud@san.rr.com>
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
422
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
423 =head1 SEE ALSO
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
424
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
425 Molecule.pm
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
426
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
427 =head1 COPYRIGHT
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
428
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
429 Copyright (C) 2015 Manish Sud. All rights reserved.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
430
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
431 This file is part of MayaChemTools.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
432
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
433 MayaChemTools is free software; you can redistribute it and/or modify it under
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
434 the terms of the GNU Lesser General Public License as published by the Free
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
435 Software Foundation; either version 3 of the License, or (at your option)
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
436 any later version.
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
437
68300206e90d Uploaded
deepakjadmin
parents:
diff changeset
438 =cut