Mercurial > repos > deepakjadmin > mayatool3_test2
comparison lib/MolecularFormula.pm @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4816e4a8ae95 |
---|---|
1 package MolecularFormula; | |
2 # | |
3 # $RCSfile: MolecularFormula.pm,v $ | |
4 # $Date: 2015/02/28 20:47:18 $ | |
5 # $Revision: 1.25 $ | |
6 # | |
7 # Author: Manish Sud <msud@san.rr.com> | |
8 # | |
9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
10 # | |
11 # This file is part of MayaChemTools. | |
12 # | |
13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
14 # the terms of the GNU Lesser General Public License as published by the Free | |
15 # Software Foundation; either version 3 of the License, or (at your option) any | |
16 # later version. | |
17 # | |
18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
19 # any warranty; without even the implied warranty of merchantability or fitness | |
20 # for a particular purpose. See the GNU Lesser General Public License for more | |
21 # details. | |
22 # | |
23 # You should have received a copy of the GNU Lesser General Public License | |
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
26 # Boston, MA, 02111-1307, USA. | |
27 # | |
28 | |
use strict;
use Carp;
# Exporter is named in @ISA below, so load it here explicitly instead of
# relying on some other module having already pulled it in.
use Exporter ();
use Text::ParseWords;
use TextUtil;
use PeriodicTable;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Nothing is exported by default; callers request individual functions by
# name or everything at once through the ":all" tag.
@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(CalculateMolecularWeight CalculateExactMass CalculateElementalComposition FormatCompositionInfomation GetElementsAndCount IsMolecularFormula);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
42 | |
43 # | |
44 # Calculate molecular weight assuming it's a valid molecular formula... | |
45 # | |
sub CalculateMolecularWeight {
  # Returns the sum, over every element in the formula, of the element's
  # atomic weight multiplied by its count. Returns undef when the formula
  # cannot be parsed into element and count arrays.
  my($MolecularFormula) = @_;

  my($SymbolsRef, $CountsRef) = _ProcessMolecularFormula($MolecularFormula);
  unless (defined($SymbolsRef) && defined($CountsRef)) {
    return undef;
  }

  my $TotalWeight = 0;
  foreach my $Position (0 .. $#{$SymbolsRef}) {
    my $Symbol = $SymbolsRef->[$Position];
    $TotalWeight += PeriodicTable::GetElementAtomicWeight($Symbol) * $CountsRef->[$Position];
  }

  return $TotalWeight;
}
65 | |
66 # | |
67 # Calculate exact mass assuming it's a valid formula... | |
68 # | |
sub CalculateExactMass {
  # Returns the exact mass for the formula: the sum, over every element, of
  # the value reported by
  # PeriodicTable::GetElementMostAbundantNaturalIsotopeMass multiplied by the
  # element count. Returns undef when the formula cannot be parsed.
  # Elements for which no isotope mass is available are skipped.
  my($MolecularFormula) = @_;
  my($ElementSymbol, $ElementCount, $ExactMass, $RelativeAtomicMass, $FormulaElementsRef, $FormulaElementCountRef);

  ($FormulaElementsRef, $FormulaElementCountRef) = _ProcessMolecularFormula($MolecularFormula);
  if (!(defined($FormulaElementsRef) && defined($FormulaElementCountRef))) {
    return undef;
  }
  $ExactMass = 0;

  # The loop must carry the ELEMENT label: "next ELEMENT" without a matching
  # label dies at run time with "Label not found" instead of skipping.
  ELEMENT: for my $Index (0 .. $#{$FormulaElementsRef}) {
    $ElementSymbol = $FormulaElementsRef->[$Index];
    $ElementCount = $FormulaElementCountRef->[$Index];
    $RelativeAtomicMass = PeriodicTable::GetElementMostAbundantNaturalIsotopeMass($ElementSymbol);
    if (!defined($RelativeAtomicMass)) {
      next ELEMENT;
    }
    $ExactMass += $RelativeAtomicMass * $ElementCount;
  }
  return $ExactMass;
}
90 | |
91 | |
92 # | |
93 # Calculate elemental composition and return reference to arrays | |
94 # containing elements and their percent composition... | |
95 # | |
sub CalculateElementalComposition {
  # Returns two array references: the element symbols in the formula and,
  # in the same order, each element's percentage of the molecular weight.
  # Returns (undef, undef) when the molecular weight cannot be calculated,
  # and two empty arrays when the molecular weight is zero.
  my($MolecularFormula) = @_;

  my $TotalWeight = CalculateMolecularWeight($MolecularFormula);
  return (undef, undef) unless defined $TotalWeight;

  my($SymbolsRef, $CountsRef) = _ProcessMolecularFormula($MolecularFormula);

  my(@Symbols, @Percentages);

  # A zero weight leaves both arrays empty and avoids dividing by zero.
  if ($TotalWeight) {
    my $Scale = 100 / $TotalWeight;

    foreach my $Position (0 .. $#{$SymbolsRef}) {
      my $Symbol = $SymbolsRef->[$Position];
      my $Weight = PeriodicTable::GetElementAtomicWeight($Symbol);

      push @Symbols, $Symbol;
      push @Percentages, ($Weight * $CountsRef->[$Position]) * $Scale;
    }
  }

  return ( \@Symbols, \@Percentages);
}
127 | |
128 # Using reference to element and its composition arrays, format composition information | |
129 # as: Element: Composition;... | |
130 # | |
sub FormatCompositionInfomation {
  # Arguments: reference to element symbols, reference to the matching
  # composition values and, optionally, the number of decimal places.
  # Returns a string of the form "Element: Composition%; ...".
  my($ElementsRef, $ElementCompositionRef) = @_;

  # The precision is taken from the call only when exactly three arguments
  # are supplied; every other argument count uses the default of 2.
  my $Precision = (@_ == 3) ? $_[2] : 2;

  return join '; ', map {
    my $Percent = sprintf("%.${Precision}f", $ElementCompositionRef->[$_]);
    "$ElementsRef->[$_]: ${Percent}%";
  } 0 .. $#{$ElementsRef};
}
154 | |
155 # | |
156 # Get elements and their count... | |
157 # | |
sub GetElementsAndCount {
  # Returns references to the element symbol array and the element count
  # array produced by _ProcessMolecularFormula. The error message it also
  # returns is discarded here.
  my($MolecularFormula) = @_;

  my @Parsed = _ProcessMolecularFormula($MolecularFormula);

  return ($Parsed[0], $Parsed[1]);
}
166 | |
167 # | |
168 # Is it a valid molecular formula? | |
169 # | |
sub IsMolecularFormula {
  # Status is 1 when _ProcessMolecularFormula returns both the element and
  # the count references, and 0 otherwise. In list context the result is
  # (Status, ErrorMsg); in scalar context only Status is returned.
  my($MolecularFormula) = @_;

  my($ElementsRef, $CountsRef, $ErrorMsg) = _ProcessMolecularFormula($MolecularFormula);

  my $Status = 0;
  if (defined($ElementsRef) && defined($CountsRef)) {
    $Status = 1;
  }

  return ($Status, $ErrorMsg) if wantarray;
  return $Status;
}
180 | |
181 # | |
182 # Process molecular formula. For a valid formula, return references to arrays containing elements | |
183 # and element count; otherwise, return undef. | |
184 # | |
sub _ProcessMolecularFormula {
  # Parses a molecular formula into its elements and their counts.
  #
  # Returns a three element list:
  #   . reference to an array of element symbols in order of first appearance
  #   . reference to an array of the corresponding total counts
  #   . an error message, which is empty on success
  #
  # On failure both references are undef and the error message explains why.
  # The formula is first normalized by _CleanUpFormula. One level of ( ) or
  # [ ] grouping with an optional repeat count is supported; nested groups,
  # unbalanced groups and mismatched bracket types are reported as errors.
  my($MolecularFormula) = @_;
  my($ErrorMsg) = '';

  $MolecularFormula = _CleanUpFormula($MolecularFormula);

  # Make sure it only contains letters, numbers and grouping brackets...
  if ($MolecularFormula =~ /[^a-zA-Z0-9\(\)\[\]]/) {
    $ErrorMsg = 'Molecular formula contains characters other than a-zA-Z0-9';
    return (undef, undef, $ErrorMsg);
  }

  # Parse the formula...
  my($Spec, $ElementSymbol, $ElementCount, @FormulaElements, @ElementCount, %FormulaElementsToCountMap, @SubFormulaElements, %SubFormulaElementsToCountMap);

  # Setup element symbol and count regular expression...
  # IUPAC: http://www.iupac.org/reports/provisional/abstract04/RB-prs310804/Chap4-3.04.pdf
  #
  my $FormulaElementSpec = qr/
    \G(                    # Whole token; capture 1
      (?:
        ([A-Z][a-z]?)      # Two or one letter element symbol; capture 2
        ([0-9]*)           # Optionally followed by element count; capture 3
      )
      | \( | \[
      | \)[0-9]* | \][0-9]*
      | .
    )
  /x;

  # Opening bracket of the group being processed; empty outside any group.
  my %ClosingBracketFor = ('(' => ')', '[' => ']');
  my $OpenBracket = '';

  # Go over the formula...
  FORMULA: while ($MolecularFormula =~ /$FormulaElementSpec/gx) {
    ($Spec, $ElementSymbol, $ElementCount) = ($1, $2, $3);

    # Handle parenthesis in formula to indicate repeating units...
    if ($Spec =~ /^(\(|\[)/) {
      if ($OpenBracket) {
        $ErrorMsg = "Molecular formula contains multiple level of () or []";
        return (undef, undef, $ErrorMsg);
      }
      $OpenBracket = $Spec;
      @SubFormulaElements = ();
      %SubFormulaElementsToCountMap = ();
      next FORMULA;
    }
    elsif ($Spec =~ /^(\)|\])([0-9]*)$/) {
      my($CloseBracket, $RepeatCount) = ($1, $2);

      # A closing bracket is only valid when it ends the group that is
      # currently open and is of the same type as the opening bracket.
      if (!$OpenBracket) {
        $ErrorMsg = "Molecular formula contains $CloseBracket without a matching opening bracket";
        return (undef, undef, $ErrorMsg);
      }
      if ($CloseBracket ne $ClosingBracketFor{$OpenBracket}) {
        $ErrorMsg = "Molecular formula contains mismatched $OpenBracket and $CloseBracket";
        return (undef, undef, $ErrorMsg);
      }
      $OpenBracket = '';

      # A missing (or zero) repeat count means the group appears once...
      if (!$RepeatCount) {
        $RepeatCount = 1;
      }

      # Move group data to @FormulaElements and %FormulaElementsToCountMap...
      for my $Symbol (@SubFormulaElements) {
        my $Count = $SubFormulaElementsToCountMap{$Symbol} * $RepeatCount;
        _SetupFormulaElementData(\@FormulaElements, \%FormulaElementsToCountMap, $Symbol, $Count);
      }

      # Get ready again...
      @SubFormulaElements = ();
      %SubFormulaElementsToCountMap = ();

      next FORMULA;
    }

    # Retrieve element symbol and count. When the token is not an element
    # specification (e.g. a lowercase letter or a digit), the token itself is
    # used as the symbol so that it shows up in the error message below...
    if (!defined($ElementSymbol)) {
      $ElementSymbol = $Spec ? $Spec : '';
    }
    $ElementCount = $ElementCount ? $ElementCount : 1;
    if (!PeriodicTable::IsElement($ElementSymbol)) {
      $ErrorMsg = "Molecular formula contains unknown elemental symbol $ElementSymbol";
      return (undef, undef, $ErrorMsg);
    }

    if ($OpenBracket) {
      _SetupFormulaElementData(\@SubFormulaElements, \%SubFormulaElementsToCountMap, $ElementSymbol, $ElementCount);
    }
    else {
      _SetupFormulaElementData(\@FormulaElements, \%FormulaElementsToCountMap, $ElementSymbol, $ElementCount);
    }
  }

  # A group that is still open here was never closed; its elements would
  # otherwise be dropped silently from the result...
  if ($OpenBracket) {
    $ErrorMsg = "Molecular formula contains $OpenBracket without a matching closing bracket";
    return (undef, undef, $ErrorMsg);
  }

  # Setup element count array...
  @ElementCount = map { $FormulaElementsToCountMap{$_} } @FormulaElements;

  return (\@FormulaElements, \@ElementCount, $ErrorMsg);
}
285 | |
286 # Clean it up... | |
sub _CleanUpFormula {
  # Normalizes a raw formula string before parsing and returns the result:
  # spaces and charge specifications are removed, brackets without a repeat
  # count are dropped and "." adducts are rewritten as repeated groups.
  my($MolecularFormula) = @_;

  # Take out any spaces...
  $MolecularFormula =~ s/ //g;

  # Eliminate any charge specifications: +, - or [1-9]+[+-]
  #   e.g NO+ [Al(H2O)6]3+ [H2NO3]+
  # A bracket followed by a number and a sign keeps the bracket, as in
  # [Al(H2O)6]3+. Only when no such charge is present are bare signs with an
  # optional trailing number removed, as in C37H42N2O6+2 or Cu+.
  unless ($MolecularFormula =~ s/\][0-9]+[\+\-]/\]/g) {
    $MolecularFormula =~ s/[\+\-][0-9]*//g;
  }

  # Eliminate any brackets - ] or ) - not followed by numbers:
  #   e.g. Li[H2PO4]
  # Each bracket type is kept only if at least one closing bracket of that
  # type carries a repeat count.
  $MolecularFormula =~ s/[\[\]]//g unless $MolecularFormula =~ /\][0-9]+/;
  $MolecularFormula =~ s/[\(\)]//g unless $MolecularFormula =~ /\)[0-9]+/;

  # Change adducts to parenthesis format...
  #   Na2CO3.10H2O -> Na2CO3(H2O)10
  #   3CdSO4.8H2O -> (CdSO4)3(H2O)8
  if ($MolecularFormula =~ /\./) {
    my @Pieces = split /\./, $MolecularFormula;
    $MolecularFormula = '';
    foreach my $Piece (@Pieces) {
      my($Multiplier, $Body) = $Piece =~ /^([0-9]*)(.*?)$/;
      $MolecularFormula .= $Multiplier ? "(${Body})${Multiplier}" : $Body;
    }
  }

  return $MolecularFormula;
}
335 | |
336 # Store the element and count... | |
sub _SetupFormulaElementData {
  # Records $Count occurrences of $Element. A new element is appended to the
  # ordered element array and its count initialized; an element already in
  # the map only has its count increased.
  my($ElementsRef, $ElementsToCountMapRef, $Element, $Count) = @_;

  if (!exists $ElementsToCountMapRef->{$Element}) {
    push @{$ElementsRef}, $Element;
    return $ElementsToCountMapRef->{$Element} = $Count;
  }

  return $ElementsToCountMapRef->{$Element} += $Count;
}
348 | |
349 1; | |
350 | |
351 __END__ | |
352 | |
353 =head1 NAME | |
354 | |
355 MolecularFormula | |
356 | |
357 =head1 SYNOPSIS | |
358 | |
359 use MolecularFormula; | |
360 | |
361 use MolecularFormula qw(:all); | |
362 | |
363 =head1 DESCRIPTION | |
364 | |
365 B<MolecularFormula> module provides the following functions: | |
366 | |
367 CalculateElementalComposition, CalculateExactMass, CalculateMolecularWeight, | |
368 FormatCompositionInfomation, GetElementsAndCount, IsMolecularFormula | |
369 | |
370 =head1 FUNCTIONS | |
371 | |
372 =over 4 | |
373 | |
374 =item B<CalculateMolecularWeight> | |
375 | |
376 $MolecularWeight = CalculateMolecularWeight($MolecularFormula); | |
377 | |
378 Calculates and returns the molecular weight for a specified I<MolecularFormula>. | |
379 | |
380 =item B<CalculateElementalComposition> | |
381 | |
382 ($ElementsRef, $ElementCompositionRef) = | |
383 CalculateElementalComposition($MolecularFormula); | |
384 | |
385 Calculates the percent composition in a specified I<MolecularFormula> and returns references | |
386 to arrays containing elements and their percent composition. | |
387 | |
388 =item B<CalculateExactMass> | |
389 | |
390 $ExactMass = CalculateExactMass($MolecularFormula); | |
391 | |
392 Calculates and returns the exact mass for a specified I<MolecularFormula>. | |
393 | |
394 =item B<FormatCompositionInfomation> | |
395 | |
396 $FormattedString = FormatCompositionInfomation($ElementsRef, | |
397 $ElementCompositionRef, [$Precision]); | |
398 | |
399 Returns a formatted elemental composition string using references to elements and elemental | |
400 composition arrays. Precision is an optional parameter; its default value is I<2>. | |
401 | |
402 =item B<GetElementsAndCount> | |
403 | |
404 ($ElementsRef, $ElementCountRef) = GetElementsAndCount( | |
405 $MolecularFormula); | |
406 | |
407 Retrieves elements and their count composition in a specified I<MolecularFormula> and | |
408 returns references to arrays containing elements and their count. | |
409 | |
410 =item B<IsMolecularFormula> | |
411 | |
412 $Status = IsMolecularFormula($MolecularFormula); | |
413 ($Status, $ErrorMsg) = IsMolecularFormula($MolecularFormula); | |
414 | |
415 Returns 1 or 0 based on whether it's a valid I<MolecularFormula>. | |
416 | |
417 =back | |
418 | |
419 =head1 AUTHOR | |
420 | |
421 Manish Sud <msud@san.rr.com> | |
422 | |
423 =head1 SEE ALSO | |
424 | |
425 Molecule.pm | |
426 | |
427 =head1 COPYRIGHT | |
428 | |
429 Copyright (C) 2015 Manish Sud. All rights reserved. | |
430 | |
431 This file is part of MayaChemTools. | |
432 | |
433 MayaChemTools is free software; you can redistribute it and/or modify it under | |
434 the terms of the GNU Lesser General Public License as published by the Free | |
435 Software Foundation; either version 3 of the License, or (at your option) | |
436 any later version. | |
437 | |
438 =cut |