comparison mayachemtools/lib/NucleicAcids.pm @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73ae111cf86f
1 package NucleicAcids;
2 #
3 # $RCSfile: NucleicAcids.pm,v $
4 # $Date: 2015/02/28 20:47:18 $
5 # $Revision: 1.25 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability or fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Text::ParseWords;
32 use TextUtil;
33 use FileUtil;
34
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# Nothing is exported by default: each public function has to be requested
# by name or through the ":all" tag...
@ISA = ('Exporter');
@EXPORT = ();
@EXPORT_OK = qw(
  GetNucleicAcids
  GetNucleicAcidsByType
  GetNucleicAcidPropertiesData
  GetNucleicAcidPropertiesNames
  IsNucleicAcid
  IsNucleicAcidProperty
  IsNucleicAcidType
);

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

#
# File-scoped lookup tables shared by all the functions in this module. These
# are filled in once, right here at module load time, by reading the nucleic
# acids data file...
#
my(
  %NucleicAcidDataMap,
  %NucleicAcidCodeMap,
  %NucleicAcidOtherCodeMap,
  %NucleicAcidNameMap,
  @NucleicAcidCodes,
  @NucleicAcidPropertyNames,
  %NucleicAcidPropertyNamesMap,
  %NucleicAcidTypesMap
);
_LoadNucleicAcidsData();
48
#
# List all known nucleic acids in the order their codes were loaded.
#
# An optional first argument selects what is listed for each nucleic acid:
# a value matching "Name" (case insensitive) lists the Name property; any
# other value, or no argument at all, lists the code.
#
# Returns an array in list context and a reference to an array otherwise.
#
sub GetNucleicAcids {
  my($NameType) = @_ ? $_[0] : 'Code';
  my($UseNames, @Names);

  $UseNames = ($NameType =~ /^Name$/i) ? 1 : 0;

  @Names = ();
  for my $Code (@NucleicAcidCodes) {
    push @Names, ($UseNames ? $NucleicAcidDataMap{$Code}{Name} : $Code);
  }

  return (wantarray ? @Names : \@Names);
}
73
#
# List all known nucleic acids whose Type property matches a specified type:
# Nucleobase, Nucleoside, Deoxynucleoside, Nucleotide, Deoxynucleotide.
# Default type: Nucleoside.
#
# Arguments (both optional): Type, NameType. With exactly two arguments both
# are used; with exactly one argument it is taken as Type; for any other
# argument count the defaults (Nucleoside, Code) are used.
#
# The type comparison is case insensitive. A NameType value matching "Name"
# (case insensitive) lists the Name property; anything else lists the code.
#
# Returns an array in list context and a reference to an array otherwise.
#
sub GetNucleicAcidsByType {
  my($NameType, $Type, $TypePattern, $UseNames, $Code, @NucleicAcidNames);

  $Type = 'Nucleoside';
  $NameType = 'Code';
  if (@_ == 2) {
    ($Type, $NameType) = @_;
  }
  elsif (@_ == 1) {
    ($Type) = @_;
  }

  # Quote the caller supplied type so that any regex metacharacters in it are
  # matched literally instead of being interpreted as a pattern...
  $TypePattern = qr/^\Q$Type\E$/i;
  $UseNames = ($NameType =~ /^Name$/i) ? 1 : 0;

  # Collect names...
  @NucleicAcidNames = ();
  CODE: for $Code (@NucleicAcidCodes) {
    if ($NucleicAcidDataMap{$Code}{Type} !~ $TypePattern) {
      next CODE;
    }
    push @NucleicAcidNames, ($UseNames ? $NucleicAcidDataMap{$Code}{Name} : $Code);
  }

  return (wantarray ? @NucleicAcidNames : \@NucleicAcidNames);
}
105
#
# Get all available properties data for a nucleic acid identified by any of
# these symbols: code, other code or name.
#
# Returns a reference to the hash holding property names and values for the
# nucleic acid. It is the module's own data hash and not a copy. For an
# unknown ID, undef is returned.
#
sub GetNucleicAcidPropertiesData {
  my($NucleicAcidID) = @_;

  my $Code = _ValidateNucleicAcidID($NucleicAcidID);
  if (!$Code) {
    return undef;
  }

  return \%{ $NucleicAcidDataMap{$Code} };
}
124
#
# Get names of all available nucleic acid properties.
#
# When called with exactly one argument, it is used as the mode. A mode
# matching "Alphabetical" (case insensitive) lists Code, OtherCodes and Name
# first, followed by all the other property names in sorted order. Any other
# mode, including the default ByGroup, lists the names in the order of the
# data file column labels.
#
# Returns an array in list context and a reference to an array otherwise.
#
sub GetNucleicAcidPropertiesNames {
  my $Mode = (@_ == 1) ? $_[0] : 'ByGroup';
  my @PropertyNames;

  if ($Mode !~ /^Alphabetical$/i) {
    @PropertyNames = @NucleicAcidPropertyNames;
  }
  else {
    # Code, OtherCodes and Name are always listed first...
    @PropertyNames = (
      qw(Code OtherCodes Name),
      grep { $_ !~ /^(Code|OtherCodes|Name)$/ } sort keys %NucleicAcidPropertyNamesMap
    );
  }

  return (wantarray ? @PropertyNames : \@PropertyNames);
}
154
#
# Is it a known nucleic acid? Input is a code, other code or name; it is
# resolved through _ValidateNucleicAcidID. Returns 1 or 0.
#
sub IsNucleicAcid {
  my($NucleicAcidID) = @_;

  if (_ValidateNucleicAcidID($NucleicAcidID)) {
    return 1;
  }
  return 0;
}
166
#
# Is it an available nucleic acid property? The name is looked up exactly as
# specified (case sensitive) among the loaded property names. Returns 1 or 0.
#
sub IsNucleicAcidProperty {
  my($PropertyName) = @_;

  if (exists $NucleicAcidPropertyNamesMap{$PropertyName}) {
    return 1;
  }
  return 0;
}
178
#
# Is it an available nucleic acid type? The type is lowercased before the
# lookup, making the check case insensitive. Returns 1 or 0.
#
sub IsNucleicAcidType {
  my($Type) = @_;
  my $TypeKey = lc($Type);

  if (exists $NucleicAcidTypesMap{$TypeKey}) {
    return 1;
  }
  return 0;
}
190
#
# Implements GetNucleicAcid<PropertyName> for a valid property name.
#
# The name of the function being called is taken from $AUTOLOAD. After
# removing the package qualifier and the GetNucleicAcid prefix, what is left
# must be one of the loaded property names.
#
# Input is a nucleic acid ID: code, other code or name. Returns the property
# value, or undef for an unknown nucleic acid ID.
#
# Croaks when the function name doesn't start with Get or the property name
# is unknown.
#
sub AUTOLOAD {
  my($NucleicAcidID) = @_;
  my($FunctionName, $PropertyName, $Code);

  use vars qw($AUTOLOAD);
  $FunctionName = $AUTOLOAD;
  # Strip off the package qualifier...
  $FunctionName =~ s/.*:://;

  # Only Get<PropertyName> functions are supported...
  if ($FunctionName !~ /^Get/) {
    croak "Error: Function, NucleicAcids::$FunctionName, is not supported by AUTOLOAD in NucleicAcids module: Only Get<PropertyName> functions are implemented...";
  }

  $PropertyName = $FunctionName;
  $PropertyName =~ s/^GetNucleicAcid//;
  if (!exists $NucleicAcidPropertyNamesMap{$PropertyName}) {
    croak "Error: Function, NucleicAcids::$FunctionName, is not supported by AUTOLOAD in NucleicAcids module: Unknown nucleic acid property name, $PropertyName, specified...";
  }

  # An unknown nucleic acid ID is not an error: undef is returned...
  if (!($Code = _ValidateNucleicAcidID($NucleicAcidID))) {
    return undef;
  }

  return $NucleicAcidDataMap{$Code}{$PropertyName};
}
221
#
# Locate NucleicAcidsData.csv in the data directory under the MayaChemTools
# lib directory, as reported by GetMayaChemToolsLibDirName, and load it.
#
# Croaks when the data file doesn't exist.
#
sub _LoadNucleicAcidsData {
  my $NucleicAcidsDataFile = GetMayaChemToolsLibDirName() . "/data/NucleicAcidsData.csv";

  if (!-e $NucleicAcidsDataFile) {
    croak "Error: MayaChemTools package file, $NucleicAcidsDataFile, is missing: Possible installation problems...";
  }

  _LoadData($NucleicAcidsDataFile);
}
238
#
# Load the specified NucleicAcidsData.csv file into the module level maps.
#
# All the module level maps and lists are reset first. Lines starting with #
# before the column labels line are skipped; the first other line supplies
# the column labels, which become the property names. Each following line
# not starting with # is stored in %NucleicAcidDataMap under its first field,
# the code; a code seen before is ignored with a warning.
#
# Croaks when the file can't be opened or a data line doesn't contain the
# same number of fields as the column labels line.
#
sub _LoadData {
  my($NucleicAcidsDataFile) = @_;

  %NucleicAcidDataMap = ();
  @NucleicAcidCodes = ();
  @NucleicAcidPropertyNames = ();
  %NucleicAcidPropertyNamesMap = ();
  %NucleicAcidCodeMap = ();
  %NucleicAcidOtherCodeMap = ();
  %NucleicAcidNameMap = ();
  %NucleicAcidTypesMap = ();

  # Load property data for all nucleic acids...
  #
  # File Format:
  # "Code","OtherCodes","BasePair","Name","Type","ChemicalFormula","ChemicalFormulaAtpH7.5","MolecularWeight","ExactMass","ElementalComposition"
  #
  my($Code, $Line, $NumOfCols, $NumOfFields, $InDelim, $Name, $DataFileHandle, @LineWords, @ColLabels);

  $InDelim = "\,";

  # Three argument open keeps any special characters in the file name from
  # being interpreted as an open mode; the lexical file handle is closed
  # automatically on leaving this function, including through croak...
  open $DataFileHandle, '<', $NucleicAcidsDataFile or croak "Couldn't open $NucleicAcidsDataFile: $! ...";

  # Skip lines up to column labels...
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }
  @ColLabels = quotewords($InDelim, 0, $Line);
  $NumOfCols = @ColLabels;

  # Extract property names from column labels...
  @NucleicAcidPropertyNames = @ColLabels;
  for $Name (@ColLabels) {
    $NucleicAcidPropertyNamesMap{$Name} = $Name;
  }

  # Process nucleic acid data...
  LINE: while ($Line = GetTextLine($DataFileHandle)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    @LineWords = quotewords($InDelim, 0, $Line);
    $NumOfFields = @LineWords;
    if ($NumOfFields != $NumOfCols) {
      # Report the number of fields and not the fields themselves...
      croak "Error: The number of data fields, $NumOfFields, in $NucleicAcidsDataFile must be $NumOfCols.\nLine: $Line...";
    }
    $Code = $LineWords[0];
    if (exists $NucleicAcidDataMap{$Code}) {
      carp "Warning: Ignoring data for nucleic acid $Code: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    # Store all the values: property names and fields line up by position...
    push @NucleicAcidCodes, $Code;
    $NucleicAcidDataMap{$Code} = {};
    @{$NucleicAcidDataMap{$Code}}{@NucleicAcidPropertyNames} = @LineWords;
  }
  close $DataFileHandle;

  # Setup code, other codes, name and type maps...
  _SetupNucleicAcidIDMap();
}
313
#
# Setup lowercase code, other codes, name and type maps pointing to the
# code, or to the type for the types map, as shown in the data file.
#
# The maps are rebuilt from scratch using %NucleicAcidDataMap. The OtherCodes
# property is treated as a comma delimited list of alternate codes.
#
sub _SetupNucleicAcidIDMap {
  my($Code, @OtherCodes, $OtherCode, $NucleicAcidName, $NucleicAcidType, $TypeKey);

  %NucleicAcidCodeMap = ();
  %NucleicAcidOtherCodeMap = ();
  %NucleicAcidNameMap = ();
  %NucleicAcidTypesMap = ();

  for $Code (keys %NucleicAcidDataMap) {
    $NucleicAcidCodeMap{lc($Code)} = $Code;

    $NucleicAcidName = $NucleicAcidDataMap{$Code}{Name};
    $NucleicAcidNameMap{lc($NucleicAcidName)} = $Code;

    # Types are stored under lowercase keys; the existence check must use
    # the same lowercase key to be meaningful...
    $NucleicAcidType = $NucleicAcidDataMap{$Code}{Type};
    $TypeKey = lc($NucleicAcidType);
    if (!exists $NucleicAcidTypesMap{$TypeKey}) {
      $NucleicAcidTypesMap{$TypeKey} = $NucleicAcidType;
    }

    @OtherCodes = split /\,/, $NucleicAcidDataMap{$Code}{OtherCodes};
    OTHERCODE: for $OtherCode (@OtherCodes) {
      if (!$OtherCode) {
        next OTHERCODE;
      }
      $OtherCode = RemoveLeadingAndTrailingWhiteSpaces($OtherCode);

      # Don't register an empty alias for an entry consisting only of white
      # spaces. NOTE(review): assumes the trimmed value of such an entry is
      # an empty string or undef - confirm against TextUtil...
      if (!defined($OtherCode) || $OtherCode eq '') {
        next OTHERCODE;
      }
      $NucleicAcidOtherCodeMap{lc($OtherCode)} = $Code;
    }
  }
}
347
#
# Validate nucleic acid ID...
#
# The lowercased ID is looked up in the code, other code and name maps, in
# that order. The code from the first map containing it is returned; undef
# is returned when none of the maps contain it.
#
sub _ValidateNucleicAcidID {
  my($NucleicAcidID) = @_;
  my $IDKey = lc($NucleicAcidID);

  for my $IDMapRef (\%NucleicAcidCodeMap, \%NucleicAcidOtherCodeMap, \%NucleicAcidNameMap) {
    if (exists $IDMapRef->{$IDKey}) {
      return $IDMapRef->{$IDKey};
    }
  }

  return undef;
}
364
365
366 1;
367
368 __END__
369
370 =head1 NAME
371
372 NucleicAcids
373
374 =head1 SYNOPSIS
375
376 use NucleicAcids;
377
378 use NucleicAcids qw(:all);
379
380 =head1 DESCRIPTION
381
382 B<NucleicAcids> module provides the following functions:
383
384 GetNucleicAcidPropertiesData, GetNucleicAcidPropertiesNames,
385 GetNucleicAcids, GetNucleicAcidsByType, IsNucleicAcid, IsNucleicAcidProperty,
386 IsNucleicAcidType
387
388 =head1 Functions
389
390 =over 4
391
392 =item B<GetNucleicAcids>
393
394 (@Names) = GetNucleicAcids([$NameType]);
395 $NamesRef = GetNucleicAcids([$NameType]);
396
397 Returns an array or a reference to an array containing names of nucleic acids
398 as a code or nucleic acid name controlled by optional parameter I<NameType>. By
399 default, nucleic acids names are returned as the code. Possible values for
400 I<NameType>: I<Code or Name>.
401
402 =item B<GetNucleicAcidsByType>
403
404 (@Names) = GetNucleicAcidsByType([$Type, $NameType]);
405 $NamesRef = GetNucleicAcidsByType([$Type, $NameType]);
406
407 Returns an array or a reference to an array containing names of nucleic acids
408 specified by parameter I<Type> as a code or name controlled by optional
409 parameter I<NameType>. Default values for I<Type>: I<Nucleoside>. Default value for
410 I<NameType>: I<Code>. Possible values for I<Type>: I<Nucleobase, Nucleoside, Deoxynucleoside,
411 Nucleotide, Deoxynucleotide>. Possible values for I<NameType>: I<Code or Name>.
412
413 =item B<GetNucleicAcidPropertiesData>
414
415 $DataHashRef = GetNucleicAcidPropertiesData($NucleicAcidID);
416
417 Returns a reference to hash containing property names and values for a specified
418 I<NucleicAcidID>.
419
420 =item B<GetNucleicAcidPropertyName>
421
422 $Value = GetNucleicAcid<PropertyName>($NucleicAcidID);
423
424 Returns nucleic acid property value for a specified I<NucleicAcidID>. This function is
425 implemented on-the-fly using Perl's AUTOLOAD functionality.
426
427 =item B<GetNucleicAcidPropertiesNames>
428
429 @Names = GetNucleicAcidPropertiesNames([$Mode]);
430 $NamesRef = GetNucleicAcidPropertiesNames([$Mode]);
431
432 Returns an array or a reference to an array containing names of properties for
433 nucleic acids. Order of nucleic acids properties is controlled by optional parameter
434 I<Mode>. Possible values for I<Mode>: I<Alphabetical or ByGroup>; Default: I<ByGroup>.
435
436 =item B<IsNucleicAcid>
437
438 $Status = IsNucleicAcid($NucleicAcidID);
439
440 Returns 1 or 0 based on whether it's a known nucleic acid ID.
441
442 =item B<IsNucleicAcidProperty>
443
444 $Status = IsNucleicAcidProperty($PropertyName);
445
446 Returns 1 or 0 based on whether it's a known nucleic acid property name.
447
448 =item B<IsNucleicAcidType>
449
450 $Status = IsNucleicAcidType($Type);
451
452 Returns 1 or 0 based on whether it's a known nucleic acid type.
453
454 =back
455
456 =head1 AUTHOR
457
458 Manish Sud <msud@san.rr.com>
459
460 =head1 SEE ALSO
461
462 AminoAcids.pm, PeriodicTable.pm
463
464 =head1 COPYRIGHT
465
466 Copyright (C) 2015 Manish Sud. All rights reserved.
467
468 This file is part of MayaChemTools.
469
470 MayaChemTools is free software; you can redistribute it and/or modify it under
471 the terms of the GNU Lesser General Public License as published by the Free
472 Software Foundation; either version 3 of the License, or (at your option)
473 any later version.
474
475 =cut