0
|
1 package NucleicAcids;
|
|
2 #
|
|
3 # $RCSfile: NucleicAcids.pm,v $
|
|
4 # $Date: 2015/02/28 20:47:18 $
|
|
5 # $Revision: 1.25 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Text::ParseWords;
|
|
32 use TextUtil;
|
|
33 use FileUtil;
|
|
34
|
|
# Package variables read by Exporter. Nothing is exported by default; every
# public function may be imported by name or all at once through the :all tag.
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(
  GetNucleicAcids
  GetNucleicAcidsByType
  GetNucleicAcidPropertiesData
  GetNucleicAcidPropertiesNames
  IsNucleicAcid
  IsNucleicAcidProperty
  IsNucleicAcidType
);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

#
# File-level data tables, filled in once when the module is loaded...
#
my(%NucleicAcidDataMap);          # Code => { PropertyName => Value }
my(%NucleicAcidCodeMap);          # lowercase code => Code
my(%NucleicAcidOtherCodeMap);     # lowercase other code => Code
my(%NucleicAcidNameMap);          # lowercase name => Code
my(@NucleicAcidCodes);            # Codes in data file order
my(@NucleicAcidPropertyNames);    # Property names in data file column order
my(%NucleicAcidPropertyNamesMap); # PropertyName => PropertyName
my(%NucleicAcidTypesMap);         # lowercase type => Type
_LoadNucleicAcidsData();
|
|
48
|
|
#
# Return all known nucleic acids in data file order.
#
# An optional first argument selects how each entry is identified: 'Name'
# (matched case-insensitively) returns nucleic acid names; anything else,
# including no argument at all, returns codes.
#
# A list is returned in list context and an array reference otherwise.
#
sub GetNucleicAcids {
  my $NameType = (@_ >= 1) ? $_[0] : 'Code';
  my $WantNames = ($NameType =~ /^Name$/i) ? 1 : 0;

  my @NucleicAcidNames = ();
  for my $NucleicAcidCode (@NucleicAcidCodes) {
    if ($WantNames) {
      push @NucleicAcidNames, $NucleicAcidDataMap{$NucleicAcidCode}{Name};
    }
    else {
      push @NucleicAcidNames, $NucleicAcidCode;
    }
  }

  return (wantarray ? @NucleicAcidNames : \@NucleicAcidNames);
}
|
|
73
|
|
#
# Return all known nucleic acids of one type, in data file order.
#
# Arguments (both optional):
#   Type     - one of Nucleobase, Nucleoside, Deoxynucleoside, Nucleotide or
#              Deoxynucleotide, compared case-insensitively. Default: Nucleoside
#   NameType - 'Name' (case-insensitive) to get names; anything else returns
#              codes. Default: Code
#
# The arguments are only honored when exactly one or two are passed; any other
# argument count leaves both defaults in place.
#
# A list is returned in list context and an array reference otherwise.
#
sub GetNucleicAcidsByType {
  my($Type, $NameType) = ('Nucleoside', 'Code');

  if (@_ == 2) {
    ($Type, $NameType) = @_;
  }
  elsif (@_ == 1) {
    ($Type) = @_;
  }

  # The type is quoted so that it is compared as literal text: regex
  # metacharacters in a caller supplied value must neither widen the match
  # nor abort with a pattern compilation error.
  my $TypePattern = qr/^\Q$Type\E$/i;
  my $WantNames = ($NameType =~ /^Name$/i) ? 1 : 0;

  my @NucleicAcidNames = ();
  CODE: for my $Code (@NucleicAcidCodes) {
    if ($NucleicAcidDataMap{$Code}{Type} !~ $TypePattern) {
      next CODE;
    }
    push @NucleicAcidNames, ($WantNames ? $NucleicAcidDataMap{$Code}{Name} : $Code);
  }

  return (wantarray ? @NucleicAcidNames : \@NucleicAcidNames);
}
|
|
105
|
|
#
# Get all available properties data for a nucleic acid identified by its code,
# one of its other codes, or its name.
#
# Returns a reference to a hash keyed by property name, or undef for an
# unknown ID. The reference points at this module's own data table rather
# than a copy, so changes made through it are visible to later lookups.
#
sub GetNucleicAcidPropertiesData {
  my($NucleicAcidID) = @_;

  my $Code = _ValidateNucleicAcidID($NucleicAcidID);
  if (!$Code) {
    return undef;
  }

  return \%{$NucleicAcidDataMap{$Code}};
}
|
|
124
|
|
#
# Get names of all available nucleic acid properties.
#
# The optional Mode argument (honored only when it is the sole argument)
# controls ordering: 'Alphabetical' (case-insensitive) lists Code, OtherCodes
# and Name first followed by the remaining names sorted; any other value keeps
# the column order of the data file. Default: ByGroup.
#
# A list is returned in list context and an array reference otherwise.
#
sub GetNucleicAcidPropertiesNames {
  my $Mode = (@_ == 1) ? $_[0] : 'ByGroup';

  my @PropertyNames = ();
  if ($Mode !~ /^Alphabetical$/i) {
    @PropertyNames = @NucleicAcidPropertyNames;
  }
  else {
    # Code, OtherCodes and Name are always listed first...
    @PropertyNames = (
      qw(Code OtherCodes Name),
      grep { $_ !~ /^(Code|OtherCodes|Name)$/ } sort keys %NucleicAcidPropertyNamesMap
    );
  }

  return (wantarray ? @PropertyNames : \@PropertyNames);
}
|
|
154
|
|
#
# Is it a known nucleic acid? The ID is resolved by _ValidateNucleicAcidID,
# so a code, an other code or a name is accepted. Returns 1 or 0.
#
sub IsNucleicAcid {
  my($NucleicAcidID) = @_;

  if (_ValidateNucleicAcidID($NucleicAcidID)) {
    return 1;
  }
  return 0;
}
|
|
166
|
|
#
# Is it an available nucleic acid property? The name must match a data file
# column label exactly, including case. Returns 1 or 0.
#
sub IsNucleicAcidProperty {
  my($PropertyName) = @_;

  return (exists $NucleicAcidPropertyNamesMap{$PropertyName}) ? 1 : 0;
}
|
|
178
|
|
#
# Is it an available nucleic acid type? The type is compared without regard
# to case. Returns 1 or 0.
#
sub IsNucleicAcidType {
  my($Type) = @_;
  my $TypeKey = lc($Type);

  return (exists $NucleicAcidTypesMap{$TypeKey}) ? 1 : 0;
}
|
|
190
|
|
#
# Implements GetNucleicAcid<PropertyName> for a valid property name.
#
# Perl calls AUTOLOAD for any function of this package that is not defined.
# The requested name must be GetNucleicAcid followed by a property name
# exactly as it appears in the data file column labels; anything else croaks.
#
# Argument: a nucleic acid code, other code or name.
# Returns:  the property value, or undef for an unknown nucleic acid ID.
#
sub AUTOLOAD {
  my($NucleicAcidID) = @_;
  my($FunctionName, $PropertyName, $Code);

  # Perl stores the fully qualified name of the missing function here...
  our $AUTOLOAD;
  $FunctionName = $AUTOLOAD;
  $FunctionName =~ s/.*:://;

  # Only GetNucleicAcid<PropertyName> functions are supported...
  if ($FunctionName !~ /^GetNucleicAcid/) {
    croak "Error: Function, NucleicAcids::$FunctionName, is not supported by AUTOLOAD in NucleicAcids module: Only GetNucleicAcid<PropertyName> functions are implemented...";
  }

  $PropertyName = $FunctionName;
  $PropertyName =~ s/^GetNucleicAcid//;
  if (!exists $NucleicAcidPropertyNamesMap{$PropertyName}) {
    croak "Error: Function, NucleicAcids::$FunctionName, is not supported by AUTOLOAD in NucleicAcids module: Unknown nucleic acid property name, $PropertyName, specified...";
  }

  if (!($Code = _ValidateNucleicAcidID($NucleicAcidID))) {
    return undef;
  }

  return $NucleicAcidDataMap{$Code}{$PropertyName};
}
|
|
221
|
|
#
# Locate NucleicAcidsData.csv under the data directory of the MayaChemTools
# lib directory and load it. Croaks when the file does not exist.
#
sub _LoadNucleicAcidsData {
  my $MayaChemToolsLibDir = GetMayaChemToolsLibDirName();
  my $NucleicAcidsDataFile = "${MayaChemToolsLibDir}/data/NucleicAcidsData.csv";

  unless (-e $NucleicAcidsDataFile) {
    croak "Error: MayaChemTools package file, $NucleicAcidsDataFile, is missing: Possible installation problems...";
  }

  _LoadData($NucleicAcidsDataFile);
}
|
|
238
|
|
#
# Load the specified NucleicAcidsData.csv file into the file-level data
# tables, replacing whatever they held before.
#
# Argument: path of the CSV data file.
#
# Croaks when the file cannot be opened or when a data line does not have the
# same number of fields as the column label line. A line whose code has
# already been loaded is skipped with a warning.
#
sub _LoadData {
  my($NucleicAcidsDataFile) = @_;

  %NucleicAcidDataMap = ();
  @NucleicAcidCodes = ();
  @NucleicAcidPropertyNames = ();
  %NucleicAcidPropertyNamesMap = ();
  %NucleicAcidCodeMap = ();
  %NucleicAcidOtherCodeMap = ();
  %NucleicAcidNameMap = ();
  %NucleicAcidTypesMap = ();

  # Load property data for all nucleic acids...
  #
  # File Format:
  # "Code","OtherCodes","BasePair","Name","Type","ChemicalFormula","ChemicalFormulaAtpH7.5","MolecularWeight","ExactMass","ElementalComposition"
  #
  my($Code, $Line, $NumOfCols, $NumOfFields, $InDelim, $Name, $NucleicAcidsDataFH, @LineWords, @ColLabels);

  $InDelim = "\,";

  # Three-argument open with a lexical handle: the file name can never be
  # taken for an open mode, and the handle is closed automatically should a
  # croak below unwind out of this function.
  open($NucleicAcidsDataFH, '<', $NucleicAcidsDataFile) or croak "Couldn't open $NucleicAcidsDataFile: $! ...";

  # Skip lines up to column labels...
  # NOTE(review): assumes GetTextLine (not defined in this file) accepts any
  # file handle reference, as it did the glob reference passed previously.
  LINE: while ($Line = GetTextLine($NucleicAcidsDataFH)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }
  @ColLabels = quotewords($InDelim, 0, $Line);
  $NumOfCols = @ColLabels;

  # Extract property names from column labels...
  @NucleicAcidPropertyNames = ();
  for $Name (@ColLabels) {
    push @NucleicAcidPropertyNames, $Name;

    # Store property names...
    $NucleicAcidPropertyNamesMap{$Name} = $Name;
  }

  # Process nucleic acid data...
  LINE: while ($Line = GetTextLine($NucleicAcidsDataFH)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    @LineWords = quotewords($InDelim, 0, $Line);
    $NumOfFields = @LineWords;
    if ($NumOfFields != $NumOfCols) {
      # Report the field count, not the field values...
      croak "Error: The number of data fields, $NumOfFields, in $NucleicAcidsDataFile must be $NumOfCols.\nLine: $Line...";
    }

    # The code is the first column, as laid out in the file format above...
    $Code = $LineWords[0];
    if (exists $NucleicAcidDataMap{$Code}) {
      carp "Warning: Ignoring data for nucleic acid $Code: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    # Store all the values: the field counts match, so each value pairs up
    # with the property name of its column...
    push @NucleicAcidCodes, $Code;
    $NucleicAcidDataMap{$Code} = {};
    @{$NucleicAcidDataMap{$Code}}{@NucleicAcidPropertyNames} = @LineWords;
  }
  close $NucleicAcidsDataFH;

  # Setup code, other code, name and type lookup maps...
  _SetupNucleicAcidIDMap();
}
|
|
313
|
|
#
# Setup lowercase code, other codes, name and type maps. The first three map
# to the code as shown in the data file; the type map holds the type as
# spelled in the data file.
#
# Codes are processed in sorted order so that, should two nucleic acids share
# a name, an other code or a type differing only in case, the outcome is the
# same on every run instead of following hash ordering.
#
sub _SetupNucleicAcidIDMap {
  my($NucleicAcidName, $NucleicAcidType, $TypeKey, $TrimmedCode);

  %NucleicAcidCodeMap = ();
  %NucleicAcidOtherCodeMap = ();
  %NucleicAcidNameMap = ();
  %NucleicAcidTypesMap = ();

  for my $Code (sort keys %NucleicAcidDataMap) {
    $NucleicAcidCodeMap{lc($Code)} = $Code;

    $NucleicAcidName = $NucleicAcidDataMap{$Code}{Name};
    $NucleicAcidNameMap{lc($NucleicAcidName)} = $Code;

    # Test the same lowercase key that is stored, so the first spelling
    # seen for a type is the one that is kept...
    $NucleicAcidType = $NucleicAcidDataMap{$Code}{Type};
    $TypeKey = lc($NucleicAcidType);
    if (!exists $NucleicAcidTypesMap{$TypeKey}) {
      $NucleicAcidTypesMap{$TypeKey} = $NucleicAcidType;
    }

    OTHERCODE: for my $OtherCode (split /\,/, $NucleicAcidDataMap{$Code}{OtherCodes}) {
      if (!length $OtherCode) {
        next OTHERCODE;
      }
      # Trim before deciding whether anything is left: a whitespace only
      # entry must not be stored under an empty key, while an entry such as
      # 0 is a legitimate code...
      $TrimmedCode = RemoveLeadingAndTrailingWhiteSpaces($OtherCode);
      if (!(defined $TrimmedCode && length $TrimmedCode)) {
        next OTHERCODE;
      }
      $NucleicAcidOtherCodeMap{lc($TrimmedCode)} = $Code;
    }
  }
}
|
|
347
|
|
#
# Validate nucleic acid ID...
#
# The ID is lowercased and looked up as a code, then as an other code and
# finally as a name; the first hit wins. Returns the code as shown in the
# data file, or undef when the ID is unknown.
#
sub _ValidateNucleicAcidID {
  my($NucleicAcidID) = @_;
  my $LookupKey = lc($NucleicAcidID);

  for my $IDMapRef (\%NucleicAcidCodeMap, \%NucleicAcidOtherCodeMap, \%NucleicAcidNameMap) {
    if (exists $IDMapRef->{$LookupKey}) {
      return $IDMapRef->{$LookupKey};
    }
  }

  return undef;
}
|
|
364
|
|
365
|
|
366 1;
|
|
367
|
|
368 __END__
|
|
369
|
|
370 =head1 NAME
|
|
371
|
|
372 NucleicAcids
|
|
373
|
|
374 =head1 SYNOPSIS
|
|
375
|
|
376 use NucleicAcids;
|
|
377
|
|
378 use NucleicAcids qw(:all);
|
|
379
|
|
380 =head1 DESCRIPTION
|
|
381
|
|
382 B<NucleicAcids> module provides the following functions:
|
|
383
|
|
384 GetNucleicAcidPropertiesData, GetNucleicAcidPropertiesNames,
|
|
385 GetNucleicAcids, GetNucleicAcidsByType, IsNucleicAcid, IsNucleicAcidProperty,
|
|
386 IsNucleicAcidType
|
|
387
|
|
388 =head1 Functions
|
|
389
|
|
390 =over 4
|
|
391
|
|
392 =item B<GetNucleicAcids>
|
|
393
|
|
394 (@Names) = GetNucleicAcids([$NameType]);
|
|
395 $NamesRef = GetNucleicAcids([$NameType]);
|
|
396
|
|
397 Returns an array or a reference to an array containing names of nucleic acids
|
|
398 as a code or nucleic acid name controlled by optional parameter I<NameType>. By
|
|
399 default, nucleic acids names are returned as the code. Possible values for
|
|
400 I<NameType>: I<Code or Name>.
|
|
401
|
|
402 =item B<GetNucleicAcidsByType>
|
|
403
|
|
404 (@Names) = GetNucleicAcidsByType([$Type, $NameType]);
|
|
405 $NamesRef = GetNucleicAcidsByType([$Type, $NameType]);
|
|
406
|
|
407 Returns an array or a reference to an array containing names of nucleic acids
|
|
408 specified by parameter I<Type> as a code or name controlled by optional
|
|
409 parameter I<NameType>. Default value for I<Type>: I<Nucleoside>. Default value for
|
|
410 I<NameType>: I<Code>. Possible values for I<Type>: I<Nucleobase, Nucleoside, Deoxynucleoside,
|
|
411 Nucleotide, Deoxynucleotide>. Possible values for I<NameType>: I<Code or Name>.
|
|
412
|
|
413 =item B<GetNucleicAcidPropertiesData>
|
|
414
|
|
415 $DataHashRef = GetNucleicAcidPropertiesData($NucleicAcidID);
|
|
416
|
|
417 Returns a reference to hash containing property names and values for a specified
|
|
418 I<NucleicAcidID>.
|
|
419
|
|
420 =item B<GetNucleicAcidPropertyName>
|
|
421
|
|
422 $Value = GetNucleicAcid<PropertyName>($NucleicAcidID);
|
|
423
|
|
424 Returns nucleic acid property value for a specified I<NucleicAcidID>. This function is
|
|
425 implemented on-the-fly using Perl's AUTOLOAD functionality.
|
|
426
|
|
427 =item B<GetNucleicAcidPropertiesNames>
|
|
428
|
|
429 @Names = GetNucleicAcidPropertiesNames([$Mode]);
|
|
430 $NamesRef = GetNucleicAcidPropertiesNames([$Mode]);
|
|
431
|
|
432 Returns an array or a reference to an array containing names of properties for
|
|
433 nucleic acids. Order of nucleic acids properties is controlled by optional parameter
|
|
434 I<Mode>. Possible values for I<Mode>: I<Alphabetical or ByGroup>; Default: I<ByGroup>.
|
|
435
|
|
436 =item B<IsNucleicAcid>
|
|
437
|
|
438 $Status = IsNucleicAcid($NucleicAcidID);
|
|
439
|
|
440 Returns 1 or 0 based on whether it's a known nucleic acid ID.
|
|
441
|
|
442 =item B<IsNucleicAcidProperty>
|
|
443
|
|
444 $Status = IsNucleicAcidProperty($PropertyName);
|
|
445
|
|
446 Returns 1 or 0 based on whether it's a known nucleic acid property name.
|
|
447
|
|
448 =item B<IsNucleicAcidType>
|
|
449
|
|
450 $Status = IsNucleicAcidType($Type);
|
|
451
|
|
452 Returns 1 or 0 based on whether it's a known nucleic acid type.
|
|
453
|
|
454 =back
|
|
455
|
|
456 =head1 AUTHOR
|
|
457
|
|
458 Manish Sud <msud@san.rr.com>
|
|
459
|
|
460 =head1 SEE ALSO
|
|
461
|
|
462 AminoAcids.pm, PeriodicTable.pm
|
|
463
|
|
464 =head1 COPYRIGHT
|
|
465
|
|
466 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
467
|
|
468 This file is part of MayaChemTools.
|
|
469
|
|
470 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
471 the terms of the GNU Lesser General Public License as published by the Free
|
|
472 Software Foundation; either version 3 of the License, or (at your option)
|
|
473 any later version.
|
|
474
|
|
475 =cut
|