| 
0
 | 
     1 #!/usr/bin/perl -w
 | 
| 
 | 
     2 #
 | 
| 
 | 
     3 # $RCSfile: InfoNucleicAcids.pl,v $
 | 
| 
 | 
     4 # $Date: 2015/02/28 20:46:20 $
 | 
| 
 | 
     5 # $Revision: 1.26 $
 | 
| 
 | 
     6 #
 | 
| 
 | 
     7 # Author: Manish Sud <msud@san.rr.com>
 | 
| 
 | 
     8 #
 | 
| 
 | 
     9 # Copyright (C) 2015 Manish Sud. All rights reserved.
 | 
| 
 | 
    10 #
 | 
| 
 | 
    11 # This file is part of MayaChemTools.
 | 
| 
 | 
    12 #
 | 
| 
 | 
    13 # MayaChemTools is free software; you can redistribute it and/or modify it under
 | 
| 
 | 
    14 # the terms of the GNU Lesser General Public License as published by the Free
 | 
| 
 | 
    15 # Software Foundation; either version 3 of the License, or (at your option) any
 | 
| 
 | 
    16 # later version.
 | 
| 
 | 
    17 #
 | 
| 
 | 
    18 # MayaChemTools is distributed in the hope that it will be useful, but without
 | 
| 
 | 
    19 # any warranty; without even the implied warranty of merchantability of fitness
 | 
| 
 | 
    20 # for a particular purpose.  See the GNU Lesser General Public License for more
 | 
| 
 | 
    21 # details.
 | 
| 
 | 
    22 #
 | 
| 
 | 
    23 # You should have received a copy of the GNU Lesser General Public License
 | 
| 
 | 
    24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
 | 
| 
 | 
    25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
 | 
| 
 | 
    26 # Boston, MA, 02111-1307, USA.
 | 
| 
 | 
    27 #
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 use strict;
 | 
| 
 | 
    30 use FindBin; use lib "$FindBin::Bin/../lib";
 | 
| 
 | 
    31 use Getopt::Long;
 | 
| 
 | 
    32 use File::Basename;
 | 
| 
 | 
    33 use Text::ParseWords;
 | 
| 
 | 
    34 use Benchmark;
 | 
| 
 | 
    35 use FileUtil;
 | 
| 
 | 
    36 use TextUtil;
 | 
| 
 | 
    37 use NucleicAcids;
 | 
| 
 | 
    38 
 | 
# File scoped state shared by all the subroutines in this script: %Options holds the
# command line option values retrieved by SetupScriptUsage and %OptionsInfo holds the
# processed values set up by ProcessOptions...
my($ScriptName, %Options, %OptionsInfo, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";

# Use a direct class method call instead of indirect object syntax ("new Benchmark")...
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help}) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

print "Processing options...\n";
ProcessOptions();

ListNucleicAcidProperties();
print "\n$ScriptName:Done...\n\n";

# Report the time taken by the script...
$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";
 | 
| 
 | 
    65 
 | 
| 
 | 
    66 ###############################################################################
 | 
| 
 | 
    67 
 | 
# List property values for a nucleic acid...
#
# Arguments:
#   $DataLabelRef - reference to an array of property labels.
#   $DataValueRef - reference to an array of property values, in the same order as labels.
#
# For rows output style, all the values are written out as one delimited line. Otherwise,
# a "label value" line is written out for each label. Lines go to OUTFILE during file
# output and to the default output handle otherwise.
sub ListNucleicAcidData {
  my($DataLabelRef, $DataValueRef) = @_;

  # Decide once where the lines go...
  my $WriteLine = $OptionsInfo{FileOutput}
    ? sub { print OUTFILE "$_[0]\n"; }
    : sub { print "$_[0]\n"; };

  if ($OptionsInfo{NucleicAcidRowsOutput}) {
    my $Row = '';
    if ($OptionsInfo{OutQuote} || $Options{outdelim} !~ /^comma$/i) {
      # Quoting and delimiting of all the values is handled by JoinWords...
      $Row = JoinWords($DataValueRef, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
    }
    else {
      # Unquoted comma delimited output: always quote values containing commas...
      $Row = join $OptionsInfo{OutDelim}, map { /\,/ ? qq("$_") : $_ } @{$DataValueRef};
    }
    $WriteLine->($Row);
  }
  else {
    # Block style: one line for each label and value pair...
    for my $Position (0 .. $#{$DataLabelRef}) {
      $WriteLine->($DataLabelRef->[$Position] . ' ' . $DataValueRef->[$Position]);
    }
  }
}
 | 
| 
 | 
   111 
 | 
# List header row containing property labels for rows output style...
#
# Arguments:
#   $DataLabelRef - reference to an array of property labels.
#
# Labels are joined by JoinWords using the output delimiter and quote settings; all
# the colons are then removed from the line before it's written to OUTFILE during
# file output or to the default output handle otherwise.
sub ListHeaderRowData {
  my($DataLabelRef) = @_;

  # Format header line and drop colons used by block style labels...
  my $HeaderLine = JoinWords($DataLabelRef, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
  $HeaderLine =~ tr/://d;

  # List header line...
  if (!$OptionsInfo{FileOutput}) {
    print "$HeaderLine\n";
  }
  else {
    print OUTFILE "$HeaderLine\n";
  }
}
 | 
| 
 | 
   128 
 | 
# List properties for nucleic acids...
#
# Goes over the nucleic acid IDs in @{$OptionsInfo{SpecifiedNucleicAcidIDs}}, retrieves
# property data for each ID using NucleicAcids::GetNucleicAcidPropertiesData, collects
# values for the property names in @{$OptionsInfo{SpecifiedProperies}} and lists them
# using ListHeaderRowData and ListNucleicAcidData.
#
# During file output, the OUTFILE handle used by the listing subroutines is opened
# here and closed after all the data is written. Dies when the output file can't be
# opened or closed.
sub ListNucleicAcidProperties {
  my($NucleicAcidID, $NucleicAcidDataRef, $PropertyName, $PropertyValue, @PropertyLabels, @PropertyValues);

  print "Listing information for nucleic acid(s)...\n";

  if ($OptionsInfo{FileOutput}) {
    print "Generating file $OptionsInfo{OutFileName}...\n";
    # Three argument open keeps characters in the file name from being interpreted
    # as part of the open mode...
    open(OUTFILE, '>', $OptionsInfo{OutFileName}) or die "Couldn't open $OptionsInfo{OutFileName}: $!\n";
  }

  # Setup property labels...
  @PropertyLabels = map { "$_:" } @{$OptionsInfo{SpecifiedProperies}};

  if ($OptionsInfo{NucleicAcidRowsOutput}) {
    ListHeaderRowData(\@PropertyLabels);
  }

  # Go over specified nucleic acids...
  for $NucleicAcidID (@{$OptionsInfo{SpecifiedNucleicAcidIDs}}) {
    $NucleicAcidDataRef = NucleicAcids::GetNucleicAcidPropertiesData($NucleicAcidID);

    if (!$OptionsInfo{NucleicAcidRowsOutput}) {
      if ($OptionsInfo{FileOutput}) {
        print OUTFILE "\nListing properties for nucleic acid $NucleicAcidID...\n\n";
      }
      else {
        print "\nListing properties for nucleic acid $NucleicAcidID...\n\n";
      }
    }

    # Collect data..
    @PropertyValues = ();
    for $PropertyName (@{$OptionsInfo{SpecifiedProperies}}) {
      $PropertyValue = $NucleicAcidDataRef->{$PropertyName};
      if (IsFloat($PropertyValue)) {
        # Adding 0 converts the formatted string back to a number and drops trailing zeros...
        $PropertyValue = sprintf("%.$OptionsInfo{Precision}f", $PropertyValue) + 0;
      }
      push @PropertyValues, $PropertyValue;
    }
    # List data...
    ListNucleicAcidData(\@PropertyLabels, \@PropertyValues);
  }
  if ($OptionsInfo{FileOutput}) {
    # Errors during buffered writes may not show up until the file is closed...
    close(OUTFILE) or die "Couldn't close $OptionsInfo{OutFileName}: $!\n";
  }
  print "\n";
}
 | 
| 
 | 
   180 
 | 
# Get property names for a category...
#
# Arguments:
#   $CategoryName - category name; Basic and BasicPlus are matched irrespective of case.
#
# Returns a list of property names for the category. An empty list is returned for
# any other category name.
sub GetPropertyNamesFromCategories {
  my($CategoryName) = @_;

  # BasicPlus category extends the list of names for Basic category...
  my @BasicNames = qw(Code OtherCodes Name Type MolecularFormula MolecularWeight);

  my @SelectedNames =
      ($CategoryName =~ /^Basic$/i)     ? @BasicNames
    : ($CategoryName =~ /^BasicPlus$/i) ? (@BasicNames, qw(ExactMass ElementalComposition))
    :                                     ();

  return @SelectedNames;
}
 | 
| 
 | 
   195 
 | 
# Process option values...
#
# Transfers option values from %Options into %OptionsInfo, sets up the list of nucleic
# acid IDs to process in @{$OptionsInfo{SpecifiedNucleicAcidIDs}}, sets up properties
# to list using SetupSpecifiedProperties and, during file output, generates the output
# file name.
#
# Nucleic acid IDs or types come from command line parameters; a mode specific default
# value is used when none are specified. A single "All" parameter selects all the nucleic
# acids returned by NucleicAcids::GetNucleicAcids. Unknown IDs and types are ignored
# with a warning.
#
# Dies when the output file already exists and "-o --overwrite" option is not specified.
sub ProcessOptions {
  %OptionsInfo = ();

  $OptionsInfo{Mode} = $Options{mode};

  $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /^tab$/i ) ? "\t" : (($Options{outdelim} =~ /^semicolon$/i) ? "\;" : "\,");
  $OptionsInfo{OutQuote} = ($Options{quote} =~ /^yes$/i) ? 1 : 0;

  $OptionsInfo{Overwrite} = defined $Options{overwrite} ? $Options{overwrite} : undef;
  $OptionsInfo{OutFileRoot} = defined $Options{root} ? $Options{root} : undef;

  $OptionsInfo{Output} = $Options{output};
  $OptionsInfo{OutputStyle} = $Options{outputstyle};

  $OptionsInfo{NucleicAcidRowsOutput} = ($Options{outputstyle} =~ /^NucleicAcidRows$/i) ? 1 : 0;
  $OptionsInfo{FileOutput} = ($Options{output} =~ /^File$/i) ? 1 : 0;

  $OptionsInfo{Precision} = $Options{precision};

  my($NucleicAcidID, @NucleicAcidIDs);

  @{$OptionsInfo{SpecifiedNucleicAcidIDs}} = ();

  # Set up Nucleic Acids IDs except for All mode...
  @NucleicAcidIDs = ();

  if (@ARGV >= 1) {
    push @NucleicAcidIDs, @ARGV;
  }
  else {
    # Setup mode specified default values: Nucleoside for NucleicAcidType mode
    # and A for everything else...
    push @NucleicAcidIDs, ($Options{mode} =~ /NucleicAcidType/i) ? 'Nucleoside' : 'A';
  }

  # Generate list of nucleic acids...
  if (@ARGV == 1 && $ARGV[0] =~ /^All$/i) {
    push @{$OptionsInfo{SpecifiedNucleicAcidIDs}}, NucleicAcids::GetNucleicAcids();
  }
  elsif ($Options{mode} =~ /NucleicAcidID/i) {
    for $NucleicAcidID (@NucleicAcidIDs) {
      if (NucleicAcids::IsNucleicAcid($NucleicAcidID)) {
        push @{$OptionsInfo{SpecifiedNucleicAcidIDs}}, $NucleicAcidID;
      }
      else {
        warn "Ignoring nucleic acid ID, $NucleicAcidID, specified using command line parameter option: Unknown nucleic acid ID...\n";
      }
    }
  }
  elsif ($Options{mode} =~ /NucleicAcidType/i) {
    TYPE: for $NucleicAcidID (@NucleicAcidIDs) {
      if (!NucleicAcids::IsNucleicAcidType($NucleicAcidID)) {
        warn "Ignoring nucleic acid type, $NucleicAcidID, specified using command line parameter option: Unknown nucleic acid type...\n";
        next TYPE;
      }
      # Each valid type contributes all the nucleic acids of that type...
      push @{$OptionsInfo{SpecifiedNucleicAcidIDs}}, NucleicAcids::GetNucleicAcidsByType($NucleicAcidID);
    }
  }
  SetupSpecifiedProperties();

  # Setup output file name...
  $OptionsInfo{OutFileName} = '';
  if ($OptionsInfo{FileOutput}) {
    my($OutFileRoot, $OutFileExt);

    $OutFileRoot = '';
    $OutFileExt = "csv";
    if ($Options{outdelim} =~ /^tab$/i) {
      $OutFileExt = "tsv";
    }
    if ($Options{root}) {
      # Use just the name part when the specified root contains a file extension...
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root});
      if ($RootFileName && $RootFileExt) {
        $OutFileRoot = $RootFileName;
      }
      else {
        $OutFileRoot = $Options{root};
      }
    }
    else {
      $OutFileRoot = 'NucleicAcidsInfo';
    }
    $OptionsInfo{OutFileName} = $OutFileRoot . '.' . $OutFileExt;
    if (!$Options{overwrite} && -e $OptionsInfo{OutFileName}) {
      die "Error: Output file, $OptionsInfo{OutFileName}, already exists.\nUse \"-o --overwrite\" option or specify a different name using \"-r --root\" option.\n";
    }
  }
}
 | 
| 
 | 
   297 
 | 
# Setup properties to list...
#
# Fills @{$OptionsInfo{SpecifiedProperies}} with the names of properties to list using
# values of "--propertiesmode", "-p --properties" and "--propertieslisting" options:
#
#   . All mode: all the property names returned by NucleicAcids module; any specified
#     properties are ignored with a warning.
#   . Categories mode: property names for Basic or BasicPlus category; dies for any
#     other category name.
#   . Names mode: comma delimited property names known to NucleicAcids module; unknown
#     names are ignored with a warning. For Alphabetical listing, Code, OtherCodes and
#     Name are listed first followed by rest of the names in sorted order.
#
# Property names for Basic category are used when no properties are specified.
sub SetupSpecifiedProperties {

  $OptionsInfo{Properties} = defined $Options{properties} ? $Options{properties} : undef;

  $OptionsInfo{PropertiesMode} = $Options{propertiesmode};
  $OptionsInfo{PropertiesListing} = $Options{propertieslisting};

  # Make sure appropriate properties/category names are specified...
  @{$OptionsInfo{SpecifiedProperies}} = ();
  if ($Options{properties} && ($Options{propertiesmode} =~ /^All$/i) ) {
    warn "Warning: Ignoring values specifed by \"-p --properties\" option: Not valid for All value of \"--propertiesmode\" option...\n";
  }
  if ($Options{propertiesmode} =~ /^All$/i) {
    if ($Options{propertieslisting} =~ /^Alphabetical$/i) {
      push @{$OptionsInfo{SpecifiedProperies}}, NucleicAcids::GetNucleicAcidPropertiesNames('Alphabetical');
    }
    else {
      push @{$OptionsInfo{SpecifiedProperies}}, NucleicAcids::GetNucleicAcidPropertiesNames();
    }
  }
  else {
    if ($Options{properties}) {
      if ($Options{propertiesmode} =~ /^Categories$/i) {
        # Check category name...
        if ($Options{properties} !~ /^(Basic|BasicPlus)$/i) {
          die "Error: The value specified, $Options{properties}, for option \"-p --properties\" in conjunction with \"Categories\" value for option \"--propertiesmode\" is not valid. Allowed values: Basic and BasicPlus\n";
        }
        # Set propertynames...
        push @{$OptionsInfo{SpecifiedProperies}}, GetPropertyNamesFromCategories($Options{properties});
      }
      else {
        # Check property names..
        my($Name, $PropertyName, @Names);
        @Names = split /\,/, $Options{properties};
        for $Name (@Names) {
          $PropertyName = RemoveLeadingAndTrailingWhiteSpaces($Name);
          if (NucleicAcids::IsNucleicAcidProperty($PropertyName)) {
            push @{$OptionsInfo{SpecifiedProperies}}, $PropertyName;
          }
          else {
            warn "Warning: Ignoring value, $Name, specifed by \"-p --properties\" option: Unknown property name...\n";
          }
        }
        if ($Options{propertieslisting} =~ /^Alphabetical$/i) {
          # Code, OtherCodes and Name are always listed first...
          my($CodePresent, $OtherCodesPresent, $NamePresent, @AlphabeticalProperties, %PropertiesMap);
          %PropertiesMap = ();
          @AlphabeticalProperties = ();
          $CodePresent = 0; $OtherCodesPresent = 0; $NamePresent = 0;
          NAME: for $Name (@{$OptionsInfo{SpecifiedProperies}}) {
            if ($Name =~ /^Code$/i) {
              $CodePresent = 1;
              next NAME;
            }
            if ($Name =~ /^OtherCodes$/i) {
              $OtherCodesPresent = 1;
              next NAME;
            }
            if ($Name =~ /^Name$/i) {
              $NamePresent = 1;
              next NAME;
            }
            # Hash keys also take care of any duplicate names...
            $PropertiesMap{$Name} = $Name;
          }
          # Setup the alphabetical list...
          if ($CodePresent) {
            push @AlphabeticalProperties, 'Code';
          }
          if ($OtherCodesPresent) {
            # Push the property name itself: it's used both as the label and as
            # the key to look up the property value...
            push @AlphabeticalProperties, 'OtherCodes';
          }
          if ($NamePresent) {
            push @AlphabeticalProperties, 'Name';
          }
          push @AlphabeticalProperties, sort keys %PropertiesMap;

          @{$OptionsInfo{SpecifiedProperies}} = ();
          push @{$OptionsInfo{SpecifiedProperies}}, @AlphabeticalProperties;
        }
      }
    }
    else {
      # Set default value...
      push @{$OptionsInfo{SpecifiedProperies}}, GetPropertyNamesFromCategories('Basic');
    }
  }
}
 | 
| 
 | 
   387 
 | 
# Setup script usage and retrieve command line arguments specified using various options...
#
# Initializes %Options with default option values, retrieves the specified values using
# GetOptions, changes to the working directory specified by "-w --workingdir" option and
# validates the option values.
#
# Dies with an error message for invalid options, an invalid working directory or an
# option value which is not allowed.
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();
  $Options{mode} = "NucleicAcidID";
  $Options{outdelim} = "comma";
  $Options{output} = "STDOUT";
  $Options{outputstyle} = "NucleicAcidBlock";
  $Options{precision} = 4;
  $Options{propertiesmode} = "Categories";
  $Options{propertieslisting} = "ByGroup";
  $Options{quote} = "yes";

  if (!GetOptions(\%Options, "help|h", "mode|m=s", "outdelim=s", "output=s", "outputstyle=s", "overwrite|o", "precision=i", "properties|p=s", "propertieslisting=s", "propertiesmode=s", "quote|q=s", "root|r=s", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if ($Options{mode} !~ /^(NucleicAcidID|NucleicAcidType)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"--mode\" is not valid. Allowed values: NucleicAcidID or NucleicAcidType\n";
  }
  if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{output} !~ /^(STDOUT|File)$/i) {
    die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: STDOUT or File\n";
  }
  if ($Options{outputstyle} !~ /^(NucleicAcidBlock|NucleicAcidRows)$/i) {
    die "Error: The value specified, $Options{outputstyle}, for option \"--outputstyle\" is not valid. Allowed values: NucleicAcidBlock or NucleicAcidRows\n";
  }
  if (!IsPositiveInteger($Options{precision})) {
    # Precision has no short form: "-p" is used by "--properties" option...
    die "Error: The value specified, $Options{precision}, for option \"--precision\" is not valid. Allowed values: > 0 \n";
  }
  if ($Options{propertiesmode} !~ /^(Categories|Names|All)$/i) {
    die "Error: The value specified, $Options{propertiesmode}, for option \"--propertiesmode\" is not valid. Allowed values: Categories, Names, or All\n";
  }
  if ($Options{propertieslisting} !~ /^(ByGroup|Alphabetical)$/i) {
    die "Error: The value specified, $Options{propertieslisting}, for option \"--propertieslisting\" is not valid. Allowed values: ByGroup, or Alphabetical\n";
  }
  if ($Options{quote} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
  }
}
 | 
| 
 | 
   436 
 | 
| 
 | 
   437 __END__
 | 
| 
 | 
   438 
 | 
| 
 | 
   439 =head1 NAME
 | 
| 
 | 
   440 
 | 
| 
 | 
   441 InfoNucleicAcids.pl - List properties of nucleic acids
 | 
| 
 | 
   442 
 | 
| 
 | 
   443 =head1 SYNOPSIS
 | 
| 
 | 
   444 
 | 
| 
 | 
   445 InfoNucleicAcids.pl NucleicAcidIDs...
 | 
| 
 | 
   446 
 | 
| 
 | 
   447 InfoNucleicAcids.pl [B<-h, --help>] [B<-m, --mode> NucleicAcidID | NucleicAcidType]
 | 
| 
 | 
   448 [B<--OutDelim> comma | tab | semicolon]
 | 
| 
 | 
   449 [B<--output> STDOUT | File] [B<--OutputStyle> NucleicAcidBlock | NucleicAcidRows]
 | 
| 
 | 
   450 [B<-o, --overwrite>] [B<--precision> number] [B<--PropertiesMode> Categories | Names | All]
 | 
| 
 | 
   451 [B<-p, --properties> CategoryName, [CategoryName,...] | PropertyName, [PropertyName,...]]
 | 
| 
 | 
   452 [B<--PropertiesListing> ByGroup | Alphabetical] [B<-q, --quote> yes | no] [B<-r, --root> rootname]
 | 
| 
 | 
   453 [B<-w, --WorkingDir> dirname] NucleicAcidIDs...
 | 
| 
 | 
   454 
 | 
| 
 | 
   455 =head1 DESCRIPTION
 | 
| 
 | 
   456 
 | 
| 
 | 
   457 List nucleic acid properties. Nucleic acids identification supports two types of IDs: code
 | 
| 
 | 
   458 or name. Nucleic acid properties data, in addition to basic information about nucleic acids - code,
 | 
| 
 | 
   459 name, type, chemical formula and molecular weight - include information about exact mass and
 | 
| 
 | 
   460 elemental composition.
 | 
| 
 | 
   461 
 | 
| 
 | 
   462 =head1 PARAMETERS
 | 
| 
 | 
   463 
 | 
| 
 | 
   464 =over 4
 | 
| 
 | 
   465 
 | 
| 
 | 
   466 =item B<NucleicAcidIDs> I<Code [NucleicAcidName...] | NucleicAcidType [NucleicAcidType...]>
 | 
| 
 | 
   467 
 | 
| 
 | 
   468 I<NucleicAcidIDs> is a space delimited list of values to identify nucleic acids.
 | 
| 
 | 
   469 
 | 
| 
 | 
   470 For I<NucleicAcidID> mode, input value format is: I<Code [NucleicAcidName...]>. Default: I<A>.
 | 
| 
 | 
   471 Examples:
 | 
| 
 | 
   472 
 | 
| 
 | 
   473     A
 | 
| 
 | 
   474     dG AMP
 | 
| 
 | 
   475     Cytidine T UDP dpppA "5'-dATP"
 | 
| 
 | 
   476 
 | 
| 
 | 
   477 For I<NucleicAcidType> mode, input value format is: I<NucleicAcidType [NucleicAcidType...]>.
 | 
| 
 | 
    478 Possible values are: I<Nucleobase, Nucleoside, Deoxynucleoside, Nucleotide,
 | 
| 
 | 
   479 Deoxynucleotide>. Default: I<Nucleoside>.
 | 
| 
 | 
   480 Examples:
 | 
| 
 | 
   481 
 | 
| 
 | 
   482     Deoxynucleoside
 | 
| 
 | 
   483     Nucleobase Nucleotide
 | 
| 
 | 
   484 
 | 
| 
 | 
   485 =back
 | 
| 
 | 
   486 
 | 
| 
 | 
   487 =head1 OPTIONS
 | 
| 
 | 
   488 
 | 
| 
 | 
   489 =over 4
 | 
| 
 | 
   490 
 | 
| 
 | 
   491 =item B<-h, --help>
 | 
| 
 | 
   492 
 | 
| 
 | 
   493 Print this help message.
 | 
| 
 | 
   494 
 | 
| 
 | 
   495 =item B<-m, --mode> I<NucleicAcidID | NucleicAcidType>
 | 
| 
 | 
   496 
 | 
| 
 | 
   497 Specify nucleic acids for listing properties using one of these methods: nucleic acid
 | 
| 
 | 
   498 code and/or names or nucleic acid type.
 | 
| 
 | 
   499 
 | 
| 
 | 
   500 Possible values: I<NucleicAcidID or NucleicAcidType>. Default: I<NucleicAcidID>
 | 
| 
 | 
   501 
 | 
| 
 | 
    502 For I<NucleicAcidType>, command line parameters support these types: I<Nucleobase,
 | 
| 
 | 
   503 Nucleoside, Deoxynucleoside, Nucleotide, Deoxynucleotide>.
 | 
| 
 | 
   504 
 | 
| 
 | 
   505 =item B<--OutDelim> I<comma | tab | semicolon>
 | 
| 
 | 
   506 
 | 
| 
 | 
   507 Output text file delimiter. Possible values: I<comma, tab, or semicolon>
 | 
| 
 | 
   508 Default value: I<comma>.
 | 
| 
 | 
   509 
 | 
| 
 | 
   510 =item B<--output> I<STDOUT | File>
 | 
| 
 | 
   511 
 | 
| 
 | 
   512 List information at STDOUT or write it to a file. Possible values: I<STDOUT or File>. Default:
 | 
| 
 | 
   513 I<STDOUT>. B<-r, --root> option is used to generate output file name.
 | 
| 
 | 
   514 
 | 
| 
 | 
   515 =item B<--OutputStyle> I<NucleicAcidBlock | NucleicAcidRows>
 | 
| 
 | 
   516 
 | 
| 
 | 
   517 Specify how to list nucleic acid information: add a new line for each property and present it as a block
 | 
| 
 | 
   518 for each nucleic acid; or include all properties in one line and show it as a single line.
 | 
| 
 | 
   519 
 | 
| 
 | 
   520 Possible values: I<NucleicAcidBlock | NucleicAcidRows>. Default: I<NucleicAcidBlock>
 | 
| 
 | 
   521 
 | 
| 
 | 
   522 An example for I<NucleicAcidBlock> output style:
 | 
| 
 | 
   523 
 | 
| 
 | 
   524     Code: Ado
 | 
| 
 | 
   525     OtherCodes: A
 | 
| 
 | 
   526     Name: Adenosine
 | 
| 
 | 
   527     Type: Nucleoside
 | 
| 
 | 
   528     MolecularFormula: C10H13O4N5
 | 
| 
 | 
   529     MolecularWeight: 267.2413
 | 
| 
 | 
   530     ... ...
 | 
| 
 | 
   531 
 | 
| 
 | 
   532 An example for I<NucleicAcidRows> output style:
 | 
| 
 | 
   533 
 | 
| 
 | 
   534     Code,OtherCodes,Name,Type,MolecularFormula,MolecularWeight
 | 
| 
 | 
   535 
 | 
| 
 | 
   536 =item B<-o, --overwrite>
 | 
| 
 | 
   537 
 | 
| 
 | 
   538 Overwrite existing files.
 | 
| 
 | 
   539 
 | 
| 
 | 
   540 =item B<--precision> I<number>
 | 
| 
 | 
   541 
 | 
| 
 | 
   542 Precision for listing numerical values. Default: up to I<4> decimal places.
 | 
| 
 | 
   543 Valid values: positive integers.
 | 
| 
 | 
   544 
 | 
| 
 | 
   545 =item B<--PropertiesMode> I<Categories | Names | All>
 | 
| 
 | 
   546 
 | 
| 
 | 
   547 Specify how property names are specified: use category names; explicit list of property names; or
 | 
| 
 | 
   548 use all available properties. Possible values: I<Categories, Names, or All>. Default: I<Categories>.
 | 
| 
 | 
   549 
 | 
| 
 | 
   550 This option is used in conjunction with B<-p, --properties> option to specify properties of
 | 
| 
 | 
   551 interest.
 | 
| 
 | 
   552 
 | 
| 
 | 
   553 =item B<-p, --properties> I<CategoryName,[CategoryName,...] | PropertyName,[PropertyName,...]>
 | 
| 
 | 
   554 
 | 
| 
 | 
   555 This option is B<--propertiesmode> specific. In general, it's a list of comma separated category or
 | 
| 
 | 
   556 property names.
 | 
| 
 | 
   557 
 | 
| 
 | 
   558 Specify which nucleic acid properties information to list for the nucleic acid IDs specified using
 | 
| 
 | 
   559 command line parameters: list basic information; list all available information; or specify a comma
 | 
| 
 | 
   560 separated list of nucleic acid property names.
 | 
| 
 | 
   561 
 | 
| 
 | 
   562 Possible values: I<Basic | BasicPlus | PropertyName,[PropertyName,...]>.
 | 
| 
 | 
   563 Default: I<Basic>.
 | 
| 
 | 
   564 
 | 
| 
 | 
   565 I<Basic> includes: I<Code, OtherCodes, Name, Type, MolecularFormula, MolecularWeight>
 | 
| 
 | 
   566 
 | 
| 
 | 
   567 I<BasicPlus> includes: I<Code, OtherCodes, Name, Type, MolecularFormula, MolecularWeight, ExactMass,
 | 
| 
 | 
   568 ElementalComposition>
 | 
| 
 | 
   569 
 | 
| 
 | 
   570 Here is a complete list of available properties: I<Code, OtherCodes, BasePair, Name, Type, MolecularFormula,
 | 
| 
 | 
   571 MolecularFormulaAtpH7.5, MolecularWeight, ExactMass, ElementalComposition>.
 | 
| 
 | 
   572 
 | 
| 
 | 
   573 =item B<--PropertiesListing> I<ByGroup | Alphabetical>
 | 
| 
 | 
   574 
 | 
| 
 | 
    575 Specify how to list properties for nucleic acids: group by category or alphabetically by
 | 
| 
 | 
   576 property names. Possible values: I<ByGroup or Alphabetical>. Default: I<ByGroup>
 | 
| 
 | 
   577 
 | 
| 
 | 
   578 =item B<-q, --quote> I<yes | no>
 | 
| 
 | 
   579 
 | 
| 
 | 
   580 Put quotes around column values in output text file. Possible values: I<yes or
 | 
| 
 | 
   581 no>. Default value: I<yes>.
 | 
| 
 | 
   582 
 | 
| 
 | 
   583 =item B<-r, --root> I<rootname>
 | 
| 
 | 
   584 
 | 
| 
 | 
   585 New text file name is generated using the root: <Root>.<Ext>. File name is only
 | 
| 
 | 
    586 used during I<File> value of B<--output> option.
 | 
| 
 | 
   587 
 | 
| 
 | 
    588 Default file name: NucleicAcidsInfo.<Ext>. The csv, and tsv
 | 
| 
 | 
   589 <Ext> values are used for comma/semicolon, and tab delimited text files respectively.
 | 
| 
 | 
   590 
 | 
| 
 | 
   591 =item B<-w, --WorkingDir> I<dirname>
 | 
| 
 | 
   592 
 | 
| 
 | 
   593 Location of working directory. Default: current directory.
 | 
| 
 | 
   594 
 | 
| 
 | 
   595 =back
 | 
| 
 | 
   596 
 | 
| 
 | 
   597 =head1 EXAMPLES
 | 
| 
 | 
   598 
 | 
| 
 | 
   599 To list basic properties information for nucleoside A, type:
 | 
| 
 | 
   600 
 | 
| 
 | 
   601     % InfoNucleicAcids.pl
 | 
| 
 | 
   602 
 | 
| 
 | 
   603 To list all available properties information for nucleoside A, type:
 | 
| 
 | 
   604 
 | 
| 
 | 
   605     % InfoNucleicAcids.pl --propertiesmode all A
 | 
| 
 | 
   606 
 | 
| 
 | 
   607 To list all available information for all available nucleic acids, type:
 | 
| 
 | 
   608 
 | 
| 
 | 
   609     % InfoNucleicAcids.pl --propertiesmode All All
 | 
| 
 | 
   610 
 | 
| 
 | 
    611 To list basic properties information for all nucleosides, type:
 | 
| 
 | 
   612 
 | 
| 
 | 
   613     % InfoNucleicAcids.pl -m NucleicAcidType Nucleoside
 | 
| 
 | 
   614 
 | 
| 
 | 
    615 To list basic properties information for all nucleotides and deoxynucleotides, type:
 | 
| 
 | 
   616 
 | 
| 
 | 
   617     % InfoNucleicAcids.pl -m NucleicAcidType Nucleotide Deoxynucleotide
 | 
| 
 | 
   618 
 | 
| 
 | 
   619 To list basic properties information for variety of nucleic acids, type:
 | 
| 
 | 
   620 
 | 
| 
 | 
   621     % InfoNucleicAcids.pl A dG AMP Cytidine T UDP "5'-dATP"
 | 
| 
 | 
   622 
 | 
| 
 | 
   623 To list code and molecular weights for nucleosides A, G, C and T, type:
 | 
| 
 | 
   624 
 | 
| 
 | 
   625     % InfoNucleicAcids.pl --PropertiesMode  Names --properties
 | 
| 
 | 
   626       Code,MolecularWeight A G C T
 | 
| 
 | 
   627 
 | 
| 
 | 
   628 To alphabetically list all the available properties for nucleotides dAMP, dGMP,
 | 
| 
 | 
   629 dCMP, and dTMP in rows instead of nucleic acid blocks with quotes around the values, type:
 | 
| 
 | 
   630 
 | 
| 
 | 
   631     % InfoNucleicAcids.pl --PropertiesMode All --PropertiesListing
 | 
| 
 | 
   632       Alphabetical --OutputStyle NucleicAcidRows -q yes dAMP dGMP
 | 
| 
 | 
   633       dCMP dTMP
 | 
| 
 | 
   634 
 | 
| 
 | 
   635 To alphabetically list all the available properties for all available nucleic acids to
 | 
| 
 | 
    636 a file named NucleicAcidsProperties.csv with quotes around the values, type
 | 
| 
 | 
   637 
 | 
| 
 | 
   638     % InfoNucleicAcids.pl --PropertiesMode All --PropertiesListing
 | 
| 
 | 
   639       Alphabetical --output File --OutputStyle NucleicAcidRows -r
 | 
| 
 | 
   640       NucleicAcidsProperties -o -q Yes All
 | 
| 
 | 
   641 
 | 
| 
 | 
   642 =head1 AUTHOR
 | 
| 
 | 
   643 
 | 
| 
 | 
   644 Manish Sud <msud@san.rr.com>
 | 
| 
 | 
   645 
 | 
| 
 | 
   646 =head1 SEE ALSO
 | 
| 
 | 
   647 
 | 
| 
 | 
   648 InfoAminoAcids.pl, InfoPeriodicTableElements.pl
 | 
| 
 | 
   649 
 | 
| 
 | 
   650 =head1 COPYRIGHT
 | 
| 
 | 
   651 
 | 
| 
 | 
   652 Copyright (C) 2015 Manish Sud. All rights reserved.
 | 
| 
 | 
   653 
 | 
| 
 | 
   654 This file is part of MayaChemTools.
 | 
| 
 | 
   655 
 | 
| 
 | 
   656 MayaChemTools is free software; you can redistribute it and/or modify it under
 | 
| 
 | 
   657 the terms of the GNU Lesser General Public License as published by the Free
 | 
| 
 | 
   658 Software Foundation; either version 3 of the License, or (at your option)
 | 
| 
 | 
   659 any later version.
 | 
| 
 | 
   660 
 | 
| 
 | 
   661 =cut
 |