MayaChemTools

   1 package Parsers::YYLexer;
   2 #
   3 # $RCSfile: YYLexer.pm,v $
   4 # $Date: 2015/02/28 20:50:55 $
   5 # $Revision: 1.10 $
   6 #
   7 # Author: Manish Sud <msud@san.rr.com>
   8 #
   9 # Copyright (C) 2015 Manish Sud. All rights reserved.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 use strict;
  30 use Carp;
  31 use Exporter;
  32 use Scalar::Util ();
  33 use Parsers::Lexer;
  34 
  35 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  36 
  37 @ISA = qw(Parsers::Lexer Exporter);
  38 @EXPORT = qw();
  39 @EXPORT_OK = qw();
  40 
  41 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  42 
  43 # Setup class variables...
  44 my($ClassName);
  45 _InitializeClass();
  46 
  47 # Overload Perl functions...
  48 use overload '""' => 'StringifyYYLexer';
  49 
  50 # Class constructor...
  51 sub new {
  52   my($Class, $Input,  @TokensSpec) = @_;
  53 
  54   # Initialize object...
  55   my $This = $Class->SUPER::new($Input,  @TokensSpec);
  56   bless $This, ref($Class) || $Class;
  57   $This->_InitializeYYLexer();
  58 
  59   return $This;
  60 }
  61 
  62 # Initialize object data...
  63 #
  64 sub _InitializeYYLexer {
  65   my($This) = @_;
  66 
  67   # File generated containing mapping of token labels to token numbers by
  68   # running command byacc with -d option on a parser definition file.
  69   #
  70   # For example, "byacc -l -P -d -b Parser Parser.yy" would generate file
  71   # Parser.tab.ph, which might contain the following tokem name and values
  72   # for a parser for a simple calculator:
  73   #
  74   #  $NUMBER=257;
  75   #  $LETTER=258;
  76   #
  77   #
  78   $This->{YYTabFile} = undef;
  79   $This->{YYTabFilePath} = undef;
  80 
  81   # Mapping of token lables to token numbers...
  82   %{$This->{YYTabDataMap}} = ();
  83 
  84   return $This;
  85 }
  86 
  87 # Initialize class ...
  88 sub _InitializeClass {
  89   #Class name...
  90 
  91   $ClassName = __PACKAGE__;
  92 }
  93 
  94 # Process tokens in YYTab file and load mapping of token labels to integers
  95 # for return during YYLex method invocation...
  96 #
  97 # Notes:
  98 #   . YYTabFile must be a complete path or available through @INC path in the
  99 #     same directory where this package is located.
 100 #   . Name of YYTabFile might start with any valid sub directory name in @INC
 101 #     For example, "Parsers/<YYTablFile>" implies the tab file in parsers sub directory
 102 #     under MayaChemTools lib directory as it would be already in @INC path.
 103 #   . YYTabFile must be explicitly set by the caller. The default YYTabFile name,
 104 #     y.tab.ph, generated by byacc is not used implicitly to avoid confusion among
 105 #     multiple distinct instances of YYLexer.
 106 #   . YYTabFile is generated by byacc during its usage with -d options and contains
 107 #     mapping of token codes to token names/labels. YYLexer used this file to map
 108 #     token lables to token codes before retuning token code and value pair back
 109 #     to yyparse function used by byacc.
 110 #   . User defined token numbers start from 257
 111 #
 112 #     The format of YYTabFile generted by byacc during generation of parser code in
 113 #     Perl code is:
 114 #
 115 #     ... ...
 116 #     $NUMBER=257;
 117 #     $ADDOP=258;
 118 #     $SUBOP=259;
 119 #     ... ..
 120 #
 121 sub SetupYYTabFile {
 122   my($This, $YYTabFile) = @_;
 123   my($YYTabFilePath, $Line, $TokenLabel, $TokenNumber);
 124 
 125   $This->{YYTabFile} = undef;
 126   $This->{YYTabFilePath} = undef;
 127   %{$This->{YYTabDataMap}} = ();
 128 
 129   if (!defined $YYTabFile) {
 130     croak "Error: ${ClassName}->SetupYYTabFile: YYTabFile must be specified...";
 131   }
 132   $This->{YYTabFile} = $YYTabFile;
 133 
 134   if (-e $YYTabFile) {
 135     $YYTabFilePath = $YYTabFile;
 136   }
 137   else {
 138     ($YYTabFilePath) = grep {-f "$_/$YYTabFile"}  @INC;
 139     if (!$YYTabFilePath) {
 140       carp "Warning: ${ClassName}->SetupYYTabFile: YYTabFile, $YYTabFile,  can't be located in \@INC path: @INC...";
 141       return $This;
 142     }
 143     $YYTabFilePath = "${YYTabFilePath}/$YYTabFile";
 144   }
 145 
 146   $This->{YYTabFilePath} = $YYTabFilePath;
 147 
 148   open YYTABFILE, "$YYTabFilePath" or die "Couldn't open $YYTabFilePath: $_\n";
 149   while ($Line = <YYTABFILE>) {
 150     ($TokenLabel, $TokenNumber) = ($Line =~ /^\$(.*?)=(.*?);$/);
 151     if (!(defined($TokenLabel) && defined($TokenNumber))) {
 152       croak "Error: ${ClassName}->SetupYYTabFile: Couldn't extract token label and number from YYTabFile $YYTabFile at line: $Line...";
 153     }
 154     if (exists $This->{YYTabDataMap}{$TokenLabel}) {
 155       carp "Warning: ${ClassName}->SetupYYTabFile: Token lable, $TokenLabel, already defined in YYTabFile $YYTabFile...";
 156     }
 157     $This->{YYTabDataMap}{$TokenLabel} = $TokenNumber;
 158   }
 159   close YYTABFILE;
 160 
 161   return $This;
 162 }
 163 
 164 # Get next available token number and any matched text from input stream
 165 # by either removing it from the input stream or simply peeking ahead.
 166 #
 167 # Supported mode values: Peek, Next. Default: Next
 168 #
 169 # Notes:
 170 #   . Token label and value pairs returned by lexer, which can't be mapped to token
 171 #     labels specified in YYTabFile are ignored.
 172 #   . Token text of length 1 returned by lexer without a corresponding explicit token label,
 173 #     which can't be mapped to a token number using Perl ord function, is ignored.
 174 #
 175 sub YYLex {
 176   my($This, $Mode) = @_;
 177   my($LexerToken, $TokenLabel, $TokenNumber, $TokenText);
 178 
 179   ($TokenLabel, $TokenNumber, $TokenText) = (undef) x 3;
 180 
 181   TOKEN: while (defined($LexerToken = $This->Lex($Mode))) {
 182     if (ref $LexerToken) {
 183       ($TokenLabel, $TokenText) = @{$LexerToken};
 184       if (exists $This->{YYTabDataMap}{$TokenLabel}) {
 185         $TokenNumber = $This->{YYTabDataMap}{$TokenLabel};
 186       }
 187       elsif ($TokenLabel =~ /^EOI$/i) {
 188         $TokenNumber = 0;
 189       }
 190     }
 191     else {
 192       $TokenText = $LexerToken;
 193     }
 194 
 195     # Check for any literals (+, - , = etc.) to generte token numbers...
 196     #
 197     if (!defined $TokenNumber) {
 198       if (length($TokenText) == 1 && ord $TokenText) {
 199         $TokenNumber = ord $TokenText;
 200       }
 201     }
 202 
 203     # Generate error message for no mapping to token numbers...
 204     if (defined $TokenNumber) {
 205       last TOKEN;
 206     }
 207     else {
 208       if (defined $TokenLabel) {
 209         carp "Warning: ${ClassName}->YYLex: Igorning token label, $TokenLabel, with matched text, $TokenText, returned by lexer and retrieving next available token or text. Token label couldn't be mapped to token numbers specified in YYTabFile generated from a parser defintion file using byacc. After updating parser definition file, a new YYTabFile containing entry for token label must be generated...";
 210       }
 211       else {
 212         carp "Warning: ${ClassName}->YYLex: Igorning token text, $TokenText, returned by lexer and retrieving next available token or text. Token text returned by lexer couldn't be mapped to token number using Perl ord function. After updating lexer token specifications and parser definition file, a new YYTabFile containing entry for a new token label to match unrecognized text must be generated...  ";
 213       }
 214       next TOKEN;
 215     }
 216   }
 217 
 218   if (!defined $LexerToken) {
 219     # Chained lexer returns undefined at end of input. So it's equivalent to EOI
 220     # token.
 221     if (exists $This->{YYTabDataMap}{EOI}) {
 222       $TokenLabel = "EOI";
 223       $TokenNumber = $This->{YYTabDataMap}{$TokenLabel};
 224       $TokenText = "0";
 225     }
 226     else {
 227       ($TokenLabel, $TokenNumber, $TokenText) = ("EOI", 0, "0");
 228     }
 229   }
 230 
 231   return ($TokenNumber, $TokenText);
 232 }
 233 
 234 # Get next available token number and text pair from input stream by removing it
 235 # from the input stream...
 236 #
 237 sub Next {
 238   my($This) = @_;
 239 
 240   return $This->YYLex();
 241 }
 242 
 243 # Get next available token number and text pair from input stream by by simply
 244 # peeking ahead and without removing it from the input stream...
 245 #
 246 sub Peek {
 247   my($This) = @_;
 248 
 249   return $This->YYLex('Peek')
 250 }
 251 
 252 # Return a curried verson of lexer: yyparse in parser generated by byacc expects it
 253 # to call without passing any argument for the YYLexer object...
 254 #
 255 sub GetYYLex {
 256   my($This) = @_;
 257 
 258   return sub { my($Mode) = @_; $This->YYLex($Mode); };
 259 }
 260 
 261 # Is it a lexer object?
 262 sub _IsYYLexer {
 263   my($Object) = @_;
 264 
 265   return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
 266 }
 267 
 268 # Return a string containing information about lexer...
 269 sub StringifyYYLexer {
 270   my($This) = @_;
 271   my($YYLexerString);
 272 
 273   $YYLexerString = "YYLexer: PackageName: $ClassName; " . $This->_GetYYLexerInfoString();
 274 
 275   return $YYLexerString;
 276 }
 277 
 278 # Stringigy YYTabFile token name and value information...
 279 #
 280 sub _GetYYLexerInfoString {
 281   my($This) = @_;
 282   my($YYLexerInfoString, $TokenValue, $YYTabFile, $YYTabFilePath, $YYTabDataMapString);
 283 
 284   $YYTabFile = defined $This->{YYTabFile} ? $This->{YYTabFile} : 'None';
 285   $YYTabFilePath = defined $This->{YYTabFilePath} ? $This->{YYTabFilePath} : 'None';
 286 
 287   $YYLexerInfoString = "YYTabFile: $YYTabFile; YYTabFilePath: $YYTabFilePath";
 288 
 289   $YYTabDataMapString = "YYTabDataMap: None";
 290   if (keys %{$This->{YYTabDataMap}}) {
 291     my($TokenLabel, $TokenNumber);
 292 
 293     $YYTabDataMapString = "YYTabDataMap:";
 294     for $TokenLabel (sort keys %{$This->{YYTabDataMap}}) {
 295       $TokenValue = $This->{YYTabDataMap}{$TokenLabel};
 296       $YYTabDataMapString .= " ${TokenLabel}=${TokenValue}";
 297     }
 298   }
 299 
 300   $YYLexerInfoString .= "; $YYTabDataMapString; " . $This->_GetLexerInfoString();
 301 
 302   return $YYLexerInfoString;
 303 }
 304