package Parsers::YYLexer;
#
# $RCSfile: YYLexer.pm,v $
# $Date: 2015/02/28 20:50:55 $
# $Revision: 1.10 $
#
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2015 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use Parsers::Lexer;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Parsers::Lexer Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions...
use overload '""' => 'StringifyYYLexer';

# Class constructor...
#
# Arguments:
#   . $Input and @TokensSpec are handed, unchanged, to the constructor of the
#     parent class, which builds the underlying object.
#
# The object created by the parent class is reblessed into the invoking class
# (or into the class of the invoking object) before the data specific to
# YYLexer is initialized.
#
sub new {
  my $Class = shift;
  my($Input, @TokensSpec) = @_;

  # Let the parent class build the object...
  my $This = $Class->SUPER::new($Input, @TokensSpec);

  # Allow invocation as both a class and an object method...
  my $TargetClass = ref($Class) || $Class;
  bless $This, $TargetClass;

  $This->_InitializeYYLexer();

  return $This;
}

# Initialize object data...
#
# Resets the YYLexer specific slots of the object: YYTabFile and YYTabFilePath
# are set to undef and YYTabDataMap is emptied. Returns the object itself.
#
sub _InitializeYYLexer {
  my $This = shift;

  # File containing mapping of token labels to token numbers, generated by
  # running command byacc with -d option on a parser definition file.
  #
  # For example, "byacc -l -P -d -b Parser Parser.yy" would generate file
  # Parser.tab.ph, which might contain the following token names and values
  # for a parser for a simple calculator:
  #
  #  $NUMBER=257;
  #  $LETTER=258;
  #
  @{$This}{qw(YYTabFile YYTabFilePath)} = (undef, undef);

  # Mapping of token labels to token numbers. The hash is emptied in place
  # rather than replaced by a new one...
  %{$This->{YYTabDataMap}} = ();

  return $This;
}

# Initialize class: remember the package name used as prefix in the messages
# generated by this class...
sub _InitializeClass {
  return $ClassName = __PACKAGE__;
}

# Process tokens in YYTab file and load mapping of token labels to integers
# for return during YYLex method invocation...
#
# Notes:
#   . YYTabFile must be a complete path or available through @INC path in the
#     same directory where this package is located.
#   . Name of YYTabFile might start with any valid sub directory name in @INC.
#     For example, "Parsers/<YYTabFile>" implies the tab file in Parsers sub directory
#     under MayaChemTools lib directory as it would be already in @INC path.
#   . YYTabFile must be explicitly set by the caller. The default YYTabFile name,
#     y.tab.ph, generated by byacc is not used implicitly to avoid confusion among
#     multiple distinct instances of YYLexer.
#   . YYTabFile is generated by byacc during its usage with -d option and contains
#     mapping of token codes to token names/labels. YYLexer uses this file to map
#     token labels to token codes before returning token code and value pair back
#     to yyparse function used by byacc.
#   . User defined token numbers start from 257
#
# The format of YYTabFile generated by byacc during generation of parser code in
# Perl code is:
#
#     ... ...
#     $NUMBER=257;
#     $ADDOP=258;
#     $SUBOP=259;
#     ... ..
#
# Arguments:
#   . $YYTabFile - name of, or path to, the tab file generated by byacc.
#
# Returns the object itself. Any previously loaded mapping is discarded first.
#
# Error handling:
#   . croaks when YYTabFile is not specified or when a non-blank line doesn't
#     look like "$LABEL=NUMBER;".
#   . carps and returns without loading anything when YYTabFile can't be found
#     directly or under any directory in @INC.
#   . dies when the located file can't be opened.
#   . carps when a token label is defined more than once; the last definition
#     is the one retained.
#
sub SetupYYTabFile {
  my($This, $YYTabFile) = @_;
  my($YYTabFilePath, $Line, $TokenLabel, $TokenNumber);

  # Discard results of any earlier invocation...
  $This->{YYTabFile} = undef;
  $This->{YYTabFilePath} = undef;
  %{$This->{YYTabDataMap}} = ();

  if (!defined $YYTabFile) {
    croak "Error: ${ClassName}->SetupYYTabFile: YYTabFile must be specified...";
  }
  $This->{YYTabFile} = $YYTabFile;

  if (-e $YYTabFile) {
    $YYTabFilePath = $YYTabFile;
  }
  else {
    # Use the first directory in @INC containing the file...
    my($IncDir) = grep {-f "$_/$YYTabFile"} @INC;
    if (!$IncDir) {
      carp "Warning: ${ClassName}->SetupYYTabFile: YYTabFile, $YYTabFile, can't be located in \@INC path: @INC...";
      return $This;
    }
    $YYTabFilePath = "${IncDir}/$YYTabFile";
  }

  $This->{YYTabFilePath} = $YYTabFilePath;

  # Three argument open keeps characters in the file name from being taken as
  # an open mode, and the lexical file handle is closed automatically even
  # when croak is invoked in the middle of the loop. The reason for a failed
  # open is available in $! and not in $_.
  open(my $YYTabFH, '<', $YYTabFilePath) or die "Couldn't open $YYTabFilePath: $!\n";

  LINE: while (defined($Line = <$YYTabFH>)) {
    # Blank lines don't carry any token definition...
    next LINE if $Line =~ /^\s*$/;

    # Allow trailing white space, including CR from DOS line endings, after
    # the terminating semicolon...
    ($TokenLabel, $TokenNumber) = ($Line =~ /^\$(.*?)=(.*?);\s*$/);
    if (!(defined($TokenLabel) && defined($TokenNumber))) {
      croak "Error: ${ClassName}->SetupYYTabFile: Couldn't extract token label and number from YYTabFile $YYTabFile at line: $Line...";
    }
    if (exists $This->{YYTabDataMap}{$TokenLabel}) {
      carp "Warning: ${ClassName}->SetupYYTabFile: Token lable, $TokenLabel, already defined in YYTabFile $YYTabFile...";
    }
    $This->{YYTabDataMap}{$TokenLabel} = $TokenNumber;
  }
  close $YYTabFH;

  return $This;
}

# Get next available token number and any matched text from input stream
# by either removing it from the input stream or simply peeking ahead.
#
# Supported mode values: Peek, Next. Default: Next
#
# Notes:
#   . Token label and value pairs returned by lexer, which can't be mapped to token
#     labels specified in YYTabFile are ignored.
#   .
#     Token text of length 1 returned by lexer without a corresponding explicit token label,
#     which can't be mapped to a token number using Perl ord function, is ignored.
#
# Arguments:
#   . $Mode - passed as is to the Lex method to retrieve a token.
#
# Returns a list containing token number and token text. At the end of the
# input, the token number is the number assigned to label EOI in YYTabFile, or
# 0 when there is none, and the token text is "0".
#
# Tokens which can't be mapped to a token number are skipped after generating
# a warning.
#
# NOTE(review): a skipped token is followed by another call to Lex using the
# same mode. Whether that makes any progress in Peek mode depends on the Lex
# method, which is not defined in this file - confirm.
#
sub YYLex {
  my($This, $Mode) = @_;
  my($LexerToken, $TokenLabel, $TokenNumber, $TokenText);

  TOKEN: while (defined($LexerToken = $This->Lex($Mode))) {
    # Start from a clean slate for each token: otherwise the label of a
    # skipped token would be reported again for a following unlabeled token...
    ($TokenLabel, $TokenNumber, $TokenText) = (undef) x 3;

    if (ref $LexerToken) {
      # Token label and matched text pair...
      ($TokenLabel, $TokenText) = @{$LexerToken};
      if (exists $This->{YYTabDataMap}{$TokenLabel}) {
        $TokenNumber = $This->{YYTabDataMap}{$TokenLabel};
      }
      elsif ($TokenLabel =~ /^EOI$/i) {
        $TokenNumber = 0;
      }
    }
    else {
      # Text without any label...
      $TokenText = $LexerToken;
    }

    # Check for any literals (+, - , = etc.) to generate token numbers. A
    # character with code 0 is not used as it would look like EOI...
    #
    if (!defined $TokenNumber) {
      if (defined($TokenText) && length($TokenText) == 1 && ord $TokenText) {
        $TokenNumber = ord $TokenText;
      }
    }

    if (defined $TokenNumber) {
      last TOKEN;
    }

    # Generate warning message for no mapping to token numbers and move on to
    # the next token...
    if (defined $TokenLabel) {
      carp "Warning: ${ClassName}->YYLex: Igorning token label, $TokenLabel, with matched text, $TokenText, returned by lexer and retrieving next available token or text. Token label couldn't be mapped to token numbers specified in YYTabFile generated from a parser defintion file using byacc. After updating parser definition file, a new YYTabFile containing entry for token label must be generated...";
    }
    else {
      carp "Warning: ${ClassName}->YYLex: Igorning token text, $TokenText, returned by lexer and retrieving next available token or text. Token text returned by lexer couldn't be mapped to token number using Perl ord function. After updating lexer token specifications and parser definition file, a new YYTabFile containing entry for a new token label to match unrecognized text must be generated... ";
    }
  }

  if (!defined $LexerToken) {
    # Chained lexer returns undefined at end of input. So it's equivalent to EOI
    # token.
    $TokenNumber = exists $This->{YYTabDataMap}{EOI} ? $This->{YYTabDataMap}{EOI} : 0;
    $TokenText = "0";
  }

  return ($TokenNumber, $TokenText);
}

# Get next available token number and text pair from input stream by removing it
# from the input stream...
#
sub Next {
  my($This) = @_;

  return $This->YYLex();
}

# Get next available token number and text pair from input stream by simply
# peeking ahead and without removing it from the input stream...
#
sub Peek {
  my($This) = @_;

  return $This->YYLex('Peek');
}

# Return a curried version of lexer: yyparse in parser generated by byacc expects
# to call it without passing any argument for the YYLexer object...
#
# The returned code reference accepts an optional mode and returns whatever
# YYLex returns for it.
#
sub GetYYLex {
  my($This) = @_;

  return sub { my($Mode) = @_; return $This->YYLex($Mode); };
}

# Is it a lexer object? Returns 1 for a blessed reference which is an instance
# of this class or of a class derived from it, and 0 otherwise.
sub _IsYYLexer {
  my($Object) = @_;

  return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
}

# Return a string containing information about lexer...
sub StringifyYYLexer {
  my($This) = @_;

  return "YYLexer: PackageName: $ClassName; " . $This->_GetYYLexerInfoString();
}

# Stringify YYTabFile token name and value information...
#
# The returned string lists YYTabFile, YYTabFilePath and the token labels,
# sorted by name, along with their numbers, followed by the information
# provided by the _GetLexerInfoString method. 'None' is used for the values
# which are not available.
#
sub _GetYYLexerInfoString {
  my($This) = @_;
  my($YYLexerInfoString, $YYTabFile, $YYTabFilePath, $YYTabDataMapString);

  $YYTabFile = defined $This->{YYTabFile} ? $This->{YYTabFile} : 'None';
  $YYTabFilePath = defined $This->{YYTabFilePath} ? $This->{YYTabFilePath} : 'None';

  $YYLexerInfoString = "YYTabFile: $YYTabFile; YYTabFilePath: $YYTabFilePath";

  $YYTabDataMapString = "YYTabDataMap: None";
  if (keys %{$This->{YYTabDataMap}}) {
    my $YYTabDataMap = $This->{YYTabDataMap};

    $YYTabDataMapString = join ' ', "YYTabDataMap:", map { $_ . '=' . $YYTabDataMap->{$_} } sort keys %{$YYTabDataMap};
  }

  $YYLexerInfoString .= "; $YYTabDataMapString; " . $This->_GetLexerInfoString();

  return $YYLexerInfoString;
}
