0
|
1 package Parsers::YYLexer;
|
|
2 #
|
|
3 # $RCSfile: YYLexer.pm,v $
|
|
4 # $Date: 2015/02/28 20:50:55 $
|
|
5 # $Revision: 1.10 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33 use Parsers::Lexer;
|
|
34
|
|
# Package variables consulted by Perl's method resolution (@ISA) and by
# Exporter. They are declared with "our" rather than the "use vars" pragma,
# which perl's own documentation marks as superseded...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Parsers::Lexer is the base class; no symbols are exported by this package...
@ISA = qw(Parsers::Lexer Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables: $ClassName is filled in by _InitializeClass and used
# in messages generated by this package...
my($ClassName);
_InitializeClass();

# Overload Perl functions: string conversion of an object is handled by
# StringifyYYLexer method...
use overload '""' => 'StringifyYYLexer';
|
|
49
|
|
# Class constructor: the object is created by the parent class constructor
# using the specified input and token specifications and is then set up with
# YYLexer specific data before it's returned to the caller...
#
sub new {
    my($Class, $Input, @TokensSpec) = @_;

    # Create the object using the parent class constructor...
    my $This = $Class->SUPER::new($Input, @TokensSpec);

    # The invocant might be a class name or an already existing object...
    my $TargetClass = ref($Class) || $Class;
    bless $This, $TargetClass;

    # Attach YYLexer specific data...
    $This->_InitializeYYLexer();

    return $This;
}
|
|
61
|
|
# Initialize YYLexer specific object data and return the object...
#
sub _InitializeYYLexer {
    my($This) = @_;

    # Name and resolved path of the file containing mapping of token labels to
    # token numbers. It's generated by running command byacc with -d option on
    # a parser definition file.
    #
    # For example, "byacc -l -P -d -b Parser Parser.yy" would generate file
    # Parser.tab.ph, which might contain the following token names and values
    # for a parser for a simple calculator:
    #
    #   $NUMBER=257;
    #   $LETTER=258;
    #
    # Both stay undefined until SetupYYTabFile is invoked...
    @{$This}{qw(YYTabFile YYTabFilePath)} = (undef, undef);

    # Mapping of token labels to token numbers: start with an empty hash...
    %{$This->{YYTabDataMap}} = ();

    return $This;
}
|
|
86
|
|
# Initialize class data: invoked once while the package is being loaded...
#
sub _InitializeClass {
    # Remember the name of this package; it's used as a prefix in error and
    # warning messages and during object type checks...
    $ClassName = __PACKAGE__;
}
|
|
93
|
|
# Process tokens in YYTab file and load mapping of token labels to integers
# for return during YYLex method invocation...
#
# Parameters:
#   . YYTabFile - Name or path of the tab file. It must be specified; the
#     method croaks otherwise.
#
# Returns the YYLexer object. Any previously loaded file name, file path and
# token mapping are cleared before processing the specified file.
#
# Notes:
#   . YYTabFile must be a complete path or available through @INC path in the
#     same directory where this package is located.
#   . Name of YYTabFile might start with any valid sub directory name in @INC.
#     For example, "Parsers/<YYTabFile>" implies the tab file in parsers sub directory
#     under MayaChemTools lib directory as it would be already in @INC path.
#   . YYTabFile must be explicitly set by the caller. The default YYTabFile name,
#     y.tab.ph, generated by byacc is not used implicitly to avoid confusion among
#     multiple distinct instances of YYLexer.
#   . YYTabFile is generated by byacc during its usage with -d options and contains
#     mapping of token codes to token names/labels. YYLexer uses this file to map
#     token labels to token codes before returning token code and value pair back
#     to yyparse function used by byacc.
#   . User defined token numbers start from 257
#   . A warning is generated and the object is returned with an empty mapping
#     when YYTabFile can't be located.
#   . A line which doesn't match the expected format causes a croak. Trailing
#     white space, including a carriage return from DOS style line endings, is
#     tolerated at the end of a line.
#   . A warning is generated for a token label defined more than once; the
#     last value is the one kept in the mapping.
#
# The format of YYTabFile generated by byacc during generation of parser code in
# Perl code is:
#
#   ... ...
#   $NUMBER=257;
#   $ADDOP=258;
#   $SUBOP=259;
#   ... ..
#
sub SetupYYTabFile {
    my($This, $YYTabFile) = @_;
    my($YYTabFilePath, $YYTabFileHandle, $Line, $TokenLabel, $TokenNumber);

    $This->{YYTabFile} = undef;
    $This->{YYTabFilePath} = undef;
    %{$This->{YYTabDataMap}} = ();

    if (!defined $YYTabFile) {
        croak "Error: ${ClassName}->SetupYYTabFile: YYTabFile must be specified...";
    }
    $This->{YYTabFile} = $YYTabFile;

    # Use the name as specified when it exists; otherwise, look for it under
    # directories in @INC...
    if (-e $YYTabFile) {
        $YYTabFilePath = $YYTabFile;
    }
    else {
        ($YYTabFilePath) = grep {-f "$_/$YYTabFile"} @INC;
        if (!$YYTabFilePath) {
            carp "Warning: ${ClassName}->SetupYYTabFile: YYTabFile, $YYTabFile, can't be located in \@INC path: @INC...";
            return $This;
        }
        $YYTabFilePath = "${YYTabFilePath}/$YYTabFile";
    }

    $This->{YYTabFilePath} = $YYTabFilePath;

    # Three argument open keeps any special characters in the path from being
    # interpreted as an open mode; $! contains the reason for a failure. The
    # lexical file handle is closed automatically should a croak below leave
    # this method early...
    open($YYTabFileHandle, '<', $YYTabFilePath) or die "Couldn't open $YYTabFilePath: $!\n";
    while (defined($Line = <$YYTabFileHandle>)) {
        # Expected line format: $LABEL=NUMBER;
        ($TokenLabel, $TokenNumber) = ($Line =~ /^\$(.*?)=(.*?);\s*$/);
        if (!(defined($TokenLabel) && defined($TokenNumber))) {
            croak "Error: ${ClassName}->SetupYYTabFile: Couldn't extract token label and number from YYTabFile $YYTabFile at line: $Line...";
        }
        if (exists $This->{YYTabDataMap}{$TokenLabel}) {
            carp "Warning: ${ClassName}->SetupYYTabFile: Token lable, $TokenLabel, already defined in YYTabFile $YYTabFile...";
        }
        $This->{YYTabDataMap}{$TokenLabel} = $TokenNumber;
    }
    close $YYTabFileHandle;

    return $This;
}
|
|
163
|
|
# Get next available token number and any matched text from input stream
# by either removing it from the input stream or simply peeking ahead.
#
# Supported mode values: Peek, Next. Default: Next
#
# Returns a list containing token number and token text. At the end of input,
# the token number is the value of any EOI label in YYTabFile, or 0 otherwise,
# and the token text is "0".
#
# Notes:
#   . Token label and value pairs returned by lexer, which can't be mapped to token
#     labels specified in YYTabFile are ignored.
#   . Token text of length 1 returned by lexer without a corresponding explicit token label,
#     which can't be mapped to a token number using Perl ord function, is ignored.
#   . A warning is generated for each ignored token.
#
sub YYLex {
    my($This, $Mode) = @_;
    my($LexerToken, $TokenLabel, $TokenNumber, $TokenText);

    # NOTE(review): a token which is ignored below is followed by another call
    # to Lex with the same mode. Presumably a token peeked at in Peek mode isn't
    # removed from the input stream by the base class, in which case the same
    # token would be retrieved again - confirm against Parsers::Lexer.
    #
    TOKEN: while (defined($LexerToken = $This->Lex($Mode))) {
        # Start each token from a clean slate. Otherwise, the label of an ignored
        # token would show up in the warning for a following unlabeled text...
        ($TokenLabel, $TokenNumber, $TokenText) = (undef) x 3;

        if (ref $LexerToken) {
            # Token label and matched text pair...
            ($TokenLabel, $TokenText) = @{$LexerToken};
            if (exists $This->{YYTabDataMap}{$TokenLabel}) {
                $TokenNumber = $This->{YYTabDataMap}{$TokenLabel};
            }
            elsif ($TokenLabel =~ /^EOI$/i) {
                $TokenNumber = 0;
            }
        }
        else {
            # Text without any token label...
            $TokenText = $LexerToken;
        }

        # Check for any literals (+, - , = etc.) to generate token numbers...
        #
        if (!defined $TokenNumber) {
            if (defined($TokenText) && length($TokenText) == 1 && ord $TokenText) {
                $TokenNumber = ord $TokenText;
            }
        }

        if (defined $TokenNumber) {
            last TOKEN;
        }

        # Generate warning message for no mapping to token numbers and move on
        # to the next token...
        if (defined $TokenLabel) {
            carp "Warning: ${ClassName}->YYLex: Igorning token label, $TokenLabel, with matched text, $TokenText, returned by lexer and retrieving next available token or text. Token label couldn't be mapped to token numbers specified in YYTabFile generated from a parser defintion file using byacc. After updating parser definition file, a new YYTabFile containing entry for token label must be generated...";
        }
        else {
            carp "Warning: ${ClassName}->YYLex: Igorning token text, $TokenText, returned by lexer and retrieving next available token or text. Token text returned by lexer couldn't be mapped to token number using Perl ord function. After updating lexer token specifications and parser definition file, a new YYTabFile containing entry for a new token label to match unrecognized text must be generated... ";
        }
        next TOKEN;
    }

    if (!defined $LexerToken) {
        # Chained lexer returns undefined at end of input. So it's equivalent to EOI
        # token. Any token number specified for EOI in YYTabFile is used instead of
        # the default value of 0...
        $TokenLabel = "EOI";
        $TokenNumber = exists $This->{YYTabDataMap}{EOI} ? $This->{YYTabDataMap}{EOI} : 0;
        $TokenText = "0";
    }

    return ($TokenNumber, $TokenText);
}
|
|
233
|
|
# Retrieve the next token number and text pair and remove it from the input
# stream: it's YYLex invoked without any mode...
#
sub Next {
    my $This = shift;

    return $This->YYLex();
}
|
|
242
|
|
# Retrieve the next token number and text pair by simply peeking ahead
# and without removing it from the input stream: it's YYLex invoked in Peek
# mode...
#
sub Peek {
    my $This = shift;
    my $Mode = 'Peek';

    return $This->YYLex($Mode);
}
|
|
251
|
|
# Return a curried version of lexer: yyparse in parser generated by byacc expects
# to call it without passing any argument for the YYLexer object. The returned
# code reference takes an optional mode and hands it over to YYLex method of
# this object...
#
sub GetYYLex {
    my $This = shift;

    my $YYLex = sub {
        my $Mode = shift;
        $This->YYLex($Mode);
    };

    return $YYLex;
}
|
|
260
|
|
# Is it a YYLexer object? Returns 1 for a blessed reference which is of this
# class or derived from it, and 0 for everything else...
#
sub _IsYYLexer {
    my($Object) = @_;

    # Anything which isn't a blessed reference can't be an object...
    return 0 unless Scalar::Util::blessed($Object);

    return $Object->isa($ClassName) ? 1 : 0;
}
|
|
267
|
|
# Return a string describing the lexer: the package name followed by the
# information string generated by _GetYYLexerInfoString...
#
sub StringifyYYLexer {
    my($This) = @_;

    my $YYLexerInfoString = $This->_GetYYLexerInfoString();

    return "YYLexer: PackageName: ${ClassName}; ${YYLexerInfoString}";
}
|
|
277
|
|
# Stringify YYTabFile name, its path, and token label and number pairs sorted
# by token label, followed by the information string for the underlying lexer
# retrieved using _GetLexerInfoString. 'None' is used for any missing value...
#
sub _GetYYLexerInfoString {
    my($This) = @_;
    my(@InfoParts);

    my $YYTabFile = defined $This->{YYTabFile} ? $This->{YYTabFile} : 'None';
    my $YYTabFilePath = defined $This->{YYTabFilePath} ? $This->{YYTabFilePath} : 'None';

    push @InfoParts, "YYTabFile: $YYTabFile", "YYTabFilePath: $YYTabFilePath";

    # Token label and number pairs...
    my @TokenLabels = sort keys %{$This->{YYTabDataMap}};
    if (@TokenLabels) {
        my @TokenPairs = map { $_ . '=' . $This->{YYTabDataMap}{$_} } @TokenLabels;
        push @InfoParts, join(' ', 'YYTabDataMap:', @TokenPairs);
    }
    else {
        push @InfoParts, "YYTabDataMap: None";
    }

    # Information about the underlying lexer goes last...
    my $LexerInfoString = $This->_GetLexerInfoString();
    push @InfoParts, $LexerInfoString;

    return join('; ', @InfoParts);
}
|
|
304
|
|
305 1;
|
|
306
|
|
307 __END__
|
|
308
|
|
309 =head1 NAME
|
|
310
|
|
311 Parsers::YYLexer
|
|
312
|
|
313 =head1 SYNOPSIS
|
|
314
|
|
315 use Parsers::YYLexer;
|
|
316
|
|
317 use Parsers::YYLexer qw(:all);
|
|
318
|
|
319 =head1 DESCRIPTION
|
|
320
|
|
321 B<YYLexer> class provides the following methods:
|
|
322
|
|
323 new, GetYYLex, Next, Peek, SetupYYTabFile, StringifyYYLexer, YYLex
|
|
324
|
|
325 B<Parsers::YYLexer> class is derived from B<Parsers::Lexer> base class, which provides all
|
|
326 the underlying lexer functionality. B<YYLexer> class is designed to be used with
|
|
327 B<yyparse> code generated by running B<byacc> on a parser defined using
|
|
328 parser definition B<ParserName.yy> file.
|
|
329
|
|
330 I<YYTabFile> containing mapping of token labels to integers must be explicitly
|
|
331 specified by the caller. This file is processed during B<SetupYYTabFile> method invocation and
|
|
332 mapping of token labels to integers is loaded in a hash to be used later by B<YYLex>
|
|
333 method to return token number and text pairs to the parser.
|
|
334
|
|
335 =head2 METHODS
|
|
336
|
|
337 =over 4
|
|
338
|
|
339 =item B<new>
|
|
340
|
|
341 $YYLexer = new Parsers::YYLexer($Input, @YYLexerTokensSpec);
|
|
342
|
|
343 Using specified I<Input> and I<YYLexerTokensSpec>, B<new> method generates a new
|
|
344 B<YYLexer> and returns a reference to newly created B<YYLexer> object.
|
|
345
|
|
346 Examples:
|
|
347
|
|
348 # Tokens specifications supplied by the caller. It's an array containing references
|
|
349 # to arrays with each containing TokenLabel and TokenMatchRegex pair along with
|
|
350 # an optional reference to code to be executed after a match.
|
|
351 #
|
|
352 @LexerTokensSpec = (
|
|
353 [ 'LETTER', qr/[a-zA-Z]/ ],
|
|
354 [ 'NUMBER', qr/\d+/ ],
|
|
355 [ 'SPACE', qr/[ ]*/,
|
|
356 sub { my($This, $TokenLabel, $MatchedText) = @_; return ''; }
|
|
357 ],
|
|
358 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/,
|
|
359 sub { my($This, $TokenLabel, $MatchedText) = @_; return "\n"; }
|
|
360 ],
|
|
361 [ 'CHAR', qr/./ ]
|
|
362 );
|
|
363
|
|
364 # Input string...
|
|
365 $InputText = 'y = 3 + 4';
|
|
366
|
|
367 $YYLexer = new Parsers::YYLexer($InputText, @LexerTokensSpec);
|
|
368
|
|
369 # Setup default token table file...
|
|
370 $YYTabFile = "Parsers/SimpleCalcParser.tab.ph";
|
|
371 $YYLexer->SetupYYTabFile($YYTabFile);
|
|
372
|
|
373 # Process input stream...
|
|
374 ($TokenNumber, $TokenText) = $YYLexer->YYLex();
|
|
375 print "TokenNumber: $TokenNumber; TokenText: $TokenText\n";
|
|
376
|
|
377 # Input file...
|
|
378 $InputFile = "Input.txt";
|
|
379 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
|
|
380 $Lexer = new Parsers::YYLexer(\*INPUTFILE, @LexerTokensSpec);
|
|
381
|
|
382 # Input file iterator...
|
|
383 $InputFile = "TestSimpleCalcParser.txt";
|
|
384 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
|
|
385 $InputIterator = sub { return <INPUTFILE>; };
|
|
386 $Lexer = new Parsers::YYLexer($InputIterator, @LexerTokensSpec);
|
|
387
|
|
388 # Usage with code generated by byacc from a parser definition
|
|
389 # file SimpleCalcParser.yy...
|
|
390
|
|
391 $InputText = "3 + 4 +6\nx=3\ny=5\nx+y\nx+z\n";
|
|
392
|
|
393 $YYLexer = new Parsers::YYLexer($InputText,@LexerTokensSpec);
|
|
394
|
|
395 $YYLex = $YYLexer->GetYYLex();
|
|
396
|
|
397 $YYTabFile = "Parsers/SimpleCalcParser.tab.ph";
|
|
398 $YYLexer->SetupYYTabFile($YYTabFile);
|
|
399
|
|
400 $Debug = 0;
|
|
401 $SimpleCalcParser = new Parsers::SimpleCalcParser($YYLex,
|
|
402 \&Parsers::SimpleCalcParser::yyerror, $Debug);
|
|
403
|
|
404 $Value = $SimpleCalcParser->yyparse();
|
|
405 print "Value = " . (defined($Value) ? "$Value" : "Undefined") . "\n";
|
|
406
|
|
407 =item B<GetYYLex>
|
|
408
|
|
409 $YYLex = $YYLexer->GetYYLex();
|
|
410
|
|
411 Returns a curried version of YYLexer as B<YYLex>: yyparse in parser generated by
|
|
412 byacc expects it to call without passing any argument for the I<YYLexer> object.
|
|
413
|
|
414 =item B<Next>
|
|
415
|
|
416 ($TokenNumber, $TokenText) = $YYLexer->Next();
|
|
417
|
|
418 Returns next available B<TokenNumber> and any matched B<TokenText> from
|
|
419 input stream by removing it from the input stream. Token number and text of
|
|
420 zero corresponds to end of input (EOI).
|
|
421
|
|
422 =item B<Peek>
|
|
423
|
|
424 ($TokenNumber, $TokenText) = $YYLexer->Peek();
|
|
425
|
|
426 Returns next available B<TokenNumber> and any matched B<TokenText> from
|
|
427 input stream by simply looking ahead and without removing it from the input stream.
|
|
428 Token number and text of zero corresponds to end of input (EOI).
|
|
429
|
|
430 =item B<SetupYYTabFile>
|
|
431
|
|
432 $YYLexer = $YYLexer->SetupYYTabFile($YYTabFile);
|
|
433
|
|
434 Processes token labels to integers data map in specified I<YYTabFile> and returns
|
|
435 I<YYLexer>.
|
|
436
|
|
437 Notes:
|
|
438
|
|
439 . YYTabFile must be a complete path or available through @INC path in the
|
|
440 same directory where this package is located.
|
|
441 . Name of YYTabFile might start with any valid sub directory name in @INC
|
|
442 For example, "Parsers/<YYTabFile>" implies the tab file in parsers sub
|
|
443 directory under MayaChemTools lib directory as it would be already in @INC
|
|
444 path.
|
|
445 . YYTabFile must be explicitly set by the caller. The default YYTabFile name,
|
|
446 y.tab.ph, generated by byacc is not used implicitly to avoid confusion
|
|
447 among multiple distinct instances of YYLexer.
|
|
448 . YYTabFile is generated by byacc during its usage with -d options and
|
|
449 contains mapping of token codes to token names/labels. YYLexer used this
|
|
450 file to map token labels to token codes before returning token code and
|
|
451 value pair back to yyparse function used by byacc.
|
|
452 . User defined token numbers start from 257
|
|
453 . Token number for any user defined token EOI is mapped to its value before
|
|
454 default token number of 0 for EOI.
|
|
455
|
|
456 The format of YYTabFile generated by byacc during generation of parser code in
|
|
457 Perl code is:
|
|
458
|
|
459 ... ...
|
|
460 $NUMBER=257;
|
|
461 $ADDOP=258;
|
|
462 $SUBOP=259;
|
|
463 ... ..
|
|
464
|
|
465 =item B<YYLex>
|
|
466
|
|
467 ($TokenNumber, $TokenText) = $YYLexer->YYLex();
|
|
468 ($TokenNumber, $TokenText) = $YYLexer->YYLex($Mode);
|
|
469
|
|
470 Returns available B<TokenNumber> and any matched B<TokenText> from
|
|
471 input stream by either removing it from the input stream or by simply looking
|
|
472 ahead and without removing it from the input stream. Token number and text of
|
|
473 zero corresponds to end of input (EOI).
|
|
474
|
|
475 Possible I<Mode> values: I<Peek, Next>. Default: I<Next>.
|
|
476
|
|
477 I<YYLex> is designed to be used with B<yyparse> code generated by running
|
|
478 B<byacc> on a parser defined using parser definition B<ParserName.yy> file.
|
|
479
|
|
480 Notes:
|
|
481
|
|
482 . Token label and value pairs returned by Lexer from base class, which
|
|
483 can't be mapped to token labels specified in YYTabFile are ignored.
|
|
484 . Token text of length 1 returned by Lexer from base class without a
|
|
485 corresponding explicit token label, which can't be mapped to a token
|
|
486 number using Perl ord function, is ignored.
|
|
487
|
|
488 =item B<StringifyYYLexer>
|
|
489
|
|
490 $YYLexerString = $YYLexer->StringifyYYLexer();
|
|
491
|
|
492 Returns a string containing information about I<YYLexer> object.
|
|
493
|
|
494 =back
|
|
495
|
|
496 =head1 AUTHOR
|
|
497
|
|
498 Manish Sud <msud@san.rr.com>
|
|
499
|
|
500 =head1 SEE ALSO
|
|
501
|
|
502 Lexer.pm, SimpleCalcYYLexer.pm, SimpleCalcParser.yy
|
|
503
|
|
504 =head1 COPYRIGHT
|
|
505
|
|
506 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
507
|
|
508 This file is part of MayaChemTools.
|
|
509
|
|
510 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
511 the terms of the GNU Lesser General Public License as published by the Free
|
|
512 Software Foundation; either version 3 of the License, or (at your option)
|
|
513 any later version.
|
|
514
|
|
515 =cut
|