comparison lib/Parsers/YYLexer.pm @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4816e4a8ae95
1 package Parsers::YYLexer;
2 #
3 # $RCSfile: YYLexer.pm,v $
4 # $Date: 2015/02/28 20:50:55 $
5 # $Revision: 1.10 $
6 #
7 # Author: Manish Sud <msud@san.rr.com>
8 #
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 use strict;
30 use Carp;
31 use Exporter;
32 use Scalar::Util ();
33 use Parsers::Lexer;
34
# Package globals needed for inheritance and by Exporter...
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# YYLexer is derived from Parsers::Lexer; no symbols are exported...
@ISA = qw(Parsers::Lexer Exporter);
@EXPORT = ();
@EXPORT_OK = ();

%EXPORT_TAGS = ('all' => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
#
# ClassName is a file scoped lexical shared by the methods defined in this file;
# its value is assigned by _InitializeClass.
#
my $ClassName;
_InitializeClass();

# Overload Perl functions: stringification is handled by StringifyYYLexer...
use overload '""' => 'StringifyYYLexer';
49
# Class constructor...
#
# Builds the underlying lexer object through the parent class constructor using
# specified Input and TokensSpec, blesses it into the requested class, initializes
# YYLexer specific data and returns the new object.
#
sub new {
    my($Class, $Input, @TokensSpec) = @_;

    # Let the parent class create and initialize the lexer object...
    my $This = $Class->SUPER::new($Input, @TokensSpec);

    # Invocant may be a class name or an existing object...
    my $TargetClass = ref($Class) || $Class;
    bless $This, $TargetClass;

    $This->_InitializeYYLexer();

    return $This;
}
61
# Initialize object data...
#
# Sets YYTabFile and YYTabFilePath to undef, empties YYTabDataMap and returns
# the object.
#
sub _InitializeYYLexer {
    my($This) = @_;

    # File generated containing mapping of token labels to token numbers by
    # running command byacc with -d option on a parser definition file.
    #
    # For example, "byacc -l -P -d -b Parser Parser.yy" would generate file
    # Parser.tab.ph, which might contain the following token names and values
    # for a parser for a simple calculator:
    #
    #   $NUMBER=257;
    #   $LETTER=258;
    #
    @{$This}{qw(YYTabFile YYTabFilePath)} = (undef, undef);

    # Mapping of token labels to token numbers...
    %{$This->{YYTabDataMap}} = ();

    return $This;
}
86
# Initialize class...
#
# Stores the name of this package in the file scoped ClassName variable, which
# is used in messages and class membership checks by other methods in this file.
#
sub _InitializeClass {
    # Class name...
    $ClassName = __PACKAGE__;
}
93
# Process tokens in YYTab file and load mapping of token labels to integers
# for return during YYLex method invocation...
#
# Any previously loaded YYTabFile, YYTabFilePath and YYTabDataMap values are
# reset first. Croaks when YYTabFile is not specified or a line can't be parsed;
# dies when the located file can't be opened; warns and returns the object
# without loading anything when the file can't be located. Returns the object
# on success.
#
# Notes:
#   . YYTabFile must be a complete path or available through @INC path in the
#     same directory where this package is located.
#   . Name of YYTabFile might start with any valid sub directory name in @INC.
#     For example, "Parsers/<YYTabFile>" implies the tab file in parsers sub directory
#     under MayaChemTools lib directory as it would be already in @INC path.
#   . YYTabFile must be explicitly set by the caller. The default YYTabFile name,
#     y.tab.ph, generated by byacc is not used implicitly to avoid confusion among
#     multiple distinct instances of YYLexer.
#   . YYTabFile is generated by byacc during its usage with -d options and contains
#     mapping of token codes to token names/labels. YYLexer uses this file to map
#     token labels to token codes before returning token code and value pair back
#     to yyparse function used by byacc.
#   . User defined token numbers start from 257
#   . Blank lines are skipped and trailing white space, including carriage returns,
#     after the terminating semicolon is ignored.
#
# The format of YYTabFile generated by byacc during generation of parser code in
# Perl code is:
#
#   ... ...
#   $NUMBER=257;
#   $ADDOP=258;
#   $SUBOP=259;
#   ... ..
#
sub SetupYYTabFile {
    my($This, $YYTabFile) = @_;
    my($YYTabFilePath, $Line, $TokenLabel, $TokenNumber);

    # Forget anything loaded by an earlier invocation...
    $This->{YYTabFile} = undef;
    $This->{YYTabFilePath} = undef;
    %{$This->{YYTabDataMap}} = ();

    if (!defined $YYTabFile) {
        croak "Error: ${ClassName}->SetupYYTabFile: YYTabFile must be specified...";
    }
    $This->{YYTabFile} = $YYTabFile;

    # Use the name as is when it exists; otherwise, search @INC directories...
    if (-e $YYTabFile) {
        $YYTabFilePath = $YYTabFile;
    }
    else {
        my($IncDir) = grep {-f "$_/$YYTabFile"} @INC;
        if (!$IncDir) {
            carp "Warning: ${ClassName}->SetupYYTabFile: YYTabFile, $YYTabFile, can't be located in \@INC path: @INC...";
            return $This;
        }
        $YYTabFilePath = "${IncDir}/$YYTabFile";
    }

    $This->{YYTabFilePath} = $YYTabFilePath;

    # Three argument open on a lexical file handle: file name is never interpreted
    # as an open mode and the handle is closed automatically on early exit via croak.
    # The system error is reported through $!...
    open my $YYTabFileHandle, '<', $YYTabFilePath or die "Couldn't open $YYTabFilePath: $!\n";

    LINE: while ($Line = <$YYTabFileHandle>) {
        # A blank line carries no token information...
        next LINE if $Line =~ /^\s*$/;

        # Expected format: $LABEL=NUMBER; optionally followed by white space...
        ($TokenLabel, $TokenNumber) = ($Line =~ /^\$(.*?)=(.*?);\s*$/);
        if (!(defined($TokenLabel) && defined($TokenNumber))) {
            croak "Error: ${ClassName}->SetupYYTabFile: Couldn't extract token label and number from YYTabFile $YYTabFile at line: $Line...";
        }
        # A repeated label is reported; the last value wins...
        if (exists $This->{YYTabDataMap}{$TokenLabel}) {
            carp "Warning: ${ClassName}->SetupYYTabFile: Token lable, $TokenLabel, already defined in YYTabFile $YYTabFile...";
        }
        $This->{YYTabDataMap}{$TokenLabel} = $TokenNumber;
    }
    close $YYTabFileHandle;

    return $This;
}
163
# Get next available token number and any matched text from input stream
# by either removing it from the input stream or simply peeking ahead.
#
# Supported mode values: Peek, Next. Default: Next
#
# Mode is passed as is to Lex method to retrieve tokens. Returns a list containing
# token number and token text. At end of input, token number is the value mapped to
# EOI in YYTabDataMap, or 0 without any such mapping, and token text is "0".
#
# Notes:
#   . Token label and value pairs returned by lexer, which can't be mapped to token
#     labels specified in YYTabFile are ignored.
#   . Token text of length 1 returned by lexer without a corresponding explicit token label,
#     which can't be mapped to a token number using Perl ord function, is ignored.
#   . NOTE(review): An ignored token is followed by another call to Lex using the same
#     mode. Assuming Peek mode doesn't consume input, an unmappable token would be
#     retrieved again and again - confirm against Lex implementation in base class.
#
sub YYLex {
    my($This, $Mode) = @_;
    my($LexerToken, $TokenLabel, $TokenNumber, $TokenText);

    TOKEN: while (defined($LexerToken = $This->Lex($Mode))) {
        # Start with a clean slate for each token: a label left over from an ignored
        # token must not be reported along with the text of a later unlabeled token...
        ($TokenLabel, $TokenNumber, $TokenText) = (undef) x 3;

        if (ref $LexerToken) {
            # Token label and matched text pair...
            ($TokenLabel, $TokenText) = @{$LexerToken};
            if (exists $This->{YYTabDataMap}{$TokenLabel}) {
                $TokenNumber = $This->{YYTabDataMap}{$TokenLabel};
            }
            elsif ($TokenLabel =~ /^EOI$/i) {
                # End of input without an explicit token number in YYTabFile...
                $TokenNumber = 0;
            }
        }
        else {
            # Text without any token label...
            $TokenText = $LexerToken;
        }

        # Check for any literals (+, - , = etc.) to generate token numbers...
        #
        if (!defined $TokenNumber) {
            if (defined($TokenText) && length($TokenText) == 1 && ord $TokenText) {
                $TokenNumber = ord $TokenText;
            }
        }

        last TOKEN if defined $TokenNumber;

        # Generate warning message for no mapping to token numbers and move on
        # to the next token...
        if (defined $TokenLabel) {
            carp "Warning: ${ClassName}->YYLex: Igorning token label, $TokenLabel, with matched text, $TokenText, returned by lexer and retrieving next available token or text. Token label couldn't be mapped to token numbers specified in YYTabFile generated from a parser defintion file using byacc. After updating parser definition file, a new YYTabFile containing entry for token label must be generated...";
        }
        else {
            carp "Warning: ${ClassName}->YYLex: Igorning token text, $TokenText, returned by lexer and retrieving next available token or text. Token text returned by lexer couldn't be mapped to token number using Perl ord function. After updating lexer token specifications and parser definition file, a new YYTabFile containing entry for a new token label to match unrecognized text must be generated... ";
        }
    }

    if (!defined $LexerToken) {
        # Chained lexer returns undefined at end of input. So it's equivalent to EOI
        # token: a user defined EOI token number takes precedence over default of 0.
        $TokenNumber = exists $This->{YYTabDataMap}{EOI} ? $This->{YYTabDataMap}{EOI} : 0;
        $TokenText = "0";
    }

    return ($TokenNumber, $TokenText);
}
233
# Get next available token number and text pair from input stream by removing it
# from the input stream...
#
# Calls YYLex without specifying any mode and returns whatever it returns.
#
sub Next {
    my $This = shift;

    return $This->YYLex;
}
242
# Get next available token number and text pair from input stream by simply
# peeking ahead and without removing it from the input stream...
#
# Calls YYLex using Peek mode and returns whatever it returns.
#
sub Peek {
    my $This = shift;

    return $This->YYLex('Peek');
}
251
# Return a curried version of lexer: yyparse in parser generated by byacc expects
# to call it without passing any argument for the YYLexer object...
#
# The returned code reference accepts an optional mode and invokes YYLex on this
# object using that mode.
#
sub GetYYLex {
    my($This) = @_;

    my $YYLex = sub {
        my $Mode = shift;
        $This->YYLex($Mode);
    };

    return $YYLex;
}
260
# Is it a lexer object?
#
# Returns 1 for a blessed reference which is an instance of this class or any
# class derived from it; otherwise, returns 0.
#
sub _IsYYLexer {
    my($Object) = @_;

    if (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) {
        return 1;
    }

    return 0;
}
267
# Return a string containing information about lexer...
#
# The string consists of a fixed prefix containing package name followed by
# the string returned by _GetYYLexerInfoString.
#
sub StringifyYYLexer {
    my($This) = @_;

    my $Prefix = "YYLexer: PackageName: $ClassName; ";
    my $InfoString = $This->_GetYYLexerInfoString();

    return $Prefix . $InfoString;
}
277
# Stringify YYTabFile token name and value information...
#
# Returns a string containing YYTabFile, YYTabFilePath and YYTabDataMap entries
# sorted by token label followed by the string returned by _GetLexerInfoString.
# Undefined file name or path and an empty data map are shown as None.
#
sub _GetYYLexerInfoString {
    my($This) = @_;

    my $TabFile = defined $This->{YYTabFile} ? $This->{YYTabFile} : 'None';
    my $TabFilePath = defined $This->{YYTabFilePath} ? $This->{YYTabFilePath} : 'None';

    # One " Label=Number" entry for each token label in sorted order...
    my @Entries = map { " ${_}=" . $This->{YYTabDataMap}{$_} } sort keys %{$This->{YYTabDataMap}};
    my $DataMapString = @Entries ? join('', "YYTabDataMap:", @Entries) : "YYTabDataMap: None";

    my $InfoString = "YYTabFile: $TabFile; YYTabFilePath: $TabFilePath";
    $InfoString .= "; $DataMapString; " . $This->_GetLexerInfoString();

    return $InfoString;
}
304
305 1;
306
307 __END__
308
309 =head1 NAME
310
311 Parsers::YYLexer
312
313 =head1 SYNOPSIS
314
315 use Parsers::YYLexer;
316
317 use Parsers::YYLexer qw(:all);
318
319 =head1 DESCRIPTION
320
321 B<YYLexer> class provides the following methods:
322
323 new, GetYYLex, Next, Peek, SetupYYTabFile, StringifyYYLexer, YYLex
324
325 B<Parsers::YYLexer> class is derived from B<Parsers::Lexer> base class, which provides all
326 the underlying lexer functionality. B<YYLexer> class is designed to be used with
327 B<yyparse> code generated by running B<byacc> on a parser defined using a
328 parser definition B<ParserName.yy> file.
329
330 I<YYTabFile> containing mapping of token labels to integers must be explicitly
331 specified by the caller. This file is processed during B<SetupYYTabFile> method invocation and
332 mapping of token labels to integers is loaded in a hash to be used later by B<YYLex>
333 method to return token number and text pairs to the parser.
334
335 =head2 METHODS
336
337 =over 4
338
339 =item B<new>
340
341 $YYLexer = new Parsers::YYLexer($Input, @YYLexerTokensSpec);
342
343 Using specified I<Input> and I<YYLexerTokensSpec>, B<new> method generates a new
344 B<YYLexer> and returns a reference to newly created B<YYLexer> object.
345
346 Examples:
347
348 # Tokens specifications supplied by the caller. It's an array containing references
349 # to arrays with each containing TokenLabel and TokenMatchRegex pair along with
350 # an optional reference to code to be executed after a match.
351 #
352 @LexerTokensSpec = (
353 [ 'LETTER', qr/[a-zA-Z]/ ],
354 [ 'NUMBER', qr/\d+/ ],
355 [ 'SPACE', qr/[ ]*/,
356 sub { my($This, $TokenLabel, $MatchedText) = @_; return ''; }
357 ],
358 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/,
359 sub { my($This, $TokenLabel, $MatchedText) = @_; return "\n"; }
360 ],
361 [ 'CHAR', qr/./ ]
362 );
363
364 # Input string...
365 $InputText = 'y = 3 + 4';
366
367 $YYLexer = new Parsers::YYLexer($InputText, @LexerTokensSpec);
368
369 # Setup default token table file...
370 $YYTabFile = "Parsers/SimpleCalcParser.tab.ph";
371 $YYLexer->SetupYYTabFile($YYTabFile);
372
373 # Process input stream...
374 ($TokenNumber, $TokenText) = $YYLexer->YYLex();
375 print "TokenNumber: $TokenNumber; TokenText: $TokenText\n";
376
377 # Input file...
378 $InputFile = "Input.txt";
379 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
380 $Lexer = new Parsers::YYLexer(\*INPUTFILE, @LexerTokensSpec);
381
382 # Input file iterator...
383 $InputFile = "TestSimpleCalcParser.txt";
384 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
385 $InputIterator = sub { return <INPUTFILE>; };
386 $Lexer = new Parsers::YYLexer($InputIterator, @LexerTokensSpec);
387
388 # Usage with code generated by byacc from a parser definition
389 # file SimpleCalcParser.yy...
390
391 $InputText = "3 + 4 +6\nx=3\ny=5\nx+y\nx+z\n";
392
393 $YYLexer = new Parsers::YYLexer($InputText,@LexerTokensSpec);
394
395 $YYLex = $YYLexer->GetYYLex();
396
397 $YYTabFile = "Parsers/SimpleCalcParser.tab.ph";
398 $YYLexer->SetupYYTabFile($YYTabFile);
399
400 $Debug = 0;
401 $SimpleCalcParser = new Parsers::SimpleCalcParser($YYLex,
402 \&Parsers::SimpleCalcParser::yyerror, $Debug);
403
404 $Value = $SimpleCalcParser->yyparse();
405 print "Value = " . (defined($Value) ? "$Value" : "Undefined") . "\n";
406
407 =item B<GetYYLex>
408
409 $YYLex = $YYLexer->GetYYLex();
410
411 Returns a curried version of YYLexer as B<YYLex>: yyparse in parser generated by
412 byacc expects to call it without passing any argument for the I<YYLexer> object.
413
414 =item B<Next>
415
416 ($TokenNumber, $TokenText) = $YYLexer->Next();
417
418 Returns next available B<TokenNumber> and any matched B<TokenText> from
419 input stream by removing it from the input stream. Token number and text of
420 zero corresponds to end of input (EOI).
421
422 =item B<Peek>
423
424 ($TokenNumber, $TokenText) = $YYLexer->Peek();
425
426 Returns next available B<TokenNumber> and any matched B<TokenText> from
427 input stream by simply looking ahead and without removing it from the input stream.
428 Token number and text of zero corresponds to end of input (EOI).
429
430 =item B<SetupYYTabFile>
431
432 $YYLexer = $YYLexer->SetupYYTabFile($YYTabFile);
433
434 Processes token labels to integers data map in specified I<YYTabFile> and returns
435 I<YYLexer>.
436
437 Notes:
438
439 . YYTabFile must be a complete path or available through @INC path in the
440 same directory where this package is located.
441 . Name of YYTabFile might start with any valid sub directory name in @INC.
442 For example, "Parsers/<YYTabFile>" implies the tab file in parsers sub
443 directory under MayaChemTools lib directory as it would be already in @INC
444 path.
445 . YYTabFile must be explicitly set by the caller. The default YYTabFile name,
446 y.tab.ph, generated by byacc is not used implicitly to avoid confusion
447 among multiple distinct instances of YYLexer.
448 . YYTabFile is generated by byacc during its usage with -d options and
449 contains mapping of token codes to token names/labels. YYLexer used this
450 file to map token labels to token codes before returning token code and
451 value pair back to yyparse function used by byacc.
452 . User defined token numbers start from 257
453 . Token number for any user defined token EOI is mapped to its value before
454 default token number of 0 for EOI.
455
456 The format of YYTabFile generated by byacc during generation of parser code in
457 Perl code is:
458
459 ... ...
460 $NUMBER=257;
461 $ADDOP=258;
462 $SUBOP=259;
463 ... ..
464
465 =item B<YYLex>
466
467 ($TokenNumber, $TokenText) = $YYLexer->YYLex();
468 ($TokenNumber, $TokenText) = $YYLexer->YYLex($Mode);
469
470 Returns available B<TokenNumber> and any matched B<TokenText> from
471 input stream by either removing it from the input stream or by simply looking
472 ahead and without removing it from the input stream. Token number and text of
473 zero corresponds to end of input (EOI).
474
475 Possible I<Mode> values: I<Peek, Next>. Default: I<Next>.
476
477 I<YYLex> is designed to be used with B<yyparse> code generated by running
478 B<byacc> on a parser defined using a parser definition B<ParserName.yy> file.
479
480 Notes:
481
482 . Token label and value pairs returned by Lexer from by base class, which
483 can't be mapped to token labels specified in YYTabFile are ignored.
484 . Token text of length 1 returned by Lexer from base class without a
485 corresponding explicit token label, which can't be mapped to a token
486 number using Perl ord function, is ignored.
487
488 =item B<StringifyYYLexer>
489
490 $YYLexerString = $YYLexer->StringifyYYLexer();
491
492 Returns a string containing information about I<YYLexer> object.
493
494 =back
495
496 =head1 AUTHOR
497
498 Manish Sud <msud@san.rr.com>
499
500 =head1 SEE ALSO
501
502 Lexer.pm, SimpleCalcYYLexer.pm, SimpleCalcParser.yy
503
504 =head1 COPYRIGHT
505
506 Copyright (C) 2015 Manish Sud. All rights reserved.
507
508 This file is part of MayaChemTools.
509
510 MayaChemTools is free software; you can redistribute it and/or modify it under
511 the terms of the GNU Lesser General Public License as published by the Free
512 Software Foundation; either version 3 of the License, or (at your option)
513 any later version.
514
515 =cut