0
|
1 package Parsers::Lexer;
|
|
2 #
|
|
3 # $RCSfile: Lexer.pm,v $
|
|
4 # $Date: 2015/02/28 20:50:55 $
|
|
5 # $Revision: 1.10 $
|
|
6 #
|
|
7 # Author: Manish Sud <msud@san.rr.com>
|
|
8 #
|
|
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
10 #
|
|
11 # This file is part of MayaChemTools.
|
|
12 #
|
|
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
14 # the terms of the GNU Lesser General Public License as published by the Free
|
|
15 # Software Foundation; either version 3 of the License, or (at your option) any
|
|
16 # later version.
|
|
17 #
|
|
18 # MayaChemTools is distributed in the hope that it will be useful, but without
|
|
19 # any warranty; without even the implied warranty of merchantability of fitness
|
|
20 # for a particular purpose. See the GNU Lesser General Public License for more
|
|
21 # details.
|
|
22 #
|
|
23 # You should have received a copy of the GNU Lesser General Public License
|
|
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
|
|
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
|
|
26 # Boston, MA, 02111-1307, USA.
|
|
27 #
|
|
28
|
|
29 use strict;
|
|
30 use Carp;
|
|
31 use Exporter;
|
|
32 use Scalar::Util ();
|
|
33
|
|
34 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
|
|
35
|
|
36 @ISA = qw(Exporter);
|
|
37 @EXPORT = qw();
|
|
38 @EXPORT_OK = qw();
|
|
39
|
|
40 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);
|
|
41
|
|
42 # Setup class variables...
|
|
43 my($ClassName);
|
|
44 _InitializeClass();
|
|
45
|
|
46 # Overload Perl functions...
|
|
47 use overload '""' => 'StringifyLexer';
|
|
48
|
|
49 # Class constructor...
|
|
50 sub new {
|
|
51 my($Class, $Input, @TokensSpec) = @_;
|
|
52
|
|
53 # Initialize object...
|
|
54 my $This = {};
|
|
55 bless $This, ref($Class) || $Class;
|
|
56 $This->_InitializeLexer();
|
|
57
|
|
58 $This->_ValidateParametersAndGenerateLexer($Input, @TokensSpec);
|
|
59
|
|
60 return $This;
|
|
61 }
|
|
62
|
|
63
|
|
64 # Initialize class ...
|
|
65 sub _InitializeClass {
|
|
66 #Class name...
|
|
67 $ClassName = __PACKAGE__;
|
|
68 }
|
|
69
|
|
70 # Initialize object data...
|
|
71 #
|
|
72 sub _InitializeLexer {
|
|
73 my($This) = @_;
|
|
74
|
|
75 # Input parameter used by lexer to retrieve text to be lexed. Supported parameter types:
|
|
76 # . Reference to input iterator function
|
|
77 # . Reference to an open file handle
|
|
78 # . Text string
|
|
79 #
|
|
80 $This->{Input} = undef;
|
|
81
|
|
82 # Type of input paramater determined using Perl ref function:
|
|
83 # . InputIterator - ref returns CODE
|
|
84 # . FileStream - ref return GLOB and fileno is valid
|
|
85 # . String - ref return an empty string
|
|
86 #
|
|
87 $This->{InputType} = '';
|
|
88
|
|
89 # Tokens specifications supplied by the caller. It's an array containing references
|
|
90 # to arrays with each containing TokenLabel and TokenMatchRegex pair along with
|
|
91 # an option reference to code to be executed after a matched.
|
|
92 #
|
|
93 # For example:
|
|
94 #
|
|
95 # @LexerTokensSpec = (
|
|
96 # [ 'LETTER', qr/[a-zA-Z]/ ],
|
|
97 # [ 'NUMBER', qr/\d+/ ],
|
|
98 # [ 'SPACE', qr/[ ]*/, sub { my($This, $TokenLabel, $MatchedText) = @_; return ''; } ],
|
|
99 # [ 'NEWLINE', qr/(?:\r\n|\r|\n)/, sub { my($This, $TokenLabel, $MatchedText) = @_; return "\n"; } ],
|
|
100 # [ 'CHAR', qr/[\.]/ ],
|
|
101 # );
|
|
102 #
|
|
103 @{$This->{TokensSpec}} = ();
|
|
104
|
|
105 # Refernce to chained lexer...
|
|
106 $This->{ChainedLexer} = undef;
|
|
107
|
|
108 return $This;
|
|
109 }
|
|
110
|
|
111 # Validate input parameters and generate a chained lexer...
|
|
112 #
|
|
113 sub _ValidateParametersAndGenerateLexer {
|
|
114 my($This, $Input, @TokensSpec) = @_;
|
|
115
|
|
116 #
|
|
117 # Validate input to be lexed...
|
|
118 if (!defined $Input) {
|
|
119 croak "Error: ${ClassName}->new: Object can't be instantiated: Input is not defined. Supported values: a reference to input iterator function, a reference to an open file handle or a text string...";
|
|
120 }
|
|
121 $This->{Input} = $Input;
|
|
122
|
|
123 # Check input parameter type...
|
|
124 my($InputType);
|
|
125
|
|
126 $InputType = ref $Input;
|
|
127 if ($InputType =~ /CODE/i) {
|
|
128 # Input iterator...
|
|
129 $This->{InputType} = "InputIterator";
|
|
130 }
|
|
131 elsif ($InputType =~ /GLOB/i && defined fileno $Input) {
|
|
132 # Input stream...
|
|
133 $This->{InputType} = "FileStream";
|
|
134 }
|
|
135 elsif ($InputType) {
|
|
136 # Perl ref function returns nonempty string for all other references...
|
|
137 croak "Error: ${ClassName}->new: Object can't be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...";
|
|
138 }
|
|
139 else {
|
|
140 # Input string...
|
|
141 $This->{InputType} = "String";
|
|
142 }
|
|
143
|
|
144 # Check tokens specifications...
|
|
145 if (!@TokensSpec) {
|
|
146 croak "Error: ${ClassName}->new: TokensSpec is not defined or the array doesn't contain any values. Supported values: a reference to an array containg token label, regular expression to match and an option reference to function to modify matched values...";
|
|
147 }
|
|
148 @{$This->{TokensSpec}} = @TokensSpec;
|
|
149
|
|
150 $This->_GenerateLexer($Input, @TokensSpec);
|
|
151
|
|
152 return $This;
|
|
153 }
|
|
154
|
|
155 # Generate a lexer using reference to an input iterator function, an open file
|
|
156 # handle or an input string passed as first parameter by the caller along
|
|
157 # with token specifications as second paramater...
|
|
158 #
|
|
159 sub _GenerateLexer {
|
|
160 my($This, $Input, @TokensSpec) = @_;
|
|
161
|
|
162 if ($This->{InputType} =~ /^InputIterator$/i) {
|
|
163 $This->_GenerateInputIteratorLexer($Input, @TokensSpec);
|
|
164 }
|
|
165 elsif ($This->{InputType} =~ /^FileStream$/i) {
|
|
166 $This->_GenerateInputFileStreamLexer($Input, @TokensSpec);
|
|
167 }
|
|
168 elsif ($This->{InputType} =~ /^String$/i) {
|
|
169 $This->_GenerateInputStringLexer($Input, @TokensSpec);
|
|
170 }
|
|
171 else {
|
|
172 croak "Error: ${ClassName}->new: Object can't be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...";
|
|
173 }
|
|
174
|
|
175 return $This;
|
|
176 }
|
|
177
|
|
178 # Generate a lexer using specifed input iterator...
|
|
179 #
|
|
180 sub _GenerateInputIteratorLexer {
|
|
181 my($This, $InputIteratorRef, @TokensSpec) = @_;
|
|
182
|
|
183 $This->_GenerateChainedLexer($InputIteratorRef, @TokensSpec);
|
|
184
|
|
185 return $This;
|
|
186 }
|
|
187
|
|
188 # Generate a lexer using specifed input file stream reference...
|
|
189 #
|
|
190 sub _GenerateInputFileStreamLexer {
|
|
191 my($This, $FileHandleRef, @TokensSpec) = @_;
|
|
192
|
|
193 # Iterator is a annoymous function reference and Perl keeps $FileHandleRef
|
|
194 # in scope during its execution.
|
|
195
|
|
196 $This->_GenerateChainedLexer( sub { return <$FileHandleRef>; }, @TokensSpec);
|
|
197
|
|
198 return $This;
|
|
199 }
|
|
200
|
|
201 # Generate a lexer using specifed input string...
|
|
202 #
|
|
203 sub _GenerateInputStringLexer {
|
|
204 my($This, $Text, @TokensSpec) = @_;
|
|
205 my(@InputText) = ($Text);
|
|
206
|
|
207 # Iterator is a annoymous function reference and Perl keeps @InputText
|
|
208 # in scope during its execution.
|
|
209
|
|
210 $This->_GenerateChainedLexer( sub { return shift @InputText; }, @TokensSpec);
|
|
211
|
|
212 return $This;
|
|
213 }
|
|
214
|
|
215 # Get next available token label and value pair as an array reference or unrecognized
|
|
216 # text from input stream by either removing it from the input or simply peeking ahead...
|
|
217 #
|
|
218 # Supported mode values: Peek, Next. Default: Next
|
|
219 #
|
|
220 sub Lex {
|
|
221 my($This, $Mode) = @_;
|
|
222
|
|
223 return $This->{ChainedLexer}->($Mode)
|
|
224 }
|
|
225
|
|
226 # Get next available token label and value pair as an array reference or unrecognized
|
|
227 # text from input stream by either removing it from the input stream...
|
|
228 #
|
|
229 sub Next {
|
|
230 my($This) = @_;
|
|
231
|
|
232 return $This->Lex();
|
|
233 }
|
|
234
|
|
235 # Get next available token label and value pair as an array reference or unrecognized
|
|
236 # text from input stream by simply peeking ahead and without removing it from the input
|
|
237 # stream..
|
|
238 #
|
|
239 sub Peek {
|
|
240 my($This) = @_;
|
|
241
|
|
242 return $This->Lex('Peek')
|
|
243 }
|
|
244
|
|
245 # Get a reference to lexer method to be used by the caller...
|
|
246 #
|
|
247 sub GetLex {
|
|
248 my($This) = @_;
|
|
249
|
|
250 return sub { $This->Lex(); };
|
|
251 }
|
|
252
|
|
253 # The chained lexer generation is implemented based on examples in Higher-order Perl
|
|
254 # [ Ref 126 ] book.
|
|
255 #
|
|
256 # Generate a lexer using specified input iterator and chaining it with other lexers generated
|
|
257 # for all token specifications. The lexer generated for first token specification uses input
|
|
258 # iterator to retrieve any available input text; the subsequent chained lexeres for rest
|
|
259 # of the tokens use lexers generated for previous token specifications to get next input
|
|
260 # which might be unmatched input text or a reference to an array containing token and
|
|
261 # matched text pair.
|
|
262 #
|
|
263 sub _GenerateChainedLexer {
|
|
264 my($This, $InputIteratorRef, @TokensSpec) = @_;
|
|
265 my($TokenSpecRef, $ChainedLexer);
|
|
266
|
|
267 $ChainedLexer = undef;
|
|
268 for $TokenSpecRef (@TokensSpec) {
|
|
269 $ChainedLexer = defined $ChainedLexer ? $This->_GenerateLexerForToken($ChainedLexer, @{$TokenSpecRef}) : $This->_GenerateLexerForToken($InputIteratorRef, @{$TokenSpecRef});
|
|
270 }
|
|
271
|
|
272 $This->{ChainedLexer} = $ChainedLexer;
|
|
273
|
|
274 return $This;
|
|
275 }
|
|
276
|
|
277
|
|
278 # Generate a lexer using specifed token specification using specified input or
|
|
279 # input retrieved using another token lexer. The lexer retrieving input from the
|
|
280 # specified input stream is at the bottom of the chain.
|
|
281 #
|
|
282 sub _GenerateLexerForToken {
|
|
283 my($This, $InputIteratorOrLexer, $TokenLabel, $RegexPattern, $TokenMatchActionRef) = @_;
|
|
284 my($TokenMatchAndSplitRef, $InputBuffer, @ProcessedTokens);
|
|
285
|
|
286 # Input buffer for a specific lexer in chained lexers containing unprocessed
|
|
287 # text for token specifications retrieved from a downstrean lexer or intial
|
|
288 # input...
|
|
289 #
|
|
290 $InputBuffer = "";
|
|
291
|
|
292 # @ProcessedTokens contains either references to an array containing token label
|
|
293 # and matched text or any unmatched input text string...
|
|
294 #
|
|
295 @ProcessedTokens = ();
|
|
296
|
|
297 # Setup a default annoymous function reference to generate an array reference
|
|
298 # containing $Token and text matched to $RegexPattern.
|
|
299 #
|
|
300 $TokenMatchActionRef = defined $TokenMatchActionRef ? $TokenMatchActionRef : sub { my($This, $Label, $MatchedText) = @_; return [$Label, $MatchedText] };
|
|
301
|
|
302 # Setup an annoymous function to match and split input text using $RegexPattern for
|
|
303 # a specific token during its lexer invocation in chained lexers.
|
|
304 #
|
|
305 # The usage of parenthesis around $RegexPattern during split allows capturing of matched
|
|
306 # text, which is subsequently processed to retrieve matched $Token values. The split function
|
|
307 # inserts a "" separator in the returned array as first entry whenever $InputText starts with
|
|
308 # $RegexPattern. $InputText is returned as the only element for no match.
|
|
309 #
|
|
310 $TokenMatchAndSplitRef = sub { my($InputText) = @_; return split /($RegexPattern)/, $InputText; };
|
|
311
|
|
312 # Setup a lexer for $TokenLabel as an annoymous function and return its reference to caller
|
|
313 # which in turns chains the lexers for all $Tokens before returning a reference to a lexer
|
|
314 # at top of the lexer chain.
|
|
315 #
|
|
316 # Perl maintains scope of all variables defined with in the scope of the current function
|
|
317 # during invocation of annoymous function even after the return call.
|
|
318 #
|
|
319 return sub {
|
|
320 my($Mode) = @_;
|
|
321
|
|
322 # Currenly supported value for mode: Peek, Next
|
|
323 #
|
|
324 $Mode = defined $Mode ? $Mode : 'Next';
|
|
325
|
|
326 while (@ProcessedTokens == 0 && defined $InputBuffer ) {
|
|
327 # Get any new input....
|
|
328 my $NewInput = $InputIteratorOrLexer->();
|
|
329
|
|
330 if (ref $NewInput) {
|
|
331 # Input is an array reference containing matched token and text returned by
|
|
332 # a chained lexer downstream lexer...
|
|
333 #
|
|
334 # Match $RegexPattern in available buffer text to retieve any matched text
|
|
335 # for current $Token. $Separator might be "": $RegexPattern is at start of
|
|
336 # of $InputBuffer
|
|
337 #
|
|
338 # Process input buffer containing text to be matched for the current lexer
|
|
339 # which didn't get processed earlier during @NewTokens > 2 while loop:
|
|
340 # no match for current lexer or more input available. It maintains order
|
|
341 # of token matching in input stream.
|
|
342 #
|
|
343 my($Separator, $MatchedTokenRefOrText);
|
|
344
|
|
345 ($Separator, $MatchedTokenRefOrText) = $TokenMatchAndSplitRef->($InputBuffer);
|
|
346 if (defined $MatchedTokenRefOrText) {
|
|
347 $MatchedTokenRefOrText = $TokenMatchActionRef->($This, $TokenLabel, $MatchedTokenRefOrText);
|
|
348 }
|
|
349
|
|
350 # Collect valid token references or text...
|
|
351 push @ProcessedTokens, grep { defined $_ && $_ ne "" } ($Separator, $MatchedTokenRefOrText, $NewInput);
|
|
352
|
|
353 # Empty put buffer...
|
|
354 $InputBuffer = "";
|
|
355
|
|
356 # Get out of the loop as processed token refererences and/or text are available...
|
|
357 last;
|
|
358 }
|
|
359
|
|
360 # Process input retrieved from downstream lexer or input iterator which hasn't
|
|
361 # been processed into tokens..
|
|
362 if (defined $NewInput) {
|
|
363 $InputBuffer .= $NewInput;
|
|
364 }
|
|
365
|
|
366 # Retrieve any matched tokens from available input for the current lexer...
|
|
367 #
|
|
368 my(@NewTokens) = $TokenMatchAndSplitRef->($InputBuffer);
|
|
369
|
|
370 while ( @NewTokens > 2 || @NewTokens && !defined $NewInput) {
|
|
371 # Scenario 1: Complete match
|
|
372 # @NewTokens > 2 : Availability of separator, matched token text, separator.
|
|
373 # The separator might correspond to token for a token for upstream lexer followed
|
|
374 # by matched token from current lexer. It ends up getting passed to upsrteam
|
|
375 # lexer for processing.
|
|
376 #
|
|
377 # Scenario 2: No more input available from iterator or downstream lexer
|
|
378 # @NewTokens <= 2 and no more input implies any left over text in buffer. And
|
|
379 # it ends up getting passed to upsrteam for processing.
|
|
380 #
|
|
381
|
|
382 # Take off any unprocessed input text that doesn't match off the buffer: It'll be
|
|
383 # passed to upstream chained lexer for processing...
|
|
384 #
|
|
385 push @ProcessedTokens, shift @NewTokens;
|
|
386
|
|
387 if (@NewTokens) {
|
|
388 my $MatchedTokenText = shift @NewTokens;
|
|
389 push @ProcessedTokens, $TokenMatchActionRef->($This, $TokenLabel, $MatchedTokenText);
|
|
390 }
|
|
391 }
|
|
392
|
|
393 # Retrieve any leftover text from NewTokens and put it back into InputBuffer for
|
|
394 # processing by current lexer. All token references have been taken out....
|
|
395 #
|
|
396 $InputBuffer = "";
|
|
397 if (@NewTokens) {
|
|
398 $InputBuffer = join "", @NewTokens;
|
|
399 }
|
|
400
|
|
401 if (!defined $NewInput) {
|
|
402 # No more input from the downstream lexer...
|
|
403 $InputBuffer = undef;
|
|
404 }
|
|
405
|
|
406 # Clean up any empty strings from ProcessedTokens containing token
|
|
407 # references or text...
|
|
408 @ProcessedTokens = grep { $_ ne "" } @ProcessedTokens;
|
|
409
|
|
410 }
|
|
411
|
|
412 # Return reference to an array containing token and matched text or just unmatched input text...
|
|
413 my $TokenRefOrText = undef;
|
|
414
|
|
415 if (@ProcessedTokens) {
|
|
416 # Get first available reference either by just peeking or removing it from the list
|
|
417 # of available tokens...
|
|
418 $TokenRefOrText = ($Mode =~ /^Peek$/i) ? $ProcessedTokens[0] : shift @ProcessedTokens;
|
|
419 }
|
|
420
|
|
421 return $TokenRefOrText;
|
|
422 };
|
|
423 }
|
|
424
|
|
425 # Is it a lexer object?
|
|
426 sub _IsLexer {
|
|
427 my($Object) = @_;
|
|
428
|
|
429 return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
|
|
430 }
|
|
431
|
|
432 # Return a string containing information about lexer...
|
|
433 sub StringifyLexer {
|
|
434 my($This) = @_;
|
|
435 my($LexerString);
|
|
436
|
|
437 $LexerString = "Lexer: PackageName: $ClassName; " . $This->_GetLexerInfoString();
|
|
438
|
|
439 return $LexerString;
|
|
440 }
|
|
441
|
|
442 # Return a string containing information about lexer...
|
|
443 sub _GetLexerInfoString {
|
|
444 my($This) = @_;
|
|
445 my($LexerInfoString, $TokensSpec, $TokenSpec, $TokenLabel, $TokenMatchRegex, $TokenMatchAction);
|
|
446
|
|
447 $LexerInfoString = "InputType: $This->{InputType}";
|
|
448
|
|
449 if ($This->{InputType} =~ /^String$/i) {
|
|
450 $LexerInfoString .= "; InputString: $This->{Input}";
|
|
451 }
|
|
452
|
|
453 $TokensSpec = "TokensSpecifications: <None>";
|
|
454 if (@{$This->{TokensSpec}}) {
|
|
455 $TokensSpec = "TokensSpecifications: < [Label, MatchRegex, MatchAction]:";
|
|
456 for $TokenSpec (@{$This->{TokensSpec}}) {
|
|
457 ($TokenLabel, $TokenMatchRegex) = @{$TokenSpec};
|
|
458 $TokenMatchAction = (@{$TokenSpec} == 3) ? "$TokenSpec->[2]" : "undefined";
|
|
459 $TokensSpec .= " [$TokenLabel, $TokenMatchRegex, $TokenMatchAction]";
|
|
460 }
|
|
461 $TokensSpec .= " >";
|
|
462 }
|
|
463
|
|
464 $LexerInfoString .= "; $TokensSpec";
|
|
465
|
|
466 return $LexerInfoString;
|
|
467 }
|
|
468
|
|
469 1;
|
|
470
|
|
471 __END__
|
|
472
|
|
473 =head1 NAME
|
|
474
|
|
475 Parsers::Lexer
|
|
476
|
|
477 =head1 SYNOPSIS
|
|
478
|
|
479 use Parsers::Lexer;
|
|
480
|
|
481 use Parsers::Lexer qw(:all);
|
|
482
|
|
483 =head1 DESCRIPTION
|
|
484
|
|
485 B<Lexer> class provides the following methods:
|
|
486
|
|
487 new, GetLex, Lex, Next, Peek, StringifyLexer
|
|
488
|
|
489 The object oriented chained B<Lexer> is implemented based on examples available in
|
|
490 Higher-order Perl [ Ref 126 ] book by Mark J. Dominus. It is designed to be used
|
|
491 both in standalone mode or as a base class for B<YYLexer>.
|
|
492
|
|
493 A chained lexer is created by generating a lexer for for the first specified token
|
|
494 specification using specified input and chaining it with other lexers generated for all
|
|
495 subsequent token specifications. The lexer generated for the first token specification
|
|
496 uses input iterator to retrieve any available input text; the subsequent chained lexeres
|
|
497 for rest of the token specifications use lexers generated for previous token
|
|
498 specifications to get next input, which might be unmatched input text or a reference
|
|
499 to an array containing token and matched text pair.
|
|
500
|
|
501 =head2 METHODS
|
|
502
|
|
503 =over 4
|
|
504
|
|
505 =item B<new>
|
|
506
|
|
507 $Lexer = new Parsers::Lexer($Input, @TokensSpec);
|
|
508
|
|
509 Using specified I<Input> and I<TokensSpec>, B<new> method generates a new lexer
|
|
510 and returns a reference to newly created B<Lexer> object.
|
|
511
|
|
512 Example:
|
|
513
|
|
514 # Tokens specifications supplied by the caller. It's an array containing references
|
|
515 # to arrays with each containing TokenLabel and TokenMatchRegex pair along with
|
|
516 # an option reference to code to be executed after a matched.
|
|
517 #
|
|
518 @LexerTokensSpec = (
|
|
519 [ 'LETTER', qr/[a-zA-Z]/ ],
|
|
520 [ 'NUMBER', qr/\d+/ ],
|
|
521 [ 'SPACE', qr/[ ]*/,
|
|
522 sub { my($This, $TokenLabel, $MatchedText) = @_; return ''; }
|
|
523 ],
|
|
524 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/,
|
|
525 sub { my($This, $TokenLabel, $MatchedText) = @_; return "\n"; }
|
|
526 ],
|
|
527 [ 'CHAR', qr/./ ]
|
|
528 );
|
|
529
|
|
530 # Input string...
|
|
531 $InputText = 'y = 3 + 4';
|
|
532 $Lexer = new Parsers::Lexer($InputText, @LexerTokensSpec);
|
|
533
|
|
534 # Process input stream...
|
|
535 while (defined($Token = $Lexer->Lex())) {
|
|
536 print "Token: " . ((ref $Token) ? "@{$Token}" : "$Token") . "\n";
|
|
537 }
|
|
538
|
|
539 # Input file...
|
|
540 $InputFile = "Input.txt";
|
|
541 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
|
|
542 $Lexer = new Parsers::Lexer(\*INPUTFILE, @LexerTokensSpec);
|
|
543
|
|
544 # Input file iterator...
|
|
545 $InputFile = "TestSimpleCalcParser.txt";
|
|
546 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
|
|
547 $InputIterator = sub { return <INPUTFILE>; };
|
|
548 $Lexer = new Parsers::Lexer($InputIterator, @LexerTokensSpec);
|
|
549
|
|
550 @LexerTokensSpec = (
|
|
551 [ 'VAR', qr/[[:alpha:]]+/ ],
|
|
552 [ 'NUM', qr/\d+/ ],
|
|
553 [ 'OP', qr/[-+=\/]/,
|
|
554 sub { my($This, $Label, $Value) = @_;
|
|
555 $Value .= "; ord: " . ord $Value;
|
|
556 return [$Label, $Value];
|
|
557 }
|
|
558 ],
|
|
559 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/, sub { return [$_[1], 'NewLine']; } ],
|
|
560 [ 'SPACE', qr/\s*/, sub { return [$_[1], 'Space']; } ],
|
|
561 );
|
|
562
|
|
563 # Look ahead without removing...
|
|
564 $Token = $Lexer->Lex('Peek');
|
|
565 if (defined $Token && ref $Token) {
|
|
566 print "PEEK: Token: @{$Token}\n\n";
|
|
567 }
|
|
568
|
|
569 # Process input stream...
|
|
570 while (defined($Token = $Lexer->Lex())) {
|
|
571 print "Token: " . ((ref $Token) ? "@{$Token}" : "$Token") . "\n";
|
|
572 }
|
|
573
|
|
574 =item B<GetLex>
|
|
575
|
|
576 $LexerRef = $Lexer->GetLex();
|
|
577
|
|
578 Returns a refernece to I<Lexer> method to the caller for use in a specific B<YYLexer>.
|
|
579
|
|
580 =item B<Lex>
|
|
581
|
|
582 $TokenRefOrText = $Lexer->Lex($Mode);
|
|
583 if (ref $TokenRefOrText) {
|
|
584 ($TokenLabel, $TokenValue) = @{$TokenRefOrText};
|
|
585 }
|
|
586 else {
|
|
587 $TokenText = $TokenRefOrText;
|
|
588 }
|
|
589
|
|
590 Get next available token label and value pair as an array reference or unrecognized
|
|
591 text from input stream by either removing it from the input or simply peeking ahead
|
|
592 and without removing it from the input stream.
|
|
593
|
|
594 Possible I<Mode> values: I<Peek, Next>. Default: I<Next>.
|
|
595
|
|
596 =item B<Next>
|
|
597
|
|
598 $TokenRefOrText = $Lexer->Next();
|
|
599
|
|
600 Get next available token label and value pair as an array reference or unrecognized
|
|
601 text from input stream by removing it from the input stream.
|
|
602
|
|
603 =item B<Peek>
|
|
604
|
|
605 $TokenRefOrText = $Lexer->Peek();
|
|
606
|
|
607 Get next available token label and value pair as an array reference or unrecognized
|
|
608 text from input stream by by simply peeking ahead and without removing it from the
|
|
609 input stream.
|
|
610
|
|
611 =item B<StringifyLexer>
|
|
612
|
|
613 $LexerString = $Lexer->StringifyLexer();
|
|
614
|
|
615 Returns a string containing information about I<Lexer> object.
|
|
616
|
|
617 =back
|
|
618
|
|
619 =head1 AUTHOR
|
|
620
|
|
621 Manish Sud <msud@san.rr.com>
|
|
622
|
|
623 =head1 SEE ALSO
|
|
624
|
|
625 YYLexer.pm, SimpleCalcYYLexer.pm, SimpleCalcParser.yy
|
|
626
|
|
627 =head1 COPYRIGHT
|
|
628
|
|
629 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
630
|
|
631 This file is part of MayaChemTools.
|
|
632
|
|
633 MayaChemTools is free software; you can redistribute it and/or modify it under
|
|
634 the terms of the GNU Lesser General Public License as published by the Free
|
|
635 Software Foundation; either version 3 of the License, or (at your option)
|
|
636 any later version.
|
|
637
|
|
638 =cut
|