Mercurial > repos > deepakjadmin > r_caret_test1
comparison mayachemtool/mayachemtools/lib/Parsers/Lexer.pm @ 0:a4a2ad5a214e draft default tip
Uploaded
| author | deepakjadmin |
|---|---|
| date | Thu, 05 Nov 2015 02:37:56 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a4a2ad5a214e |
|---|---|
| 1 package Parsers::Lexer; | |
| 2 # | |
| 3 # $RCSfile: Lexer.pm,v $ | |
| 4 # $Date: 2015/02/28 20:50:55 $ | |
| 5 # $Revision: 1.10 $ | |
| 6 # | |
| 7 # Author: Manish Sud <msud@san.rr.com> | |
| 8 # | |
| 9 # Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 10 # | |
| 11 # This file is part of MayaChemTools. | |
| 12 # | |
| 13 # MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 14 # the terms of the GNU Lesser General Public License as published by the Free | |
| 15 # Software Foundation; either version 3 of the License, or (at your option) any | |
| 16 # later version. | |
| 17 # | |
| 18 # MayaChemTools is distributed in the hope that it will be useful, but without | |
| 19 # any warranty; without even the implied warranty of merchantability of fitness | |
| 20 # for a particular purpose. See the GNU Lesser General Public License for more | |
| 21 # details. | |
| 22 # | |
| 23 # You should have received a copy of the GNU Lesser General Public License | |
| 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or | |
| 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, | |
| 26 # Boston, MA, 02111-1307, USA. | |
| 27 # | |
| 28 | |
| 29 use strict; | |
| 30 use Carp; | |
| 31 use Exporter; | |
| 32 use Scalar::Util (); | |
| 33 | |
# Package globals required by Exporter. The class is used through its object
# oriented interface and exports nothing; the empty lists and the "all" tag are
# retained so that "use Parsers::Lexer qw(:all);" keeps working.
#
# The variables are declared using "our" instead of the obsolete "use vars" pragma.
#
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions: using a lexer object in a string context invokes
# StringifyLexer...
use overload '""' => 'StringifyLexer';
| 48 | |
# Class constructor: create a lexer object for the specified input and tokens
# specifications.
#
# Parameters:
#   . Input - reference to an input iterator function, reference to an open file
#     handle or a text string
#   . TokensSpec - list of array references describing tokens
#
# Returns a reference to the newly created object.
#
sub new {
  my($Class, $Input, @TokensSpec) = @_;

  # Allow invocation as a class method as well as on an existing object...
  my $This = bless {}, ref($Class) || $Class;

  $This->_InitializeLexer();
  $This->_ValidateParametersAndGenerateLexer($Input, @TokensSpec);

  return $This;
}
| 62 | |
| 63 | |
# Initialize class data shared by all lexer objects...
#
sub _InitializeClass {
  # Name of this package; it is used in error messages and by StringifyLexer...
  $ClassName = __PACKAGE__;
}
| 69 | |
# Initialize object data to its default empty state and return the object...
#
sub _InitializeLexer {
  my $This = shift;

  # Input:
  #   Input parameter used by lexer to retrieve text to be lexed. Supported
  #   parameter types:
  #     . Reference to input iterator function
  #     . Reference to an open file handle
  #     . Text string
  #
  # InputType:
  #   Type of input parameter determined during validation of Input:
  #     . InputIterator - reference to code
  #     . FileStream - reference to a glob with a valid fileno
  #     . String - not a reference
  #
  # ChainedLexer:
  #   Reference to the chained lexer function generated for TokensSpec.
  #
  @{$This}{qw(Input InputType ChainedLexer)} = (undef, '', undef);

  # TokensSpec:
  #   Tokens specifications supplied by the caller. It's an array containing
  #   references to arrays with each containing a TokenLabel and TokenMatchRegex
  #   pair along with an optional reference to code to be executed after a match.
  #
  #   For example:
  #
  #   @LexerTokensSpec = (
  #     [ 'LETTER', qr/[a-zA-Z]/ ],
  #     [ 'NUMBER', qr/\d+/ ],
  #     [ 'SPACE', qr/[ ]*/, sub { my($This, $TokenLabel, $MatchedText) = @_; return ''; } ],
  #     [ 'NEWLINE', qr/(?:\r\n|\r|\n)/, sub { my($This, $TokenLabel, $MatchedText) = @_; return "\n"; } ],
  #     [ 'CHAR', qr/[\.]/ ],
  #   );
  #
  @{$This->{TokensSpec}} = ();

  return $This;
}
| 110 | |
# Validate input parameters, record them in the object and generate a chained lexer.
#
# Parameters:
#   . Input - reference to an input iterator function, reference to an open file
#     handle or a text string
#   . TokensSpec - non-empty list of token specifications
#
# Croaks for undefined input, an unsupported reference type or an empty tokens
# specifications list. Returns the object.
#
sub _ValidateParametersAndGenerateLexer {
  my($This, $Input, @TokensSpec) = @_;

  # Validate input to be lexed...
  if (!defined $Input) {
    croak "Error: ${ClassName}->new: Object can't be instantiated: Input is not defined. Supported values: a reference to input iterator function, a reference to an open file handle or a text string...";
  }
  $This->{Input} = $Input;

  # Check input parameter type using the underlying reference type instead of
  # pattern matching the value returned by ref: ref returns the class name for
  # blessed references, so an object of a class whose name merely contains CODE
  # or GLOB would be misclassified and blessed file handles (e.g. IO::File)
  # would be rejected. reftype returns undef for non-references...
  #
  my $InputType = Scalar::Util::reftype($Input);
  $InputType = '' if !defined $InputType;

  if ($InputType eq 'CODE') {
    # Input iterator...
    $This->{InputType} = "InputIterator";
  }
  elsif ($InputType eq 'GLOB' && defined fileno $Input) {
    # Input stream...
    $This->{InputType} = "FileStream";
  }
  elsif ($InputType ne '') {
    # All other references, including globs without an open file handle...
    croak "Error: ${ClassName}->new: Object can't be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...";
  }
  else {
    # Input string...
    $This->{InputType} = "String";
  }

  # Check tokens specifications...
  if (!@TokensSpec) {
    croak "Error: ${ClassName}->new: TokensSpec is not defined or the array doesn't contain any values. Supported values: a reference to an array containg token label, regular expression to match and an option reference to function to modify matched values...";
  }
  @{$This->{TokensSpec}} = @TokensSpec;

  $This->_GenerateLexer($Input, @TokensSpec);

  return $This;
}
| 154 | |
# Generate a lexer for the input passed as first parameter - a reference to an
# input iterator function, an open file handle or an input string - along with
# tokens specifications as remaining parameters. The generator method is chosen
# using InputType recorded in the object; an unrecognized InputType croaks.
#
sub _GenerateLexer {
  my($This, $Input, @TokensSpec) = @_;

  # InputType pattern and name of the method generating the lexer for it...
  my @LexerGenerators = (
    [ qr/^InputIterator$/i, '_GenerateInputIteratorLexer' ],
    [ qr/^FileStream$/i, '_GenerateInputFileStreamLexer' ],
    [ qr/^String$/i, '_GenerateInputStringLexer' ],
  );

  for my $LexerGenerator (@LexerGenerators) {
    my($InputTypePattern, $MethodName) = @{$LexerGenerator};
    next unless $This->{InputType} =~ $InputTypePattern;

    $This->$MethodName($Input, @TokensSpec);
    return $This;
  }

  croak "Error: ${ClassName}->new: Object can't be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...";
}
| 177 | |
# Generate a lexer using specified input iterator: the iterator is handed over
# unchanged to the chained lexer generator...
#
sub _GenerateInputIteratorLexer {
  my $This = shift;
  my($InputIteratorRef, @TokensSpec) = @_;

  $This->_GenerateChainedLexer($InputIteratorRef, @TokensSpec);

  return $This;
}
| 187 | |
# Generate a lexer using specified input file stream reference...
#
sub _GenerateInputFileStreamLexer {
  my($This, $FileHandleRef, @TokensSpec) = @_;

  # Wrap the file handle into an input iterator. The iterator is an anonymous
  # function and Perl keeps $FileHandleRef in scope during its execution; the
  # read takes place in the context the iterator is called in.
  #
  my $InputIteratorRef = sub { return readline $FileHandleRef; };

  $This->_GenerateChainedLexer($InputIteratorRef, @TokensSpec);

  return $This;
}
| 200 | |
# Generate a lexer using specified input string...
#
sub _GenerateInputStringLexer {
  my($This, $Text, @TokensSpec) = @_;

  # Wrap the text into an input iterator which returns the whole text during its
  # first invocation and undef afterwards. The iterator is an anonymous function
  # and Perl keeps $RemainingText in scope during its execution.
  #
  my $RemainingText = $Text;
  my $InputIteratorRef = sub {
    my $NextText = $RemainingText;
    $RemainingText = undef;
    return $NextText;
  };

  $This->_GenerateChainedLexer($InputIteratorRef, @TokensSpec);

  return $This;
}
| 214 | |
# Get next available token label and value pair as an array reference or unrecognized
# text from input stream by either removing it from the input or simply peeking ahead.
# The mode is passed on unchanged to the chained lexer.
#
# Supported mode values: Peek, Next. Default: Next
#
sub Lex {
  my($This, $Mode) = @_;

  my $ChainedLexerRef = $This->{ChainedLexer};

  return $ChainedLexerRef->($Mode);
}
| 225 | |
# Get next available token label and value pair as an array reference or unrecognized
# text from input stream by removing it from the input stream. It simply calls
# Lex without specifying a mode.
#
sub Next {
  my $This = shift;

  return $This->Lex();
}
| 234 | |
# Get next available token label and value pair as an array reference or unrecognized
# text from input stream by simply peeking ahead and without removing it from the
# input stream. It calls Lex using Peek as the mode.
#
sub Peek {
  my $This = shift;

  return $This->Lex('Peek');
}
| 244 | |
# Get a reference to lexer method to be used by the caller. The returned
# anonymous function keeps the lexer object in scope and calls its Lex method
# without a mode; any arguments passed to it are ignored.
#
sub GetLex {
  my $This = shift;

  my $LexRef = sub { return $This->Lex(); };

  return $LexRef;
}
| 252 | |
# The chained lexer generation is implemented based on examples in Higher-order Perl
# [ Ref 126 ] book.
#
# Generate a lexer using specified input iterator and chain it with lexers generated
# for all token specifications. The lexer generated for the first token specification
# uses the input iterator to retrieve any available input text; each subsequent lexer
# uses the lexer generated for the previous token specification to get its next input,
# which might be unmatched input text or a reference to an array containing token and
# matched text pair.
#
# The lexer generated for the last token specification is stored as ChainedLexer; it
# stays undefined for an empty list of token specifications.
#
sub _GenerateChainedLexer {
  my($This, $InputIteratorRef, @TokensSpec) = @_;

  my $ChainedLexer = undef;

  for my $TokenSpecRef (@TokensSpec) {
    # Input source: lexer generated so far or the input iterator to start with...
    my $InputSourceRef = $InputIteratorRef;
    if (defined $ChainedLexer) {
      $InputSourceRef = $ChainedLexer;
    }

    $ChainedLexer = $This->_GenerateLexerForToken($InputSourceRef, @{$TokenSpecRef});
  }

  $This->{ChainedLexer} = $ChainedLexer;

  return $This;
}
| 276 | |
| 277 | |
# Generate a lexer for a single token specification. The lexer retrieves its input
# either from the specified input iterator or from the lexer generated for another
# token specification. The lexer retrieving input from the specified input stream
# is at the bottom of the chain.
#
# Parameters:
#   . InputIteratorOrLexer - reference to code returning next available input: text,
#     a reference generated by a downstream lexer or undef after no more input is
#     available
#   . TokenLabel, RegexPattern - label and regular expression for the token
#   . TokenMatchActionRef - optional reference to code invoked with the lexer object,
#     token label and matched text; its return value is used in place of the matched
#     text. Default: return a reference to an array containing label and matched text
#
# Returns a reference to an anonymous function which takes an optional mode - Peek or
# Next; default: Next - and returns next available token reference or unmatched text.
# It returns undef after all input has been processed.
#
sub _GenerateLexerForToken {
  my($This, $InputIteratorOrLexer, $TokenLabel, $RegexPattern, $TokenMatchActionRef) = @_;

  # Unprocessed text for this lexer retrieved from a downstream lexer or initial
  # input. It's set to undef after the input source has been exhausted...
  #
  my $PendingText = "";

  # Queue containing either references generated for matched tokens or any
  # unmatched input text strings, ready to be returned to the caller...
  #
  my @ReadyTokens = ();

  # Setup a default anonymous function to generate an array reference
  # containing token label and text matched to $RegexPattern...
  #
  if (!defined $TokenMatchActionRef) {
    $TokenMatchActionRef = sub { my($This, $Label, $MatchedText) = @_; return [$Label, $MatchedText] };
  }

  # Setup an anonymous function to match and split input text using $RegexPattern.
  #
  # The parentheses around $RegexPattern make split return the matched text as well,
  # so the fields alternate between unmatched text and matched text. The first field
  # is "" whenever input text starts with a match; input text is returned as the only
  # field for no match.
  #
  my $SplitTextRef = sub { my($InputText) = @_; return split /($RegexPattern)/, $InputText; };

  # Perl maintains scope of all variables defined above during invocation of the
  # anonymous function returned to the caller, which chains the lexers for all
  # token specifications.
  #
  return sub {
    my($Mode) = @_;

    # Currently supported values for mode: Peek, Next
    #
    $Mode = 'Next' unless defined $Mode;

    FILL: while (!@ReadyTokens && defined $PendingText) {
      # Get any new input; downstream lexers are always invoked without a mode...
      my $NewInput = $InputIteratorOrLexer->();

      if (ref $NewInput) {
        # Input is a reference generated by a downstream lexer for its own token.
        #
        # Any text still pending for the current lexer precedes that token in the
        # input stream: match it now and queue unmatched text, matched token and
        # the downstream token in that order. The unmatched text might be "" for
        # a match at the start of the pending text. The match action is invoked
        # in scalar context here.
        #
        my($UnmatchedText, $MatchedTokenRefOrText) = $SplitTextRef->($PendingText);
        if (defined $MatchedTokenRefOrText) {
          $MatchedTokenRefOrText = $TokenMatchActionRef->($This, $TokenLabel, $MatchedTokenRefOrText);
        }

        # Collect valid token references or text...
        push @ReadyTokens, grep { defined $_ && $_ ne "" } ($UnmatchedText, $MatchedTokenRefOrText, $NewInput);

        # All pending text has been processed...
        $PendingText = "";

        last FILL;
      }

      # Input is text, or undef after no more input is available...
      my $NoMoreInput = defined $NewInput ? 0 : 1;
      if (!$NoMoreInput) {
        $PendingText .= $NewInput;
      }

      # Retrieve any matched tokens from available text for the current lexer...
      #
      my @Fields = $SplitTextRef->($PendingText);

      # Scenario 1: Complete match
      #   More than two fields: unmatched text and matched text followed by at least
      #   one more field. The unmatched text gets passed on for processing by the
      #   lexers further up in the chain.
      #
      # Scenario 2: No more input available from iterator or downstream lexer
      #   Any leftover fields are flushed.
      #
      while (@Fields > 2 || (@Fields && $NoMoreInput)) {
        # Unmatched text...
        push @ReadyTokens, shift @Fields;

        next unless @Fields;

        # Matched text; the match action is invoked in list context here...
        push @ReadyTokens, $TokenMatchActionRef->($This, $TokenLabel, shift @Fields);
      }

      # Put any leftover fields back as pending text for the current lexer; mark
      # it as finished after no more input is available...
      #
      $PendingText = $NoMoreInput ? undef : join("", @Fields);

      # Clean up any empty strings...
      @ReadyTokens = grep { $_ ne "" } @ReadyTokens;
    }

    # Nothing left to return...
    return undef unless @ReadyTokens;

    # Get first available token reference or text either by just peeking or by
    # removing it from the queue...
    return $ReadyTokens[0] if $Mode =~ /^Peek$/i;

    return shift @ReadyTokens;
  };
}
| 424 | |
# Is it a lexer object? Returns 1 for a blessed reference belonging to this class
# or a class derived from it; otherwise, returns 0.
#
sub _IsLexer {
  my($Object) = @_;

  return 0 unless Scalar::Util::blessed($Object);

  return $Object->isa($ClassName) ? 1 : 0;
}
| 431 | |
# Return a string containing information about lexer: package name followed by
# the information generated by _GetLexerInfoString. It's also invoked whenever a
# lexer object is used in a string context.
#
sub StringifyLexer {
  my $This = shift;

  return "Lexer: PackageName: $ClassName; " . $This->_GetLexerInfoString();
}
| 441 | |
# Return a string containing information about lexer: input type, input string
# for String input type, and label, regular expression and match action for each
# token specification...
#
sub _GetLexerInfoString {
  my($This) = @_;

  my @InfoParts = ("InputType: $This->{InputType}");

  if ($This->{InputType} =~ /^String$/i) {
    push @InfoParts, "InputString: $This->{Input}";
  }

  my @TokensSpec = @{$This->{TokensSpec}};

  # Format each token specification; match action is reported as "undefined"
  # unless specification contains exactly three values...
  my @TokenSpecStrings = map {
    my $TokenSpec = $_;
    my($TokenLabel, $TokenMatchRegex) = @{$TokenSpec};
    my $TokenMatchAction = (@{$TokenSpec} == 3) ? "$TokenSpec->[2]" : "undefined";
    " [$TokenLabel, $TokenMatchRegex, $TokenMatchAction]";
  } @TokensSpec;

  if (@TokenSpecStrings) {
    push @InfoParts, "TokensSpecifications: < [Label, MatchRegex, MatchAction]:" . join("", @TokenSpecStrings) . " >";
  }
  else {
    push @InfoParts, "TokensSpecifications: <None>";
  }

  return join "; ", @InfoParts;
}
| 468 | |
| 469 1; | |
| 470 | |
| 471 __END__ | |
| 472 | |
| 473 =head1 NAME | |
| 474 | |
| 475 Parsers::Lexer | |
| 476 | |
| 477 =head1 SYNOPSIS | |
| 478 | |
| 479 use Parsers::Lexer; | |
| 480 | |
| 481 use Parsers::Lexer qw(:all); | |
| 482 | |
| 483 =head1 DESCRIPTION | |
| 484 | |
| 485 B<Lexer> class provides the following methods: | |
| 486 | |
| 487 new, GetLex, Lex, Next, Peek, StringifyLexer | |
| 488 | |
| 489 The object oriented chained B<Lexer> is implemented based on examples available in | |
| 490 Higher-order Perl [ Ref 126 ] book by Mark J. Dominus. It is designed to be used | |
| 491 both in standalone mode or as a base class for B<YYLexer>. | |
| 492 | |
| 493 A chained lexer is created by generating a lexer for the first specified token | |
| 494 specification using specified input and chaining it with other lexers generated for all | |
| 495 subsequent token specifications. The lexer generated for the first token specification | |
| 496 uses input iterator to retrieve any available input text; the subsequent chained lexers | |
| 497 for rest of the token specifications use lexers generated for previous token | |
| 498 specifications to get next input, which might be unmatched input text or a reference | |
| 499 to an array containing token and matched text pair. | |
| 500 | |
| 501 =head2 METHODS | |
| 502 | |
| 503 =over 4 | |
| 504 | |
| 505 =item B<new> | |
| 506 | |
| 507 $Lexer = new Parsers::Lexer($Input, @TokensSpec); | |
| 508 | |
| 509 Using specified I<Input> and I<TokensSpec>, B<new> method generates a new lexer | |
| 510 and returns a reference to newly created B<Lexer> object. | |
| 511 | |
| 512 Example: | |
| 513 | |
| 514 # Tokens specifications supplied by the caller. It's an array containing references | |
| 515 # to arrays with each containing TokenLabel and TokenMatchRegex pair along with | |
| 516 # an optional reference to code to be executed after a match. | |
| 517 # | |
| 518 @LexerTokensSpec = ( | |
| 519 [ 'LETTER', qr/[a-zA-Z]/ ], | |
| 520 [ 'NUMBER', qr/\d+/ ], | |
| 521 [ 'SPACE', qr/[ ]*/, | |
| 522 sub { my($This, $TokenLabel, $MatchedText) = @_; return ''; } | |
| 523 ], | |
| 524 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/, | |
| 525 sub { my($This, $TokenLabel, $MatchedText) = @_; return "\n"; } | |
| 526 ], | |
| 527 [ 'CHAR', qr/./ ] | |
| 528 ); | |
| 529 | |
| 530 # Input string... | |
| 531 $InputText = 'y = 3 + 4'; | |
| 532 $Lexer = new Parsers::Lexer($InputText, @LexerTokensSpec); | |
| 533 | |
| 534 # Process input stream... | |
| 535 while (defined($Token = $Lexer->Lex())) { | |
| 536 print "Token: " . ((ref $Token) ? "@{$Token}" : "$Token") . "\n"; | |
| 537 } | |
| 538 | |
| 539 # Input file... | |
| 540 $InputFile = "Input.txt"; | |
| 541 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n"; | |
| 542 $Lexer = new Parsers::Lexer(\*INPUTFILE, @LexerTokensSpec); | |
| 543 | |
| 544 # Input file iterator... | |
| 545 $InputFile = "TestSimpleCalcParser.txt"; | |
| 546 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n"; | |
| 547 $InputIterator = sub { return <INPUTFILE>; }; | |
| 548 $Lexer = new Parsers::Lexer($InputIterator, @LexerTokensSpec); | |
| 549 | |
| 550 @LexerTokensSpec = ( | |
| 551 [ 'VAR', qr/[[:alpha:]]+/ ], | |
| 552 [ 'NUM', qr/\d+/ ], | |
| 553 [ 'OP', qr/[-+=\/]/, | |
| 554 sub { my($This, $Label, $Value) = @_; | |
| 555 $Value .= "; ord: " . ord $Value; | |
| 556 return [$Label, $Value]; | |
| 557 } | |
| 558 ], | |
| 559 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/, sub { return [$_[1], 'NewLine']; } ], | |
| 560 [ 'SPACE', qr/\s*/, sub { return [$_[1], 'Space']; } ], | |
| 561 ); | |
| 562 | |
| 563 # Look ahead without removing... | |
| 564 $Token = $Lexer->Lex('Peek'); | |
| 565 if (defined $Token && ref $Token) { | |
| 566 print "PEEK: Token: @{$Token}\n\n"; | |
| 567 } | |
| 568 | |
| 569 # Process input stream... | |
| 570 while (defined($Token = $Lexer->Lex())) { | |
| 571 print "Token: " . ((ref $Token) ? "@{$Token}" : "$Token") . "\n"; | |
| 572 } | |
| 573 | |
| 574 =item B<GetLex> | |
| 575 | |
| 576 $LexerRef = $Lexer->GetLex(); | |
| 577 | |
| 578 Returns a reference to I<Lexer> method to the caller for use in a specific B<YYLexer>. | |
| 579 | |
| 580 =item B<Lex> | |
| 581 | |
| 582 $TokenRefOrText = $Lexer->Lex($Mode); | |
| 583 if (ref $TokenRefOrText) { | |
| 584 ($TokenLabel, $TokenValue) = @{$TokenRefOrText}; | |
| 585 } | |
| 586 else { | |
| 587 $TokenText = $TokenRefOrText; | |
| 588 } | |
| 589 | |
| 590 Get next available token label and value pair as an array reference or unrecognized | |
| 591 text from input stream by either removing it from the input or simply peeking ahead | |
| 592 and without removing it from the input stream. | |
| 593 | |
| 594 Possible I<Mode> values: I<Peek, Next>. Default: I<Next>. | |
| 595 | |
| 596 =item B<Next> | |
| 597 | |
| 598 $TokenRefOrText = $Lexer->Next(); | |
| 599 | |
| 600 Get next available token label and value pair as an array reference or unrecognized | |
| 601 text from input stream by removing it from the input stream. | |
| 602 | |
| 603 =item B<Peek> | |
| 604 | |
| 605 $TokenRefOrText = $Lexer->Peek(); | |
| 606 | |
| 607 Get next available token label and value pair as an array reference or unrecognized | |
| 608 text from input stream by simply peeking ahead and without removing it from the | |
| 609 input stream. | |
| 610 | |
| 611 =item B<StringifyLexer> | |
| 612 | |
| 613 $LexerString = $Lexer->StringifyLexer(); | |
| 614 | |
| 615 Returns a string containing information about I<Lexer> object. | |
| 616 | |
| 617 =back | |
| 618 | |
| 619 =head1 AUTHOR | |
| 620 | |
| 621 Manish Sud <msud@san.rr.com> | |
| 622 | |
| 623 =head1 SEE ALSO | |
| 624 | |
| 625 YYLexer.pm, SimpleCalcYYLexer.pm, SimpleCalcParser.yy | |
| 626 | |
| 627 =head1 COPYRIGHT | |
| 628 | |
| 629 Copyright (C) 2015 Manish Sud. All rights reserved. | |
| 630 | |
| 631 This file is part of MayaChemTools. | |
| 632 | |
| 633 MayaChemTools is free software; you can redistribute it and/or modify it under | |
| 634 the terms of the GNU Lesser General Public License as published by the Free | |
| 635 Software Foundation; either version 3 of the License, or (at your option) | |
| 636 any later version. | |
| 637 | |
| 638 =cut |
