0
|
1 NAME
|
|
2 Parsers::Lexer
|
|
3
|
|
4 SYNOPSIS
|
|
5 use Parsers::Lexer;
|
|
6
|
|
7 use Parsers::Lexer qw(:all);
|
|
8
|
|
9 DESCRIPTION
|
|
10 Lexer class provides the following methods:
|
|
11
|
|
12 new, GetLex, Lex, Next, Peek, StringifyLexer
|
|
13
|
|
14 The object oriented chained Lexer is implemented based on examples
|
|
15 available in Higher-order Perl [ Ref 126 ] book by Mark J. Dominus. It
|
|
16 is designed to be used either in standalone mode or as a base class for
|
|
17 YYLexer.
|
|
18
|
|
19 A chained lexer is created by generating a lexer for the first
|
|
20 specified token specification using specified input and chaining it with
|
|
21 other lexers generated for all subsequent token specifications. The
|
|
22 lexer generated for the first token specification uses input iterator to
|
|
23 retrieve any available input text; the subsequent chained lexers for
|
|
24 rest of the token specifications use lexers generated for previous token
|
|
25 specifications to get next input, which might be unmatched input text or
|
|
26 a reference to an array containing token and matched text pair.
|
|
27
|
|
28 METHODS
|
|
29 new
|
|
30 $Lexer = new Parsers::Lexer($Input, @TokensSpec);
|
|
31
|
|
32 Using specified *Input* and *TokensSpec*, new method generates a new
|
|
33 lexer and returns a reference to newly created Lexer object.
|
|
34
|
|
35 Example:
|
|
36
|
|
37 # Tokens specifications supplied by the caller. It's an array containing references
|
|
38 # to arrays with each containing TokenLabel and TokenMatchRegex pair along with
|
|
39 # an optional reference to code to be executed after a match.
|
|
40 #
|
|
41 @LexerTokensSpec = (
|
|
42 [ 'LETTER', qr/[a-zA-Z]/ ],
|
|
43 [ 'NUMBER', qr/\d+/ ],
|
|
44 [ 'SPACE', qr/[ ]*/,
|
|
45 sub { my($This, $TokenLabel, $MatchedText) = @_; return ''; }
|
|
46 ],
|
|
47 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/,
|
|
48 sub { my($This, $TokenLabel, $MatchedText) = @_; return "\n"; }
|
|
49 ],
|
|
50 [ 'CHAR', qr/./ ]
|
|
51 );
|
|
52
|
|
53 # Input string...
|
|
54 $InputText = 'y = 3 + 4';
|
|
55 $Lexer = new Parsers::Lexer($InputText, @LexerTokensSpec);
|
|
56
|
|
57 # Process input stream...
|
|
58 while (defined($Token = $Lexer->Lex())) {
|
|
59 print "Token: " . ((ref $Token) ? "@{$Token}" : "$Token") . "\n";
|
|
60 }
|
|
61
|
|
62 # Input file...
|
|
63 $InputFile = "Input.txt";
|
|
64 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
|
|
65 $Lexer = new Parsers::Lexer(\*INPUTFILE, @LexerTokensSpec);
|
|
66
|
|
67 # Input file iterator...
|
|
68 $InputFile = "TestSimpleCalcParser.txt";
|
|
69 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
|
|
70 $InputIterator = sub { return <INPUTFILE>; };
|
|
71 $Lexer = new Parsers::Lexer($InputIterator, @LexerTokensSpec);
|
|
72
|
|
73 @LexerTokensSpec = (
|
|
74 [ 'VAR', qr/[[:alpha:]]+/ ],
|
|
75 [ 'NUM', qr/\d+/ ],
|
|
76 [ 'OP', qr/[-+=\/]/,
|
|
77 sub { my($This, $Label, $Value) = @_;
|
|
78 $Value .= "; ord: " . ord $Value;
|
|
79 return [$Label, $Value];
|
|
80 }
|
|
81 ],
|
|
82 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/, sub { return [$_[1], 'NewLine']; } ],
|
|
83 [ 'SPACE', qr/\s*/, sub { return [$_[1], 'Space']; } ],
|
|
84 );
|
|
85
|
|
86 # Look ahead without removing...
|
|
87 $Token = $Lexer->Lex('Peek');
|
|
88 if (defined $Token && ref $Token) {
|
|
89 print "PEEK: Token: @{$Token}\n\n";
|
|
90 }
|
|
91
|
|
92 # Process input stream...
|
|
93 while (defined($Token = $Lexer->Lex())) {
|
|
94 print "Token: " . ((ref $Token) ? "@{$Token}" : "$Token") . "\n";
|
|
95 }
|
|
96
|
|
97 GetLex
|
|
98 $LexerRef = $Lexer->GetLex();
|
|
99
|
|
100 Returns a reference to *Lexer* method to the caller for use in a
|
|
101 specific YYLexer.
|
|
102
|
|
103 Lex
|
|
104 $TokenRefOrText = $Lexer->Lex($Mode);
|
|
105 if (ref $TokenRefOrText) {
|
|
106 ($TokenLabel, $TokenValue) = @{$TokenRefOrText};
|
|
107 }
|
|
108 else {
|
|
109 $TokenText = $TokenRefOrText;
|
|
110 }
|
|
111
|
|
112 Get next available token label and value pair as an array reference
|
|
113 or unrecognized text from input stream by either removing it from
|
|
114 the input or simply peeking ahead and without removing it from the
|
|
115 input stream.
|
|
116
|
|
117 Possible *Mode* values: *Peek, Next*. Default: *Next*.
|
|
118
|
|
119 Next
|
|
120 $TokenRefOrText = $Lexer->Next();
|
|
121
|
|
122 Get next available token label and value pair as an array reference
|
|
123 or unrecognized text from input stream by removing it from the input
|
|
124 stream.
|
|
125
|
|
126 Peek
|
|
127 $TokenRefOrText = $Lexer->Peek();
|
|
128
|
|
129 Get next available token label and value pair as an array reference
|
|
130 or unrecognized text from input stream by simply peeking ahead
|
|
131 and without removing it from the input stream.
|
|
132
|
|
133 StringifyLexer
|
|
134 $LexerString = $Lexer->StringifyLexer();
|
|
135
|
|
136 Returns a string containing information about *Lexer* object.
|
|
137
|
|
138 AUTHOR
|
|
139 Manish Sud <msud@san.rr.com>
|
|
140
|
|
141 SEE ALSO
|
|
142 YYLexer.pm, SimpleCalcYYLexer.pm, SimpleCalcParser.yy
|
|
143
|
|
144 COPYRIGHT
|
|
145 Copyright (C) 2015 Manish Sud. All rights reserved.
|
|
146
|
|
147 This file is part of MayaChemTools.
|
|
148
|
|
149 MayaChemTools is free software; you can redistribute it and/or modify it
|
|
150 under the terms of the GNU Lesser General Public License as published by
|
|
151 the Free Software Foundation; either version 3 of the License, or (at
|
|
152 your option) any later version.
|
|
153
|