annotate lib/Parsers/Lexer.pm @ 3:90ea638ce878 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:11:59 -0500
parents 2abf0d43254d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
1 package Parsers::Lexer;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
2 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
3 # $RCSfile: Lexer.pm,v $
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
4 # $Date: 2015/02/28 20:50:55 $
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
5 # $Revision: 1.10 $
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
6 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
7 # Author: Manish Sud <msud@san.rr.com>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
8 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
9 # Copyright (C) 2015 Manish Sud. All rights reserved.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
10 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
11 # This file is part of MayaChemTools.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
12 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
14 # the terms of the GNU Lesser General Public License as published by the Free
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
15 # Software Foundation; either version 3 of the License, or (at your option) any
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
16 # later version.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
17 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
18 # MayaChemTools is distributed in the hope that it will be useful, but without
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
19 # any warranty; without even the implied warranty of merchantability of fitness
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
20 # for a particular purpose. See the GNU Lesser General Public License for more
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
21 # details.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
22 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
23 # You should have received a copy of the GNU Lesser General Public License
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
26 # Boston, MA, 02111-1307, USA.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
27 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
28
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
29 use strict;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
30 use Carp;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
31 use Exporter;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
32 use Scalar::Util ();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
33
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
34 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
35
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Exporter configuration: both export lists are empty, so no symbols are exported
# by this class...
@ISA = ('Exporter');
@EXPORT = ();
@EXPORT_OK = ();
%EXPORT_TAGS = ('all' => [ @EXPORT, @EXPORT_OK ]);

# Class level data; its value is assigned by _InitializeClass...
my $ClassName;
_InitializeClass();

# Use StringifyLexer method to convert an object into a string...
use overload '""' => 'StringifyLexer';
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
48
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
49 # Class constructor...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Create a lexer object for the specified input and token specifications.
#
# Parameters:
#   $Class      - Class name, or an existing object whose class is used to bless
#                 the new object.
#   $Input      - Input to be lexed; it's checked and processed by
#                 _ValidateParametersAndGenerateLexer.
#   @TokensSpec - Token specifications; these are passed as is to
#                 _ValidateParametersAndGenerateLexer.
#
# Returns: Newly created object.
#
sub new {
  my($Class, $Input, @TokensSpec) = @_;

  # Create an empty object and set up its default data...
  my $This = bless {}, (ref($Class) || $Class);
  $This->_InitializeLexer();

  # Check parameters and setup the lexer...
  $This->_ValidateParametersAndGenerateLexer($Input, @TokensSpec);

  return $This;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
62
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
63
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
64 # Initialize class ...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Set up class level data: the name of this package is stored in $ClassName.
#
# Returns: Value assigned to $ClassName.
#
sub _InitializeClass {
  # Name of the class is same as name of the package...
  return $ClassName = __PACKAGE__;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
69
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
70 # Initialize object data...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
71 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Set up default values for all object data.
#
# Parameters:
#   $This - Object, a hash reference, to initialize.
#
# Returns: $This.
#
sub _InitializeLexer {
  my $This = shift;

  # Input:
  #
  # Parameter used by the lexer to retrieve text to be lexed. Supported types of
  # parameters:
  #    . Reference to input iterator function
  #    . Reference to an open file handle
  #    . Text string
  #
  # ChainedLexer:
  #
  # Reference to chained lexer function. It's set during generation of the lexer.
  #
  @{$This}{'Input', 'ChainedLexer'} = (undef, undef);

  # InputType:
  #
  # Type of the input parameter determined during validation of parameters:
  #    . InputIterator - Reference to a function
  #    . FileStream - Reference to a glob with a valid fileno
  #    . String - Not a reference
  #
  $This->{InputType} = '';

  # TokensSpec:
  #
  # Token specifications supplied by the caller. It's an array containing references
  # to arrays, with each one containing a TokenLabel and TokenMatchRegex pair along
  # with an optional reference to code to be executed after a match.
  #
  # For example:
  #
  # @LexerTokensSpec = (
  #      [ 'LETTER', qr/[a-zA-Z]/ ],
  #      [ 'NUMBER', qr/\d+/ ],
  #      [ 'SPACE', qr/[ ]*/, sub { my($This, $TokenLabel, $MatchedText) = @_; return ''; } ],
  #      [ 'NEWLINE', qr/(?:\r\n|\r|\n)/, sub { my($This, $TokenLabel, $MatchedText) = @_;  return "\n"; } ],
  #      [ 'CHAR', qr/[\.]/ ],
  #  );
  #
  # Any array already referenced by TokensSpec is emptied in place...
  #
  @{ $This->{TokensSpec} } = ();

  return $This;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
110
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
111 # Validate input parameters and generate a chained lexer...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
112 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Check input and token specifications supplied during instantiation of the
# object, save them in the object and generate the chained lexer.
#
# Parameters:
#   $This       - Object being set up.
#   $Input      - Reference to an input iterator function, a reference to an open
#                 file handle or a text string.
#   @TokensSpec - Token specifications; the list must not be empty.
#
# Returns: $This.
#
# Croaks when $Input is not defined, $Input is an unsupported type of reference
# or no token specifications are supplied.
#
sub _ValidateParametersAndGenerateLexer {
  my($This, $Input, @TokensSpec) = @_;

  # Validate input to be lexed...
  if (!defined $Input) {
    croak "Error: ${ClassName}->new: Object can't be instantiated: Input is not defined. Supported values: a reference to input iterator function, a reference to an open file handle or a text string...";
  }
  $This->{Input} = $Input;

  # Check input parameter type...
  #
  # The type of the underlying reference is used instead of matching the value
  # returned by ref against a pattern. For blessed references, ref returns the
  # name of the class: a pattern match misclassifies any object whose class
  # name happens to contain CODE or GLOB, and fails to recognize blessed file
  # handles such as IO::File objects. reftype returns undef for non-references.
  #
  my $InputRefType = Scalar::Util::reftype($Input);

  if (!defined $InputRefType) {
    # Input string...
    $This->{InputType} = "String";
  }
  elsif ($InputRefType eq 'CODE') {
    # Input iterator...
    $This->{InputType} = "InputIterator";
  }
  elsif ($InputRefType eq 'GLOB' && defined(fileno($Input))) {
    # Input stream...
    $This->{InputType} = "FileStream";
  }
  else {
    # All other types of references, including references to globs without a
    # valid fileno, are not supported...
    croak "Error: ${ClassName}->new: Object can't be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...";
  }

  # Check tokens specifications...
  if (!@TokensSpec) {
    croak "Error: ${ClassName}->new: TokensSpec is not defined or the array doesn't contain any values. Supported values: a reference to an array containg token label, regular expression to match and an option reference to function to modify matched values...";
  }
  @{$This->{TokensSpec}} = @TokensSpec;

  $This->_GenerateLexer($Input, @TokensSpec);

  return $This;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
154
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
155 # Generate a lexer using reference to an input iterator function, an open file
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
156 # handle or an input string passed as first parameter by the caller along
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
157 # with token specifications as second parameter...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
158 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate a lexer by calling the method corresponding to the type of input
# stored in InputType.
#
# Parameters:
#   $This       - Object with InputType already set.
#   $Input      - Input passed on to the method generating the lexer.
#   @TokensSpec - Token specifications passed on to the method generating the lexer.
#
# Returns: $This.
#
# Croaks when InputType doesn't match any of the supported input types.
#
sub _GenerateLexer {
  my($This, $Input, @TokensSpec) = @_;

  # Input type patterns, in the order they are tried, along with names of the
  # methods generating lexers for them...
  my @LexerGenerators = (
    [ qr/^InputIterator$/i, '_GenerateInputIteratorLexer' ],
    [ qr/^FileStream$/i, '_GenerateInputFileStreamLexer' ],
    [ qr/^String$/i, '_GenerateInputStringLexer' ],
  );

  for my $LexerGenerator (@LexerGenerators) {
    my($InputTypePattern, $MethodName) = @{$LexerGenerator};

    if ($This->{InputType} =~ $InputTypePattern) {
      $This->$MethodName($Input, @TokensSpec);
      return $This;
    }
  }

  croak "Error: ${ClassName}->new: Object can't be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...";
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
177
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
178 # Generate a lexer using specified input iterator...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
179 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate a chained lexer which retrieves its input by calling the input
# iterator function supplied by the caller.
#
# Parameters:
#   $This             - Object to set up.
#   $InputIteratorRef - Reference to input iterator function.
#   @TokensSpec       - Token specifications.
#
# Returns: $This.
#
sub _GenerateInputIteratorLexer {
  my $This = shift;
  my($InputIteratorRef, @TokensSpec) = @_;

  # Input iterator is used as is...
  $This->_GenerateChainedLexer($InputIteratorRef, @TokensSpec);

  return $This;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
187
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
188 # Generate a lexer using specified input file stream reference...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
189 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate a chained lexer which retrieves its input by reading from the file
# handle supplied by the caller.
#
# Parameters:
#   $This          - Object to set up.
#   $FileHandleRef - Reference to an open file handle.
#   @TokensSpec    - Token specifications.
#
# Returns: $This.
#
sub _GenerateInputFileStreamLexer {
  my($This, $FileHandleRef, @TokensSpec) = @_;

  # Input iterator is an anonymous function reading from the file handle. As a
  # closure, it keeps $FileHandleRef available after this method returns...
  my $FileStreamIteratorRef = sub {
    return readline($FileHandleRef);
  };

  $This->_GenerateChainedLexer($FileStreamIteratorRef, @TokensSpec);

  return $This;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
200
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
201 # Generate a lexer using specified input string...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
202 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate a chained lexer which retrieves its input from the text string
# supplied by the caller.
#
# Parameters:
#   $This       - Object to set up.
#   $Text       - Text string to be lexed.
#   @TokensSpec - Token specifications.
#
# Returns: $This.
#
sub _GenerateInputStringLexer {
  my($This, $Text, @TokensSpec) = @_;

  # Input iterator is an anonymous function which returns the whole text during
  # its first invocation and undef afterwards. As a closure, it keeps
  # $PendingText available after this method returns...
  my $PendingText = $Text;
  my $StringIteratorRef = sub {
    my $AvailableText = $PendingText;
    $PendingText = undef;

    return $AvailableText;
  };

  $This->_GenerateChainedLexer($StringIteratorRef, @TokensSpec);

  return $This;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
214
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
215 # Get next available token label and value pair as an array reference or unrecognized
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
216 # text from input stream by either removing it from the input or simply peeking ahead...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
217 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
218 # Supported mode values: Peek, Next. Default: Next
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
219 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Invoke the chained lexer generated for the object and return whatever it
# returns.
#
# Parameters:
#   $This - Object containing a reference to chained lexer function in ChainedLexer.
#   $Mode - Optional mode passed as is to the chained lexer function.
#
# Returns: Value returned by the chained lexer function.
#
sub Lex {
  my($This, $Mode) = @_;

  my $ChainedLexerRef = $This->{ChainedLexer};

  return $ChainedLexerRef->($Mode);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
225
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
226 # Get next available token label and value pair as an array reference or unrecognized
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
227 # text from input stream by removing it from the input stream...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
228 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Call Lex without specifying any mode and return whatever it returns.
#
# Parameters:
#   $This - Lexer object.
#
# Returns: Value returned by Lex.
#
sub Next {
  my $This = shift;

  return $This->Lex();
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
234
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
235 # Get next available token label and value pair as an array reference or unrecognized
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
236 # text from input stream by simply peeking ahead and without removing it from the input
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
237 # stream..
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
238 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Call Lex using Peek as the mode and return whatever it returns.
#
# Parameters:
#   $This - Lexer object.
#
# Returns: Value returned by Lex.
#
sub Peek {
  my $This = shift;
  my $Mode = 'Peek';

  return $This->Lex($Mode);
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
244
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
245 # Get a reference to lexer method to be used by the caller...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
246 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Get a reference to an anonymous function which calls Lex on this object.
#
# The function ignores any arguments passed to it and always calls Lex without
# specifying any mode.
#
# Parameters:
#   $This - Lexer object.
#
# Returns: Reference to a function.
#
sub GetLex {
  my $This = shift;

  my $LexRef = sub {
    return $This->Lex();
  };

  return $LexRef;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
252
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
253 # The chained lexer generation is implemented based on examples in Higher-order Perl
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
254 # [ Ref 126 ] book.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
255 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
256 # Generate a lexer using specified input iterator and chaining it with other lexers generated
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
257 # for all token specifications. The lexer generated for first token specification uses input
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
258 # iterator to retrieve any available input text; the subsequent chained lexers for rest
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
259 # of the tokens use lexers generated for previous token specifications to get next input
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
260 # which might be unmatched input text or a reference to an array containing token and
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
261 # matched text pair.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
262 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
# Generate a lexer for each token specification, chain the lexers together and
# save the last one generated in ChainedLexer.
#
# The lexer for the first token specification gets its input from the input
# iterator; the lexer for each subsequent token specification gets its input
# from the lexer generated before it.
#
# Parameters:
#   $This             - Object to set up.
#   $InputIteratorRef - Reference to input iterator function.
#   @TokensSpec       - References to arrays containing token specifications.
#
# Returns: $This.
#
sub _GenerateChainedLexer {
  my($This, $InputIteratorRef, @TokensSpec) = @_;

  my $ChainedLexer = undef;

  for my $TokenSpecRef (@TokensSpec) {
    # Use input iterator as the source of input until a lexer is available...
    my $InputSourceRef = defined $ChainedLexer ? $ChainedLexer : $InputIteratorRef;

    $ChainedLexer = $This->_GenerateLexerForToken($InputSourceRef, @{$TokenSpecRef});
  }

  $This->{ChainedLexer} = $ChainedLexer;

  return $This;
}
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
276
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
277
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
278 # Generate a lexer using specified token specification using specified input or
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
279 # input retrieved using another token lexer. The lexer retrieving input from the
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
280 # specified input stream is at the bottom of the chain.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
281 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
282 sub _GenerateLexerForToken {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
283 my($This, $InputIteratorOrLexer, $TokenLabel, $RegexPattern, $TokenMatchActionRef) = @_;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
284 my($TokenMatchAndSplitRef, $InputBuffer, @ProcessedTokens);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
285
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
286 # Input buffer for a specific lexer in chained lexers containing unprocessed
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
287 # text for token specifications retrieved from a downstream lexer or initial
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
288 # input...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
289 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
290 $InputBuffer = "";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
291
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
292 # @ProcessedTokens contains either references to an array containing token label
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
293 # and matched text or any unmatched input text string...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
294 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
295 @ProcessedTokens = ();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
296
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
297 # Setup a default anonymous function reference to generate an array reference
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
298 # containing $Token and text matched to $RegexPattern.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
299 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
300 $TokenMatchActionRef = defined $TokenMatchActionRef ? $TokenMatchActionRef : sub { my($This, $Label, $MatchedText) = @_; return [$Label, $MatchedText] };
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
301
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
302 # Setup an anonymous function to match and split input text using $RegexPattern for
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
303 # a specific token during its lexer invocation in chained lexers.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
304 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
305 # The usage of parenthesis around $RegexPattern during split allows capturing of matched
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
306 # text, which is subsequently processed to retrieve matched $Token values. The split function
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
307 # inserts a "" separator in the returned array as first entry whenever $InputText starts with
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
308 # $RegexPattern. $InputText is returned as the only element for no match.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
309 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
310 $TokenMatchAndSplitRef = sub { my($InputText) = @_; return split /($RegexPattern)/, $InputText; };
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
311
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
312 # Setup a lexer for $TokenLabel as an anonymous function and return its reference to caller
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
313 # which in turns chains the lexers for all $Tokens before returning a reference to a lexer
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
314 # at top of the lexer chain.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
315 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
316 # Perl maintains scope of all variables defined with in the scope of the current function
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
317 # during invocation of anonymous function even after the return call.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
318 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
319 return sub {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
320 my($Mode) = @_;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
321
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
322 # Currently supported value for mode: Peek, Next
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
323 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
324 $Mode = defined $Mode ? $Mode : 'Next';
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
325
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
326 while (@ProcessedTokens == 0 && defined $InputBuffer ) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
327 # Get any new input....
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
328 my $NewInput = $InputIteratorOrLexer->();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
329
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
330 if (ref $NewInput) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
331 # Input is an array reference containing matched token and text returned by
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
332 # a chained lexer downstream lexer...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
333 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
334 # Match $RegexPattern in available buffer text to retrieve any matched text
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
335 # for current $Token. $Separator might be "": $RegexPattern is at start of
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
336 # of $InputBuffer
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
337 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
338 # Process input buffer containing text to be matched for the current lexer
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
339 # which didn't get processed earlier during @NewTokens > 2 while loop:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
340 # no match for current lexer or more input available. It maintains order
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
341 # of token matching in input stream.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
342 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
343 my($Separator, $MatchedTokenRefOrText);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
344
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
345 ($Separator, $MatchedTokenRefOrText) = $TokenMatchAndSplitRef->($InputBuffer);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
346 if (defined $MatchedTokenRefOrText) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
347 $MatchedTokenRefOrText = $TokenMatchActionRef->($This, $TokenLabel, $MatchedTokenRefOrText);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
348 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
349
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
350 # Collect valid token references or text...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
351 push @ProcessedTokens, grep { defined $_ && $_ ne "" } ($Separator, $MatchedTokenRefOrText, $NewInput);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
352
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
353 # Empty input buffer...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
354 $InputBuffer = "";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
355
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
356 # Get out of the loop as processed token references and/or text are available...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
357 last;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
358 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
359
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
360 # Process input retrieved from downstream lexer or input iterator which hasn't
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
361 # been processed into tokens..
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
362 if (defined $NewInput) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
363 $InputBuffer .= $NewInput;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
364 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
365
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
366 # Retrieve any matched tokens from available input for the current lexer...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
367 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
368 my(@NewTokens) = $TokenMatchAndSplitRef->($InputBuffer);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
369
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
370 while ( @NewTokens > 2 || @NewTokens && !defined $NewInput) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
371 # Scenario 1: Complete match
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
372 # @NewTokens > 2 : Availability of separator, matched token text, separator.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
373 # The separator might correspond to a token for the upstream lexer followed
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
374 # by matched token from current lexer. It ends up getting passed to upstream
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
375 # lexer for processing.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
376 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
377 # Scenario 2: No more input available from iterator or downstream lexer
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
378 # @NewTokens <= 2 and no more input implies any left over text in buffer. And
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
379 # it ends up getting passed to upstream for processing.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
380 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
381
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
382 # Take off any unprocessed input text that doesn't match off the buffer: It'll be
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
383 # passed to upstream chained lexer for processing...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
384 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
385 push @ProcessedTokens, shift @NewTokens;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
386
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
387 if (@NewTokens) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
388 my $MatchedTokenText = shift @NewTokens;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
389 push @ProcessedTokens, $TokenMatchActionRef->($This, $TokenLabel, $MatchedTokenText);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
390 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
391 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
392
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
393 # Retrieve any leftover text from NewTokens and put it back into InputBuffer for
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
394 # processing by current lexer. All token references have been taken out....
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
395 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
396 $InputBuffer = "";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
397 if (@NewTokens) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
398 $InputBuffer = join "", @NewTokens;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
399 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
400
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
401 if (!defined $NewInput) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
402 # No more input from the downstream lexer...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
403 $InputBuffer = undef;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
404 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
405
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
406 # Clean up any empty strings from ProcessedTokens containing token
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
407 # references or text...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
408 @ProcessedTokens = grep { $_ ne "" } @ProcessedTokens;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
409
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
410 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
411
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
412 # Return reference to an array containing token and matched text or just unmatched input text...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
413 my $TokenRefOrText = undef;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
414
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
415 if (@ProcessedTokens) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
416 # Get first available reference either by just peeking or removing it from the list
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
417 # of available tokens...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
418 $TokenRefOrText = ($Mode =~ /^Peek$/i) ? $ProcessedTokens[0] : shift @ProcessedTokens;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
419 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
420
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
421 return $TokenRefOrText;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
422 };
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
423 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
424
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
425 # Is it a lexer object?
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
426 sub _IsLexer {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
427 my($Object) = @_;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
428
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
429 return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
430 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
431
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
432 # Return a string containing information about lexer...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
433 sub StringifyLexer {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
434 my($This) = @_;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
435 my($LexerString);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
436
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
437 $LexerString = "Lexer: PackageName: $ClassName; " . $This->_GetLexerInfoString();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
438
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
439 return $LexerString;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
440 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
441
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
442 # Return a string containing information about lexer...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
443 sub _GetLexerInfoString {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
444 my($This) = @_;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
445 my($LexerInfoString, $TokensSpec, $TokenSpec, $TokenLabel, $TokenMatchRegex, $TokenMatchAction);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
446
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
447 $LexerInfoString = "InputType: $This->{InputType}";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
448
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
449 if ($This->{InputType} =~ /^String$/i) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
450 $LexerInfoString .= "; InputString: $This->{Input}";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
451 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
452
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
453 $TokensSpec = "TokensSpecifications: <None>";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
454 if (@{$This->{TokensSpec}}) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
455 $TokensSpec = "TokensSpecifications: < [Label, MatchRegex, MatchAction]:";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
456 for $TokenSpec (@{$This->{TokensSpec}}) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
457 ($TokenLabel, $TokenMatchRegex) = @{$TokenSpec};
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
458 $TokenMatchAction = (@{$TokenSpec} == 3) ? "$TokenSpec->[2]" : "undefined";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
459 $TokensSpec .= " [$TokenLabel, $TokenMatchRegex, $TokenMatchAction]";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
460 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
461 $TokensSpec .= " >";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
462 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
463
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
464 $LexerInfoString .= "; $TokensSpec";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
465
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
466 return $LexerInfoString;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
467 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
468
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
469 1;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
470
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
471 __END__
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
472
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
473 =head1 NAME
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
474
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
475 Parsers::Lexer
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
476
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
477 =head1 SYNOPSIS
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
478
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
479 use Parsers::Lexer;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
480
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
481 use Parsers::Lexer qw(:all);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
482
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
483 =head1 DESCRIPTION
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
484
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
485 B<Lexer> class provides the following methods:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
486
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
487 new, GetLex, Lex, Next, Peek, StringifyLexer
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
488
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
489 The object oriented chained B<Lexer> is implemented based on examples available in
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
490 Higher-order Perl [ Ref 126 ] book by Mark J. Dominus. It is designed to be used
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
491 both in standalone mode or as a base class for B<YYLexer>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
492
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
493 A chained lexer is created by generating a lexer for the first specified token
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
494 specification using specified input and chaining it with other lexers generated for all
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
495 subsequent token specifications. The lexer generated for the first token specification
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
496 uses input iterator to retrieve any available input text; the subsequent chained lexers
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
497 for rest of the token specifications use lexers generated for previous token
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
498 specifications to get next input, which might be unmatched input text or a reference
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
499 to an array containing token and matched text pair.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
500
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
501 =head2 METHODS
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
502
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
503 =over 4
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
504
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
505 =item B<new>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
506
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
507 $Lexer = new Parsers::Lexer($Input, @TokensSpec);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
508
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
509 Using specified I<Input> and I<TokensSpec>, B<new> method generates a new lexer
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
510 and returns a reference to newly created B<Lexer> object.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
511
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
512 Example:
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
513
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
514 # Tokens specifications supplied by the caller. It's an array containing references
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
515 # to arrays with each containing TokenLabel and TokenMatchRegex pair along with
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
516 # an optional reference to code to be executed after a match.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
517 #
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
518 @LexerTokensSpec = (
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
519 [ 'LETTER', qr/[a-zA-Z]/ ],
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
520 [ 'NUMBER', qr/\d+/ ],
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
521 [ 'SPACE', qr/[ ]*/,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
522 sub { my($This, $TokenLabel, $MatchedText) = @_; return ''; }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
523 ],
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
524 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
525 sub { my($This, $TokenLabel, $MatchedText) = @_; return "\n"; }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
526 ],
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
527 [ 'CHAR', qr/./ ]
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
528 );
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
529
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
530 # Input string...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
531 $InputText = 'y = 3 + 4';
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
532 $Lexer = new Parsers::Lexer($InputText, @LexerTokensSpec);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
533
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
534 # Process input stream...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
535 while (defined($Token = $Lexer->Lex())) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
536 print "Token: " . ((ref $Token) ? "@{$Token}" : "$Token") . "\n";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
537 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
538
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
539 # Input file...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
540 $InputFile = "Input.txt";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
541 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
542 $Lexer = new Parsers::Lexer(\*INPUTFILE, @LexerTokensSpec);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
543
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
544 # Input file iterator...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
545 $InputFile = "TestSimpleCalcParser.txt";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
546 open INPUTFILE, "$InputFile" or die "Couldn't open $InputFile: $!\n";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
547 $InputIterator = sub { return <INPUTFILE>; };
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
548 $Lexer = new Parsers::Lexer($InputIterator, @LexerTokensSpec);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
549
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
550 @LexerTokensSpec = (
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
551 [ 'VAR', qr/[[:alpha:]]+/ ],
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
552 [ 'NUM', qr/\d+/ ],
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
553 [ 'OP', qr/[-+=\/]/,
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
554 sub { my($This, $Label, $Value) = @_;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
555 $Value .= "; ord: " . ord $Value;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
556 return [$Label, $Value];
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
557 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
558 ],
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
559 [ 'NEWLINE', qr/(?:\r\n|\r|\n)/, sub { return [$_[1], 'NewLine']; } ],
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
560 [ 'SPACE', qr/\s*/, sub { return [$_[1], 'Space']; } ],
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
561 );
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
562
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
563 # Look ahead without removing...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
564 $Token = $Lexer->Lex('Peek');
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
565 if (defined $Token && ref $Token) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
566 print "PEEK: Token: @{$Token}\n\n";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
567 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
568
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
569 # Process input stream...
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
570 while (defined($Token = $Lexer->Lex())) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
571 print "Token: " . ((ref $Token) ? "@{$Token}" : "$Token") . "\n";
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
572 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
573
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
574 =item B<GetLex>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
575
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
576 $LexerRef = $Lexer->GetLex();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
577
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
578 Returns a reference to I<Lexer> method to the caller for use in a specific B<YYLexer>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
579
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
580 =item B<Lex>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
581
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
582 $TokenRefOrText = $Lexer->Lex($Mode);
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
583 if (ref $TokenRefOrText) {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
584 ($TokenLabel, $TokenValue) = @{$TokenRefOrText};
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
585 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
586 else {
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
587 $TokenText = $TokenRefOrText;
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
588 }
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
589
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
590 Get next available token label and value pair as an array reference or unrecognized
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
591 text from input stream by either removing it from the input or simply peeking ahead
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
592 and without removing it from the input stream.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
593
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
594 Possible I<Mode> values: I<Peek, Next>. Default: I<Next>.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
595
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
596 =item B<Next>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
597
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
598 $TokenRefOrText = $Lexer->Next();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
599
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
600 Get next available token label and value pair as an array reference or unrecognized
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
601 text from input stream by removing it from the input stream.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
602
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
603 =item B<Peek>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
604
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
605 $TokenRefOrText = $Lexer->Peek();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
606
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
607 Get next available token label and value pair as an array reference or unrecognized
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
608 text from input stream by simply peeking ahead and without removing it from the
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
609 input stream.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
610
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
611 =item B<StringifyLexer>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
612
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
613 $LexerString = $Lexer->StringifyLexer();
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
614
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
615 Returns a string containing information about I<Lexer> object.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
616
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
617 =back
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
618
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
619 =head1 AUTHOR
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
620
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
621 Manish Sud <msud@san.rr.com>
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
622
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
623 =head1 SEE ALSO
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
624
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
625 YYLexer.pm, SimpleCalcYYLexer.pm, SimpleCalcParser.yy
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
626
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
627 =head1 COPYRIGHT
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
628
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
629 Copyright (C) 2015 Manish Sud. All rights reserved.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
630
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
631 This file is part of MayaChemTools.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
632
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
633 MayaChemTools is free software; you can redistribute it and/or modify it under
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
634 the terms of the GNU Lesser General Public License as published by the Free
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
635 Software Foundation; either version 3 of the License, or (at your option)
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
636 any later version.
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
637
2abf0d43254d Uploaded
deepakjadmin
parents:
diff changeset
638 =cut