diff mayachemtools/docs/modules/html/code/Lexer.html @ 0:73ae111cf86f draft

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 11:55:01 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mayachemtools/docs/modules/html/code/Lexer.html	Wed Jan 20 11:55:01 2016 -0500
@@ -0,0 +1,489 @@
+<html>
+<head>
+<title>MayaChemTools:Code:Parsers::Lexer.pm</title>
+<meta http-equiv="content-type" content="text/html;charset=utf-8">
+<link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css">
+</head>
+<body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10">
+<br/>
+<center>
+<a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a>
+</center>
+<br/>
+<pre>
+<a name="package-Parsers::Lexer-"></a>   1 <span class="k">package </span><span class="i">Parsers::Lexer</span><span class="sc">;</span>
+   2 <span class="c">#</span>
+   3 <span class="c"># $RCSfile: Lexer.pm,v $</span>
+   4 <span class="c"># $Date: 2015/02/28 20:50:55 $</span>
+   5 <span class="c"># $Revision: 1.10 $</span>
+   6 <span class="c">#</span>
+   7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span>
+   8 <span class="c">#</span>
+   9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span>
+  10 <span class="c">#</span>
+  11 <span class="c"># This file is part of MayaChemTools.</span>
+  12 <span class="c">#</span>
+  13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span>
+  14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span>
+  15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span>
+  16 <span class="c"># later version.</span>
+  17 <span class="c">#</span>
+  18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span>
+  19 <span class="c"># any warranty; without even the implied warranty of merchantability of fitness</span>
+  20 <span class="c"># for a particular purpose.  See the GNU Lesser General Public License for more</span>
+  21 <span class="c"># details.</span>
+  22 <span class="c">#</span>
+  23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span>
+  24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span>
+  25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span>
+  26 <span class="c"># Boston, MA, 02111-1307, USA.</span>
+  27 <span class="c">#</span>
+  28 
+  29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span>
+  30 <span class="k">use</span> <span class="w">Carp</span><span class="sc">;</span>
+  31 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span>
+  32 <span class="k">use</span> <span class="w">Scalar::Util</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+  33 
+  34 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span>
+  35 
+  36 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span>
+  37 <span class="i">@EXPORT</span> = <span class="q">qw()</span><span class="sc">;</span>
+  38 <span class="i">@EXPORT_OK</span> = <span class="q">qw()</span><span class="sc">;</span>
+  39 
+  40 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span>  <span class="cm">=&gt;</span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span>
+  41 
+  42 <span class="c"># Setup class variables...</span>
+  43 <span class="k">my</span><span class="s">(</span><span class="i">$ClassName</span><span class="s">)</span><span class="sc">;</span>
+  44 <span class="i">_InitializeClass</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+  45 
+  46 <span class="c"># Overload Perl functions...</span>
+  47 <span class="k">use</span> <span class="w">overload</span> <span class="q">&#39;&quot;&quot;&#39;</span> <span class="cm">=&gt;</span> <span class="q">&#39;StringifyLexer&#39;</span><span class="sc">;</span>
+  48 
+  49 <span class="c"># Class constructor...</span>
+<a name="new-"></a>  50 <span class="k">sub </span><span class="m">new</span> <span class="s">{</span>
+  51   <span class="k">my</span><span class="s">(</span><span class="i">$Class</span><span class="cm">,</span> <span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+  52 
+  53   <span class="c"># Initialize object...</span>
+  54   <span class="k">my</span> <span class="i">$This</span> = <span class="s">{</span><span class="s">}</span><span class="sc">;</span>
+  55   <span class="k">bless</span> <span class="i">$This</span><span class="cm">,</span> <span class="k">ref</span><span class="s">(</span><span class="i">$Class</span><span class="s">)</span> || <span class="i">$Class</span><span class="sc">;</span>
+  56   <span class="i">$This</span><span class="i">-&gt;_InitializeLexer</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+  57 
+  58   <span class="i">$This</span><span class="i">-&gt;_ValidateParametersAndGenerateLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
+  59 
+  60   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
+  61 <span class="s">}</span>
+  62 
+  63 
+  64 <span class="c"># Initialize class ...</span>
+<a name="_InitializeClass-"></a>  65 <span class="k">sub </span><span class="m">_InitializeClass</span> <span class="s">{</span>
+  66   <span class="c">#Class name...</span>
+  67   <span class="i">$ClassName</span> = <span class="w">__PACKAGE__</span><span class="sc">;</span>
+  68 <span class="s">}</span>
+  69 
+  70 <span class="c"># Initialize object data...</span>
+  71 <span class="c">#</span>
+<a name="_InitializeLexer-"></a>  72 <span class="k">sub </span><span class="m">_InitializeLexer</span> <span class="s">{</span>
+  73   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+  74 
+  75   <span class="c"># Input parameter used by lexer to retrieve text to be lexed. Supported parameter types:</span>
+  76   <span class="c">#   . Reference to input iterator function</span>
+  77   <span class="c">#   . Reference to an open file handle</span>
+  78   <span class="c">#   . Text string</span>
+  79   <span class="c">#</span>
+  80   <span class="i">$This</span>-&gt;{<span class="w">Input</span>} = <span class="k">undef</span><span class="sc">;</span>
+  81 
+  82   <span class="c"># Type of input parameter determined using Perl ref function:</span>
+  83   <span class="c">#   . InputIterator - ref returns CODE</span>
+  84   <span class="c">#   . FileStream - ref returns GLOB and fileno is valid</span>
+  85   <span class="c">#   . String - ref returns an empty string</span>
+  86   <span class="c">#</span>
+  87   <span class="i">$This</span>-&gt;{<span class="w">InputType</span>} = <span class="q">&#39;&#39;</span><span class="sc">;</span>
+  88 
+  89   <span class="c"># Tokens specifications supplied by the caller. It&#39;s an array containing references</span>
+  90   <span class="c"># to arrays with each containing TokenLabel and TokenMatchRegex pair along with</span>
+  91   <span class="c"># an optional reference to code to be executed after a match.</span>
+  92   <span class="c">#</span>
+  93   <span class="c"># For example:</span>
+  94   <span class="c">#</span>
+  95   <span class="c"># @LexerTokensSpec = (</span>
+  96   <span class="c">#                        [ &#39;LETTER&#39;, qr/[a-zA-Z]/ ],</span>
+  97   <span class="c">#                        [ &#39;NUMBER&#39;, qr/\d+/ ],</span>
+  98   <span class="c">#                        [ &#39;SPACE&#39;, qr/[ ]*/, sub { my($This, $TokenLabel, $MatchedText) = @_; return &#39;&#39;; } ],</span>
+  99   <span class="c">#                        [ &#39;NEWLINE&#39;, qr/(?:\r\n|\r|\n)/, sub { my($This, $TokenLabel, $MatchedText) = @_;  return &quot;\n&quot;; } ],</span>
+ 100   <span class="c">#                        [ &#39;CHAR&#39;, qr/[\.]/ ],</span>
+ 101   <span class="c">#                       );</span>
+ 102   <span class="c">#</span>
+ 103   <span class="i">@</span>{<span class="i">$This</span>-&gt;{<span class="w">TokensSpec</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 104 
+ 105   <span class="c"># Reference to chained lexer...</span>
+ 106   <span class="i">$This</span>-&gt;{<span class="w">ChainedLexer</span>} = <span class="k">undef</span><span class="sc">;</span>
+ 107 
+ 108   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
+ 109 <span class="s">}</span>
+ 110 
+ 111 <span class="c"># Validate input parameters and generate a chained lexer...</span>
+ 112 <span class="c">#</span>
+<a name="_ValidateParametersAndGenerateLexer-"></a> 113 <span class="k">sub </span><span class="m">_ValidateParametersAndGenerateLexer</span> <span class="s">{</span>
+ 114   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 115 
+ 116   <span class="c">#</span>
+ 117   <span class="c"># Validate input to be lexed...</span>
+ 118   <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$Input</span><span class="s">)</span> <span class="s">{</span>
+ 119     <span class="w">croak</span> <span class="q">&quot;Error: ${ClassName}-&gt;new: Object can&#39;t be instantiated: Input is not defined. Supported values: a reference to input iterator function, a reference to an open file handle or a text string...&quot;</span><span class="sc">;</span>
+ 120   <span class="s">}</span>
+ 121   <span class="i">$This</span>-&gt;{<span class="w">Input</span>} = <span class="i">$Input</span><span class="sc">;</span>
+ 122 
+ 123   <span class="c"># Check input parameter type...</span>
+ 124   <span class="k">my</span><span class="s">(</span><span class="i">$InputType</span><span class="s">)</span><span class="sc">;</span>
+ 125 
+ 126   <span class="i">$InputType</span> = <span class="k">ref</span> <span class="i">$Input</span><span class="sc">;</span>
+ 127   <span class="k">if</span> <span class="s">(</span><span class="i">$InputType</span> =~ <span class="q">/CODE/i</span><span class="s">)</span> <span class="s">{</span>
+ 128     <span class="c"># Input iterator...</span>
+ 129     <span class="i">$This</span>-&gt;{<span class="w">InputType</span>} = <span class="q">&quot;InputIterator&quot;</span><span class="sc">;</span>
+ 130   <span class="s">}</span>
+ 131   <span class="k">elsif</span> <span class="s">(</span><span class="i">$InputType</span> =~ <span class="q">/GLOB/i</span> &amp;&amp; <span class="k">defined</span> <span class="k">fileno</span> <span class="i">$Input</span><span class="s">)</span> <span class="s">{</span>
+ 132     <span class="c"># Input stream...</span>
+ 133     <span class="i">$This</span>-&gt;{<span class="w">InputType</span>} = <span class="q">&quot;FileStream&quot;</span><span class="sc">;</span>
+ 134   <span class="s">}</span>
+ 135   <span class="k">elsif</span> <span class="s">(</span><span class="i">$InputType</span><span class="s">)</span> <span class="s">{</span>
+ 136     <span class="c"># Perl ref function returns nonempty string for all other references...</span>
+ 137     <span class="w">croak</span> <span class="q">&quot;Error: ${ClassName}-&gt;new: Object can&#39;t be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...&quot;</span><span class="sc">;</span>
+ 138   <span class="s">}</span>
+ 139   <span class="k">else</span> <span class="s">{</span>
+ 140     <span class="c"># Input string...</span>
+ 141     <span class="i">$This</span>-&gt;{<span class="w">InputType</span>} = <span class="q">&quot;String&quot;</span><span class="sc">;</span>
+ 142   <span class="s">}</span>
+ 143 
+ 144   <span class="c"># Check tokens specifications...</span>
+ 145   <span class="k">if</span> <span class="s">(</span>!<span class="i">@TokensSpec</span><span class="s">)</span> <span class="s">{</span>
+ 146     <span class="w">croak</span> <span class="q">&quot;Error: ${ClassName}-&gt;new: TokensSpec is not defined or the array doesn&#39;t contain any values. Supported values: a reference to an array containg token label, regular expression to match and an option reference to function to modify matched values...&quot;</span><span class="sc">;</span>
+ 147   <span class="s">}</span>
+ 148   <span class="i">@</span>{<span class="i">$This</span>-&gt;{<span class="w">TokensSpec</span>}} = <span class="i">@TokensSpec</span><span class="sc">;</span>
+ 149 
+ 150   <span class="i">$This</span><span class="i">-&gt;_GenerateLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
+ 151 
+ 152   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
+ 153 <span class="s">}</span>
+ 154 
+ 155 <span class="c"># Generate a lexer using reference to an input iterator function, an open file</span>
+ 156 <span class="c"># handle or an input string passed as first parameter by the caller along</span>
+ 157 <span class="c"># with token specifications as second parameter...</span>
+ 158 <span class="c">#</span>
+<a name="_GenerateLexer-"></a> 159 <span class="k">sub </span><span class="m">_GenerateLexer</span> <span class="s">{</span>
+ 160   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 161 
+ 162   <span class="k">if</span> <span class="s">(</span><span class="i">$This</span>-&gt;{<span class="w">InputType</span>} =~ <span class="q">/^InputIterator$/i</span><span class="s">)</span> <span class="s">{</span>
+ 163     <span class="i">$This</span><span class="i">-&gt;_GenerateInputIteratorLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
+ 164   <span class="s">}</span>
+ 165   <span class="k">elsif</span> <span class="s">(</span><span class="i">$This</span>-&gt;{<span class="w">InputType</span>} =~ <span class="q">/^FileStream$/i</span><span class="s">)</span> <span class="s">{</span>
+ 166     <span class="i">$This</span><span class="i">-&gt;_GenerateInputFileStreamLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
+ 167   <span class="s">}</span>
+ 168   <span class="k">elsif</span> <span class="s">(</span><span class="i">$This</span>-&gt;{<span class="w">InputType</span>} =~ <span class="q">/^String$/i</span><span class="s">)</span> <span class="s">{</span>
+ 169     <span class="i">$This</span><span class="i">-&gt;_GenerateInputStringLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
+ 170   <span class="s">}</span>
+ 171   <span class="k">else</span> <span class="s">{</span>
+ 172     <span class="w">croak</span> <span class="q">&quot;Error: ${ClassName}-&gt;new: Object can&#39;t be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...&quot;</span><span class="sc">;</span>
+ 173   <span class="s">}</span>
+ 174 
+ 175   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
+ 176 <span class="s">}</span>
+ 177 
+ 178 <span class="c"># Generate a lexer using specified input iterator...</span>
+ 179 <span class="c">#</span>
+<a name="_GenerateInputIteratorLexer-"></a> 180 <span class="k">sub </span><span class="m">_GenerateInputIteratorLexer</span> <span class="s">{</span>
+ 181   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$InputIteratorRef</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 182 
+ 183   <span class="i">$This</span><span class="i">-&gt;_GenerateChainedLexer</span><span class="s">(</span><span class="i">$InputIteratorRef</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
+ 184 
+ 185   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
+ 186 <span class="s">}</span>
+ 187 
+ 188 <span class="c"># Generate a lexer using specified input file stream reference...</span>
+ 189 <span class="c">#</span>
+<a name="_GenerateInputFileStreamLexer-"></a> 190 <span class="k">sub </span><span class="m">_GenerateInputFileStreamLexer</span> <span class="s">{</span>
+ 191   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$FileHandleRef</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 192 
+ 193   <span class="c"># Iterator is an anonymous function reference and Perl keeps $FileHandleRef</span>
+ 194   <span class="c"># in scope during its execution.</span>
+ 195 
+ 196   <span class="i">$This</span><span class="i">-&gt;_GenerateChainedLexer</span><span class="s">(</span> <span class="k">sub</span> <span class="s">{</span> <span class="k">return</span> <span class="q">&lt;$FileHandleRef&gt;</span><span class="sc">;</span> <span class="s">}</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
+ 197 
+ 198   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
+ 199 <span class="s">}</span>
+ 200 
+ 201 <span class="c"># Generate a lexer using specified input string...</span>
+ 202 <span class="c">#</span>
+<a name="_GenerateInputStringLexer-"></a> 203 <span class="k">sub </span><span class="m">_GenerateInputStringLexer</span> <span class="s">{</span>
+ 204   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Text</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 205   <span class="k">my</span><span class="s">(</span><span class="i">@InputText</span><span class="s">)</span> = <span class="s">(</span><span class="i">$Text</span><span class="s">)</span><span class="sc">;</span>
+ 206 
+ 207   <span class="c"># Iterator is an anonymous function reference and Perl keeps @InputText</span>
+ 208   <span class="c"># in scope during its execution.</span>
+ 209 
+ 210   <span class="i">$This</span><span class="i">-&gt;_GenerateChainedLexer</span><span class="s">(</span> <span class="k">sub</span> <span class="s">{</span> <span class="k">return</span> <span class="k">shift</span> <span class="i">@InputText</span><span class="sc">;</span> <span class="s">}</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
+ 211 
+ 212   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
+ 213 <span class="s">}</span>
+ 214 
+ 215 <span class="c"># Get next available token label and value pair as an array reference or unrecognized</span>
+ 216 <span class="c"># text from input stream by either removing it from the input or simply peeking ahead...</span>
+ 217 <span class="c">#</span>
+ 218 <span class="c"># Supported mode values: Peek, Next. Default: Next</span>
+ 219 <span class="c">#</span>
+<a name="Lex-"></a> 220 <span class="k">sub </span><span class="m">Lex</span> <span class="s">{</span>
+ 221   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Mode</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 222 
+ 223   <span class="k">return</span> <span class="i">$This</span>-&gt;{<span class="w">ChainedLexer</span>}-&gt;<span class="s">(</span><span class="i">$Mode</span><span class="s">)</span>
+ 224 <span class="s">}</span>
+ 225 
+ 226 <span class="c"># Get next available token label and value pair as an array reference or unrecognized</span>
+ 227 <span class="c"># text from input stream by removing it from the input stream...</span>
+ 228 <span class="c">#</span>
+<a name="Next-"></a> 229 <span class="k">sub </span><span class="m">Next</span> <span class="s">{</span>
+ 230   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 231 
+ 232   <span class="k">return</span> <span class="i">$This</span><span class="i">-&gt;Lex</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 233 <span class="s">}</span>
+ 234 
+ 235 <span class="c"># Get next available token label and value pair as an array reference or unrecognized</span>
+ 236 <span class="c"># text from input stream by simply peeking ahead and without removing it from the input</span>
+ 237 <span class="c"># stream..</span>
+ 238 <span class="c">#</span>
+<a name="Peek-"></a> 239 <span class="k">sub </span><span class="m">Peek</span> <span class="s">{</span>
+ 240   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 241 
+ 242   <span class="k">return</span> <span class="i">$This</span><span class="i">-&gt;Lex</span><span class="s">(</span><span class="q">&#39;Peek&#39;</span><span class="s">)</span>
+ 243 <span class="s">}</span>
+ 244 
+ 245 <span class="c"># Get a reference to lexer method to be used by the caller...</span>
+ 246 <span class="c">#</span>
+<a name="GetLex-"></a> 247 <span class="k">sub </span><span class="m">GetLex</span> <span class="s">{</span>
+ 248   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 249 
+ 250   <span class="k">return</span> <span class="k">sub</span> <span class="s">{</span> <span class="i">$This</span><span class="i">-&gt;Lex</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> <span class="s">}</span><span class="sc">;</span>
+ 251 <span class="s">}</span>
+ 252 
+ 253 <span class="c"># The chained lexer generation is implemented based on examples in Higher-order Perl</span>
+ 254 <span class="c"># [ Ref 126 ] book.</span>
+ 255 <span class="c">#</span>
+ 256 <span class="c"># Generate a lexer using specified input iterator and chaining it with other lexers generated</span>
+ 257 <span class="c"># for all token specifications. The lexer generated for first token specification uses input</span>
+ 258 <span class="c"># iterator to retrieve any available input text; the subsequent chained lexers for rest</span>
+ 259 <span class="c"># of the tokens use lexers generated for previous token specifications to get next input</span>
+ 260 <span class="c"># which might be unmatched input text or a reference to an array containing token and</span>
+ 261 <span class="c"># matched text pair.</span>
+ 262 <span class="c">#</span>
+<a name="_GenerateChainedLexer-"></a> 263 <span class="k">sub </span><span class="m">_GenerateChainedLexer</span> <span class="s">{</span>
+ 264   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$InputIteratorRef</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 265   <span class="k">my</span><span class="s">(</span><span class="i">$TokenSpecRef</span><span class="cm">,</span> <span class="i">$ChainedLexer</span><span class="s">)</span><span class="sc">;</span>
+ 266 
+ 267   <span class="i">$ChainedLexer</span> = <span class="k">undef</span><span class="sc">;</span>
+ 268   <span class="k">for</span> <span class="i">$TokenSpecRef</span> <span class="s">(</span><span class="i">@TokensSpec</span><span class="s">)</span> <span class="s">{</span>
+ 269     <span class="i">$ChainedLexer</span> = <span class="k">defined</span> <span class="i">$ChainedLexer</span> ? <span class="i">$This</span><span class="i">-&gt;_GenerateLexerForToken</span><span class="s">(</span><span class="i">$ChainedLexer</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$TokenSpecRef</span>}<span class="s">)</span> <span class="co">:</span> <span class="i">$This</span><span class="i">-&gt;_GenerateLexerForToken</span><span class="s">(</span><span class="i">$InputIteratorRef</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$TokenSpecRef</span>}<span class="s">)</span><span class="sc">;</span>
+ 270   <span class="s">}</span>
+ 271 
+ 272   <span class="i">$This</span>-&gt;{<span class="w">ChainedLexer</span>} = <span class="i">$ChainedLexer</span><span class="sc">;</span>
+ 273 
+ 274   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
+ 275 <span class="s">}</span>
+ 276 
+ 277 
+ 278 <span class="c"># Generate a lexer using specified token specification using specified input or</span>
+ 279 <span class="c"># input retrieved using another token lexer. The lexer retrieving input from the</span>
+ 280 <span class="c"># specified input stream is at the bottom of the chain.</span>
+ 281 <span class="c">#</span>
+<a name="_GenerateLexerForToken-"></a> 282 <span class="k">sub </span><span class="m">_GenerateLexerForToken</span> <span class="s">{</span>
+ 283   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$InputIteratorOrLexer</span><span class="cm">,</span> <span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$RegexPattern</span><span class="cm">,</span> <span class="i">$TokenMatchActionRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 284   <span class="k">my</span><span class="s">(</span><span class="i">$TokenMatchAndSplitRef</span><span class="cm">,</span> <span class="i">$InputBuffer</span><span class="cm">,</span> <span class="i">@ProcessedTokens</span><span class="s">)</span><span class="sc">;</span>
+ 285 
+ 286   <span class="c"># Input buffer for a specific lexer in chained lexers containing unprocessed</span>
+ 287   <span class="c"># text for token specifications retrieved from a downstream lexer or initial</span>
+ 288   <span class="c"># input...</span>
+ 289   <span class="c">#</span>
+ 290   <span class="i">$InputBuffer</span> = <span class="q">&quot;&quot;</span><span class="sc">;</span>
+ 291 
+ 292   <span class="c"># @ProcessedTokens contains either references to an array containing token label</span>
+ 293   <span class="c"># and matched text or any unmatched input text string...</span>
+ 294   <span class="c">#</span>
+ 295   <span class="i">@ProcessedTokens</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 296 
+ 297   <span class="c"># Setup a default anonymous function reference to generate an array reference</span>
+ 298   <span class="c"># containing $Token and text matched to $RegexPattern.</span>
+ 299   <span class="c">#</span>
+ 300   <span class="i">$TokenMatchActionRef</span> = <span class="k">defined</span> <span class="i">$TokenMatchActionRef</span> ? <span class="i">$TokenMatchActionRef</span> <span class="co">:</span> <span class="k">sub</span> <span class="s">{</span> <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">$MatchedText</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> <span class="k">return</span> <span class="s">[</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$MatchedText</span><span class="s">]</span>  <span class="s">}</span><span class="sc">;</span>
+ 301 
+ 302   <span class="c"># Setup an anonymous function to match and split input text using $RegexPattern for</span>
+ 303   <span class="c"># a specific token during its lexer invocation in chained lexers.</span>
+ 304   <span class="c">#</span>
+ 305   <span class="c"># The usage of parenthesis around $RegexPattern during split allows capturing of matched</span>
+ 306   <span class="c"># text, which is subsequently processed to retrieve matched $Token values. The split function</span>
+ 307   <span class="c"># inserts a &quot;&quot; separator in the returned array as first entry whenever $InputText starts with</span>
+ 308   <span class="c"># $RegexPattern. $InputText is returned as the only element for no match.</span>
+ 309   <span class="c">#</span>
+ 310   <span class="i">$TokenMatchAndSplitRef</span> = <span class="k">sub</span> <span class="s">{</span> <span class="k">my</span><span class="s">(</span><span class="i">$InputText</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> <span class="k">return</span> <span class="k">split</span> <span class="q">/($RegexPattern)/</span><span class="cm">,</span> <span class="i">$InputText</span><span class="sc">;</span> <span class="s">}</span><span class="sc">;</span>
+ 311 
+ 312   <span class="c"># Setup a lexer for $TokenLabel as an anonymous function and return its reference to caller</span>
+ 313   <span class="c"># which in turn chains the lexers for all $Tokens before returning a reference to a lexer</span>
+ 314   <span class="c"># at top of the lexer chain.</span>
+ 315   <span class="c">#</span>
+ 316   <span class="c"># Perl maintains scope of all variables defined within the scope of the current function</span>
+ 317   <span class="c"># during invocation of anonymous function even after the return call.</span>
+ 318   <span class="c">#</span>
+ 319   <span class="k">return</span> <span class="k">sub</span> <span class="s">{</span>
+ 320     <span class="k">my</span><span class="s">(</span><span class="i">$Mode</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 321 
+ 322     <span class="c"># Currently supported values for mode: Peek, Next</span>
+ 323     <span class="c">#</span>
+ 324     <span class="i">$Mode</span> = <span class="k">defined</span> <span class="i">$Mode</span> ? <span class="i">$Mode</span> <span class="co">:</span> <span class="q">&#39;Next&#39;</span><span class="sc">;</span>
+ 325 
+ 326     <span class="k">while</span> <span class="s">(</span><span class="i">@ProcessedTokens</span> == <span class="n">0</span> &amp;&amp; <span class="k">defined</span> <span class="i">$InputBuffer</span> <span class="s">)</span> <span class="s">{</span>
+ 327       <span class="c"># Get any new input....</span>
+ 328       <span class="k">my</span> <span class="i">$NewInput</span> = <span class="i">$InputIteratorOrLexer</span>-&gt;<span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 329 
+ 330       <span class="k">if</span> <span class="s">(</span><span class="k">ref</span> <span class="i">$NewInput</span><span class="s">)</span> <span class="s">{</span>
+ 331         <span class="c"># Input is an array reference containing matched token and text returned by</span>
+ 332         <span class="c"># a chained downstream lexer...</span>
+ 333         <span class="c">#</span>
+ 334         <span class="c"># Match $RegexPattern in available buffer text to retrieve any matched text</span>
+ 335         <span class="c"># for current $Token. $Separator might be &quot;&quot;: $RegexPattern is at start</span>
+ 336         <span class="c"># of $InputBuffer</span>
+ 337         <span class="c">#</span>
+ 338         <span class="c"># Process input buffer containing text to be matched for the current lexer</span>
+ 339         <span class="c"># which didn&#39;t get processed earlier during @NewTokens &gt; 2  while loop:</span>
+ 340         <span class="c"># no match for current lexer or more input available. It maintains order</span>
+ 341         <span class="c"># of token matching in input stream.</span>
+ 342         <span class="c">#</span>
+ 343         <span class="k">my</span><span class="s">(</span><span class="i">$Separator</span><span class="cm">,</span> <span class="i">$MatchedTokenRefOrText</span><span class="s">)</span><span class="sc">;</span>
+ 344 
+ 345         <span class="s">(</span><span class="i">$Separator</span><span class="cm">,</span> <span class="i">$MatchedTokenRefOrText</span><span class="s">)</span> = <span class="i">$TokenMatchAndSplitRef</span>-&gt;<span class="s">(</span><span class="i">$InputBuffer</span><span class="s">)</span><span class="sc">;</span>
+ 346         <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$MatchedTokenRefOrText</span><span class="s">)</span> <span class="s">{</span>
+ 347           <span class="i">$MatchedTokenRefOrText</span> = <span class="i">$TokenMatchActionRef</span>-&gt;<span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$MatchedTokenRefOrText</span><span class="s">)</span><span class="sc">;</span>
+ 348         <span class="s">}</span>
+ 349 
+ 350         <span class="c"># Collect valid token references or text...</span>
+ 351         <span class="k">push</span> <span class="i">@ProcessedTokens</span><span class="cm">,</span> <span class="k">grep</span> <span class="s">{</span> <span class="k">defined</span> <span class="i">$_</span> &amp;&amp; <span class="i">$_</span> <span class="k">ne</span> <span class="q">&quot;&quot;</span> <span class="s">}</span> <span class="s">(</span><span class="i">$Separator</span><span class="cm">,</span> <span class="i">$MatchedTokenRefOrText</span><span class="cm">,</span> <span class="i">$NewInput</span><span class="s">)</span><span class="sc">;</span>
+ 352 
+ 353         <span class="c"># Empty input buffer...</span>
+ 354         <span class="i">$InputBuffer</span> = <span class="q">&quot;&quot;</span><span class="sc">;</span>
+ 355 
+ 356         <span class="c"># Get out of the loop as processed token references and/or text are available...</span>
+ 357         <span class="k">last</span><span class="sc">;</span>
+ 358       <span class="s">}</span>
+ 359 
+ 360       <span class="c"># Process input retrieved from downstream lexer or input iterator which hasn&#39;t</span>
+ 361       <span class="c"># been processed into tokens..</span>
+ 362       <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$NewInput</span><span class="s">)</span> <span class="s">{</span>
+ 363         <span class="i">$InputBuffer</span> .= <span class="i">$NewInput</span><span class="sc">;</span>
+ 364       <span class="s">}</span>
+ 365 
+ 366       <span class="c"># Retrieve any matched tokens from available input for the current lexer...</span>
+ 367       <span class="c">#</span>
+ 368       <span class="k">my</span><span class="s">(</span><span class="i">@NewTokens</span><span class="s">)</span> = <span class="i">$TokenMatchAndSplitRef</span>-&gt;<span class="s">(</span><span class="i">$InputBuffer</span><span class="s">)</span><span class="sc">;</span>
+ 369 
+ 370       <span class="k">while</span> <span class="s">(</span> <span class="i">@NewTokens</span> &gt; <span class="n">2</span> || <span class="i">@NewTokens</span> &amp;&amp; !<span class="k">defined</span> <span class="i">$NewInput</span><span class="s">)</span> <span class="s">{</span>
+ 371         <span class="c"># Scenario 1: Complete match</span>
+ 372         <span class="c">#   @NewTokens &gt; 2 : Availability of separator, matched token text, separator.</span>
+ 373         <span class="c">#   The separator might correspond to a token for an upstream lexer, followed</span>
+ 374         <span class="c">#   by the matched token from the current lexer. It ends up getting passed to the upstream</span>
+ 375         <span class="c">#   lexer for processing.</span>
+ 376         <span class="c">#</span>
+ 377         <span class="c"># Scenario 2: No more input available from iterator or downstream lexer</span>
+ 378         <span class="c">#   @NewTokens &lt;= 2 and no more input implies any left over text in buffer. And</span>
+ 379         <span class="c">#   it ends up getting passed to the upstream lexer for processing.</span>
+ 380         <span class="c">#</span>
+ 381 
+ 382         <span class="c"># Take off any unprocessed input text that doesn&#39;t match off the buffer: It&#39;ll be</span>
+ 383         <span class="c"># passed to upstream chained lexer for processing...</span>
+ 384         <span class="c">#</span>
+ 385         <span class="k">push</span> <span class="i">@ProcessedTokens</span><span class="cm">,</span> <span class="k">shift</span> <span class="i">@NewTokens</span><span class="sc">;</span>
+ 386 
+ 387         <span class="k">if</span> <span class="s">(</span><span class="i">@NewTokens</span><span class="s">)</span> <span class="s">{</span>
+ 388           <span class="k">my</span> <span class="i">$MatchedTokenText</span> = <span class="k">shift</span> <span class="i">@NewTokens</span><span class="sc">;</span>
+ 389           <span class="k">push</span> <span class="i">@ProcessedTokens</span><span class="cm">,</span> <span class="i">$TokenMatchActionRef</span>-&gt;<span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$MatchedTokenText</span><span class="s">)</span><span class="sc">;</span>
+ 390         <span class="s">}</span>
+ 391       <span class="s">}</span>
+ 392 
+ 393       <span class="c"># Retrieve any leftover text from NewTokens and put it back into InputBuffer for</span>
+ 394       <span class="c"># processing by current lexer. All token references have been taken out....</span>
+ 395       <span class="c">#</span>
+ 396       <span class="i">$InputBuffer</span> = <span class="q">&quot;&quot;</span><span class="sc">;</span>
+ 397       <span class="k">if</span> <span class="s">(</span><span class="i">@NewTokens</span><span class="s">)</span> <span class="s">{</span>
+ 398         <span class="i">$InputBuffer</span> = <span class="k">join</span> <span class="q">&quot;&quot;</span><span class="cm">,</span> <span class="i">@NewTokens</span><span class="sc">;</span>
+ 399       <span class="s">}</span>
+ 400 
+ 401       <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$NewInput</span><span class="s">)</span> <span class="s">{</span>
+ 402         <span class="c"># No more input from the downstream lexer...</span>
+ 403         <span class="i">$InputBuffer</span> = <span class="k">undef</span><span class="sc">;</span>
+ 404       <span class="s">}</span>
+ 405 
+ 406       <span class="c"># Clean up any empty strings from ProcessedTokens containing token</span>
+ 407       <span class="c"># references or text...</span>
+ 408       <span class="i">@ProcessedTokens</span> = <span class="k">grep</span> <span class="s">{</span> <span class="i">$_</span> <span class="k">ne</span> <span class="q">&quot;&quot;</span> <span class="s">}</span> <span class="i">@ProcessedTokens</span><span class="sc">;</span>
+ 409 
+ 410     <span class="s">}</span>
+ 411 
+ 412     <span class="c"># Return reference to an array containing token and matched text or just unmatched input text...</span>
+ 413     <span class="k">my</span> <span class="i">$TokenRefOrText</span> = <span class="k">undef</span><span class="sc">;</span>
+ 414 
+ 415     <span class="k">if</span> <span class="s">(</span><span class="i">@ProcessedTokens</span><span class="s">)</span> <span class="s">{</span>
+ 416       <span class="c"># Get first available reference either by just peeking or removing it from the list</span>
+ 417       <span class="c"># of available tokens...</span>
+ 418       <span class="i">$TokenRefOrText</span> = <span class="s">(</span><span class="i">$Mode</span> =~ <span class="q">/^Peek$/i</span><span class="s">)</span> ?  <span class="i">$ProcessedTokens</span>[<span class="n">0</span>] <span class="co">:</span> <span class="k">shift</span> <span class="i">@ProcessedTokens</span><span class="sc">;</span>
+ 419     <span class="s">}</span>
+ 420 
+ 421     <span class="k">return</span> <span class="i">$TokenRefOrText</span><span class="sc">;</span>
+ 422   <span class="s">}</span><span class="sc">;</span>
+ 423 <span class="s">}</span>
+ 424 
+ 425 <span class="c"># Is it a lexer object?</span>
+<a name="_IsLexer-"></a> 426 <span class="k">sub </span><span class="m">_IsLexer</span> <span class="s">{</span>
+ 427   <span class="k">my</span><span class="s">(</span><span class="i">$Object</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 428 
+ 429   <span class="k">return</span> <span class="s">(</span><span class="i">Scalar::Util::blessed</span><span class="s">(</span><span class="i">$Object</span><span class="s">)</span> &amp;&amp; <span class="i">$Object</span><span class="i">-&gt;isa</span><span class="s">(</span><span class="i">$ClassName</span><span class="s">)</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span>
+ 430 <span class="s">}</span>
+ 431 
+ 432 <span class="c"># Return a string containing information about lexer...</span>
+<a name="StringifyLexer-"></a> 433 <span class="k">sub </span><span class="m">StringifyLexer</span> <span class="s">{</span>
+ 434   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 435   <span class="k">my</span><span class="s">(</span><span class="i">$LexerString</span><span class="s">)</span><span class="sc">;</span>
+ 436 
+ 437   <span class="i">$LexerString</span> = <span class="q">&quot;Lexer: PackageName: $ClassName; &quot;</span> . <span class="i">$This</span><span class="i">-&gt;_GetLexerInfoString</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
+ 438 
+ 439   <span class="k">return</span> <span class="i">$LexerString</span><span class="sc">;</span>
+ 440 <span class="s">}</span>
+ 441 
+ 442 <span class="c"># Return a string containing information about lexer...</span>
+<a name="_GetLexerInfoString-"></a> 443 <span class="k">sub </span><span class="m">_GetLexerInfoString</span> <span class="s">{</span>
+ 444   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
+ 445   <span class="k">my</span><span class="s">(</span><span class="i">$LexerInfoString</span><span class="cm">,</span> <span class="i">$TokensSpec</span><span class="cm">,</span> <span class="i">$TokenSpec</span><span class="cm">,</span> <span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$TokenMatchRegex</span><span class="cm">,</span> <span class="i">$TokenMatchAction</span><span class="s">)</span><span class="sc">;</span>
+ 446 
+ 447   <span class="i">$LexerInfoString</span> = <span class="q">&quot;InputType: $This-&gt;{InputType}&quot;</span><span class="sc">;</span>
+ 448 
+ 449   <span class="k">if</span> <span class="s">(</span><span class="i">$This</span>-&gt;{<span class="w">InputType</span>} =~ <span class="q">/^String$/i</span><span class="s">)</span> <span class="s">{</span>
+ 450     <span class="i">$LexerInfoString</span> .= <span class="q">&quot;; InputString: $This-&gt;{Input}&quot;</span><span class="sc">;</span>
+ 451   <span class="s">}</span>
+ 452 
+ 453   <span class="i">$TokensSpec</span> = <span class="q">&quot;TokensSpecifications: &lt;None&gt;&quot;</span><span class="sc">;</span>
+ 454   <span class="k">if</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$This</span>-&gt;{<span class="w">TokensSpec</span>}}<span class="s">)</span> <span class="s">{</span>
+ 455     <span class="i">$TokensSpec</span> = <span class="q">&quot;TokensSpecifications: &lt; [Label, MatchRegex, MatchAction]:&quot;</span><span class="sc">;</span>
+ 456     <span class="k">for</span> <span class="i">$TokenSpec</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$This</span>-&gt;{<span class="w">TokensSpec</span>}}<span class="s">)</span> <span class="s">{</span>
+ 457       <span class="s">(</span><span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$TokenMatchRegex</span><span class="s">)</span> = <span class="i">@</span>{<span class="i">$TokenSpec</span>}<span class="sc">;</span>
+ 458       <span class="i">$TokenMatchAction</span> = <span class="s">(</span><span class="i">@</span>{<span class="i">$TokenSpec</span>} == <span class="n">3</span><span class="s">)</span> ? <span class="q">&quot;$TokenSpec-&gt;[2]&quot;</span> <span class="co">:</span> <span class="q">&quot;undefined&quot;</span><span class="sc">;</span>
+ 459       <span class="i">$TokensSpec</span> .= <span class="q">&quot; [$TokenLabel, $TokenMatchRegex, $TokenMatchAction]&quot;</span><span class="sc">;</span>
+ 460     <span class="s">}</span>
+ 461     <span class="i">$TokensSpec</span> .= <span class="q">&quot; &gt;&quot;</span><span class="sc">;</span>
+ 462   <span class="s">}</span>
+ 463 
+ 464   <span class="i">$LexerInfoString</span> .= <span class="q">&quot;; $TokensSpec&quot;</span><span class="sc">;</span>
+ 465 
+ 466   <span class="k">return</span> <span class="i">$LexerInfoString</span><span class="sc">;</span>
+ 467 <span class="s">}</span>
+ 468 
+<a name="EOF-"></a></pre>
+<p>&nbsp;</p>
+<br />
+<center>
+<img src="../../../images/h2o2.png">
+</center>
+</body>
+</html>