view mayachemtools/docs/modules/html/code/Lexer.html @ 9:ab29fa5c8c1f draft default tip

Uploaded
author deepakjadmin
date Thu, 15 Dec 2016 14:18:03 -0500
parents 73ae111cf86f
children
line wrap: on
line source

<html>
<head>
<title>MayaChemTools:Code:Parsers::Lexer.pm</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<link rel="stylesheet" type="text/css" href="../../../css/MayaChemToolsCode.css">
</head>
<body leftmargin="20" rightmargin="20" topmargin="10" bottommargin="10">
<br/>
<center>
<a href="http://www.mayachemtools.org" title="MayaChemTools Home"><img src="../../../images/MayaChemToolsLogo.gif" border="0" alt="MayaChemTools"></a>
</center>
<br/>
<pre>
<a name="package-Parsers::Lexer-"></a>   1 <span class="k">package </span><span class="i">Parsers::Lexer</span><span class="sc">;</span>
   2 <span class="c">#</span>
   3 <span class="c"># $RCSfile: Lexer.pm,v $</span>
   4 <span class="c"># $Date: 2015/02/28 20:50:55 $</span>
   5 <span class="c"># $Revision: 1.10 $</span>
   6 <span class="c">#</span>
   7 <span class="c"># Author: Manish Sud &lt;msud@san.rr.com&gt;</span>
   8 <span class="c">#</span>
   9 <span class="c"># Copyright (C) 2015 Manish Sud. All rights reserved.</span>
  10 <span class="c">#</span>
  11 <span class="c"># This file is part of MayaChemTools.</span>
  12 <span class="c">#</span>
  13 <span class="c"># MayaChemTools is free software; you can redistribute it and/or modify it under</span>
  14 <span class="c"># the terms of the GNU Lesser General Public License as published by the Free</span>
  15 <span class="c"># Software Foundation; either version 3 of the License, or (at your option) any</span>
  16 <span class="c"># later version.</span>
  17 <span class="c">#</span>
  18 <span class="c"># MayaChemTools is distributed in the hope that it will be useful, but without</span>
  19 <span class="c"># any warranty; without even the implied warranty of merchantability or fitness</span>
  20 <span class="c"># for a particular purpose.  See the GNU Lesser General Public License for more</span>
  21 <span class="c"># details.</span>
  22 <span class="c">#</span>
  23 <span class="c"># You should have received a copy of the GNU Lesser General Public License</span>
  24 <span class="c"># along with MayaChemTools; if not, see &lt;http://www.gnu.org/licenses/&gt; or</span>
  25 <span class="c"># write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,</span>
  26 <span class="c"># Boston, MA, 02111-1307, USA.</span>
  27 <span class="c">#</span>
  28 
  29 <span class="k">use</span> <span class="w">strict</span><span class="sc">;</span>
  30 <span class="k">use</span> <span class="w">Carp</span><span class="sc">;</span>
  31 <span class="k">use</span> <span class="w">Exporter</span><span class="sc">;</span>
  32 <span class="k">use</span> <span class="w">Scalar::Util</span> <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
  33 
  34 <span class="k">use</span> <span class="w">vars</span> <span class="q">qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)</span><span class="sc">;</span>
  35 
  36 <span class="i">@ISA</span> = <span class="q">qw(Exporter)</span><span class="sc">;</span>
  37 <span class="i">@EXPORT</span> = <span class="q">qw()</span><span class="sc">;</span>
  38 <span class="i">@EXPORT_OK</span> = <span class="q">qw()</span><span class="sc">;</span>
  39 
  40 <span class="i">%EXPORT_TAGS</span> = <span class="s">(</span><span class="w">all</span>  <span class="cm">=&gt;</span> <span class="s">[</span><span class="i">@EXPORT</span><span class="cm">,</span> <span class="i">@EXPORT_OK</span><span class="s">]</span><span class="s">)</span><span class="sc">;</span>
  41 
  42 <span class="c"># Setup class variables...</span>
  43 <span class="k">my</span><span class="s">(</span><span class="i">$ClassName</span><span class="s">)</span><span class="sc">;</span>
  44 <span class="i">_InitializeClass</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
  45 
  46 <span class="c"># Overload Perl functions...</span>
  47 <span class="k">use</span> <span class="w">overload</span> <span class="q">&#39;&quot;&quot;&#39;</span> <span class="cm">=&gt;</span> <span class="q">&#39;StringifyLexer&#39;</span><span class="sc">;</span>
  48 
  49 <span class="c"># Class constructor...</span>
<a name="new-"></a>  50 <span class="k">sub </span><span class="m">new</span> <span class="s">{</span>
  51   <span class="k">my</span><span class="s">(</span><span class="i">$Class</span><span class="cm">,</span> <span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
  52 
  53   <span class="c"># Initialize object...</span>
  54   <span class="k">my</span> <span class="i">$This</span> = <span class="s">{</span><span class="s">}</span><span class="sc">;</span>
  55   <span class="k">bless</span> <span class="i">$This</span><span class="cm">,</span> <span class="k">ref</span><span class="s">(</span><span class="i">$Class</span><span class="s">)</span> || <span class="i">$Class</span><span class="sc">;</span>
  56   <span class="i">$This</span><span class="i">-&gt;_InitializeLexer</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
  57 
  58   <span class="i">$This</span><span class="i">-&gt;_ValidateParametersAndGenerateLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
  59 
  60   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
  61 <span class="s">}</span>
  62 
  63 
  64 <span class="c"># Initialize class ...</span>
<a name="_InitializeClass-"></a>  65 <span class="k">sub </span><span class="m">_InitializeClass</span> <span class="s">{</span>
  66   <span class="c">#Class name...</span>
  67   <span class="i">$ClassName</span> = <span class="w">__PACKAGE__</span><span class="sc">;</span>
  68 <span class="s">}</span>
  69 
  70 <span class="c"># Initialize object data...</span>
  71 <span class="c">#</span>
<a name="_InitializeLexer-"></a>  72 <span class="k">sub </span><span class="m">_InitializeLexer</span> <span class="s">{</span>
  73   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
  74 
  75   <span class="c"># Input parameter used by lexer to retrieve text to be lexed. Supported parameter types:</span>
  76   <span class="c">#   . Reference to input iterator function</span>
  77   <span class="c">#   . Reference to an open file handle</span>
  78   <span class="c">#   . Text string</span>
  79   <span class="c">#</span>
  80   <span class="i">$This</span>-&gt;{<span class="w">Input</span>} = <span class="k">undef</span><span class="sc">;</span>
  81 
  82   <span class="c"># Type of input parameter determined using Perl ref function:</span>
  83   <span class="c">#   . InputIterator - ref returns CODE</span>
  84   <span class="c">#   . FileStream - ref returns GLOB and fileno is valid</span>
  85   <span class="c">#   . String - ref returns an empty string</span>
  86   <span class="c">#</span>
  87   <span class="i">$This</span>-&gt;{<span class="w">InputType</span>} = <span class="q">&#39;&#39;</span><span class="sc">;</span>
  88 
  89   <span class="c"># Token specifications supplied by the caller. It&#39;s an array containing references</span>
  90   <span class="c"># to arrays, each containing a TokenLabel and TokenMatchRegex pair along with</span>
  91   <span class="c"># an optional reference to code to be executed after a match.</span>
  92   <span class="c">#</span>
  93   <span class="c"># For example:</span>
  94   <span class="c">#</span>
  95   <span class="c"># @LexerTokensSpec = (</span>
  96   <span class="c">#                        [ &#39;LETTER&#39;, qr/[a-zA-Z]/ ],</span>
  97   <span class="c">#                        [ &#39;NUMBER&#39;, qr/\d+/ ],</span>
  98   <span class="c">#                        [ &#39;SPACE&#39;, qr/[ ]*/, sub { my($This, $TokenLabel, $MatchedText) = @_; return &#39;&#39;; } ],</span>
  99   <span class="c">#                        [ &#39;NEWLINE&#39;, qr/(?:\r\n|\r|\n)/, sub { my($This, $TokenLabel, $MatchedText) = @_;  return &quot;\n&quot;; } ],</span>
 100   <span class="c">#                        [ &#39;CHAR&#39;, qr/[\.]/ ],</span>
 101   <span class="c">#                       );</span>
 102   <span class="c">#</span>
 103   <span class="i">@</span>{<span class="i">$This</span>-&gt;{<span class="w">TokensSpec</span>}} = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
 104 
 105   <span class="c"># Reference to chained lexer...</span>
 106   <span class="i">$This</span>-&gt;{<span class="w">ChainedLexer</span>} = <span class="k">undef</span><span class="sc">;</span>
 107 
 108   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
 109 <span class="s">}</span>
 110 
 111 <span class="c"># Validate input parameters and generate a chained lexer...</span>
 112 <span class="c">#</span>
<a name="_ValidateParametersAndGenerateLexer-"></a> 113 <span class="k">sub </span><span class="m">_ValidateParametersAndGenerateLexer</span> <span class="s">{</span>
 114   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 115 
 116   <span class="c">#</span>
 117   <span class="c"># Validate input to be lexed...</span>
 118   <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$Input</span><span class="s">)</span> <span class="s">{</span>
 119     <span class="w">croak</span> <span class="q">&quot;Error: ${ClassName}-&gt;new: Object can&#39;t be instantiated: Input is not defined. Supported values: a reference to input iterator function, a reference to an open file handle or a text string...&quot;</span><span class="sc">;</span>
 120   <span class="s">}</span>
 121   <span class="i">$This</span>-&gt;{<span class="w">Input</span>} = <span class="i">$Input</span><span class="sc">;</span>
 122 
 123   <span class="c"># Check input parameter type...</span>
 124   <span class="k">my</span><span class="s">(</span><span class="i">$InputType</span><span class="s">)</span><span class="sc">;</span>
 125 
 126   <span class="i">$InputType</span> = <span class="k">ref</span> <span class="i">$Input</span><span class="sc">;</span>
 127   <span class="k">if</span> <span class="s">(</span><span class="i">$InputType</span> =~ <span class="q">/CODE/i</span><span class="s">)</span> <span class="s">{</span>
 128     <span class="c"># Input iterator...</span>
 129     <span class="i">$This</span>-&gt;{<span class="w">InputType</span>} = <span class="q">&quot;InputIterator&quot;</span><span class="sc">;</span>
 130   <span class="s">}</span>
 131   <span class="k">elsif</span> <span class="s">(</span><span class="i">$InputType</span> =~ <span class="q">/GLOB/i</span> &amp;&amp; <span class="k">defined</span> <span class="k">fileno</span> <span class="i">$Input</span><span class="s">)</span> <span class="s">{</span>
 132     <span class="c"># Input stream...</span>
 133     <span class="i">$This</span>-&gt;{<span class="w">InputType</span>} = <span class="q">&quot;FileStream&quot;</span><span class="sc">;</span>
 134   <span class="s">}</span>
 135   <span class="k">elsif</span> <span class="s">(</span><span class="i">$InputType</span><span class="s">)</span> <span class="s">{</span>
 136     <span class="c"># Perl ref function returns nonempty string for all other references...</span>
 137     <span class="w">croak</span> <span class="q">&quot;Error: ${ClassName}-&gt;new: Object can&#39;t be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...&quot;</span><span class="sc">;</span>
 138   <span class="s">}</span>
 139   <span class="k">else</span> <span class="s">{</span>
 140     <span class="c"># Input string...</span>
 141     <span class="i">$This</span>-&gt;{<span class="w">InputType</span>} = <span class="q">&quot;String&quot;</span><span class="sc">;</span>
 142   <span class="s">}</span>
 143 
 144   <span class="c"># Check tokens specifications...</span>
 145   <span class="k">if</span> <span class="s">(</span>!<span class="i">@TokensSpec</span><span class="s">)</span> <span class="s">{</span>
 146     <span class="w">croak</span> <span class="q">&quot;Error: ${ClassName}-&gt;new: TokensSpec is not defined or the array doesn&#39;t contain any values. Supported values: a reference to an array containg token label, regular expression to match and an option reference to function to modify matched values...&quot;</span><span class="sc">;</span>
 147   <span class="s">}</span>
 148   <span class="i">@</span>{<span class="i">$This</span>-&gt;{<span class="w">TokensSpec</span>}} = <span class="i">@TokensSpec</span><span class="sc">;</span>
 149 
 150   <span class="i">$This</span><span class="i">-&gt;_GenerateLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
 151 
 152   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
 153 <span class="s">}</span>
 154 
 155 <span class="c"># Generate a lexer using reference to an input iterator function, an open file</span>
 156 <span class="c"># handle or an input string passed as first parameter by the caller along</span>
 157 <span class="c"># with token specifications as second parameter...</span>
 158 <span class="c">#</span>
<a name="_GenerateLexer-"></a> 159 <span class="k">sub </span><span class="m">_GenerateLexer</span> <span class="s">{</span>
 160   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 161 
 162   <span class="k">if</span> <span class="s">(</span><span class="i">$This</span>-&gt;{<span class="w">InputType</span>} =~ <span class="q">/^InputIterator$/i</span><span class="s">)</span> <span class="s">{</span>
 163     <span class="i">$This</span><span class="i">-&gt;_GenerateInputIteratorLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
 164   <span class="s">}</span>
 165   <span class="k">elsif</span> <span class="s">(</span><span class="i">$This</span>-&gt;{<span class="w">InputType</span>} =~ <span class="q">/^FileStream$/i</span><span class="s">)</span> <span class="s">{</span>
 166     <span class="i">$This</span><span class="i">-&gt;_GenerateInputFileStreamLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
 167   <span class="s">}</span>
 168   <span class="k">elsif</span> <span class="s">(</span><span class="i">$This</span>-&gt;{<span class="w">InputType</span>} =~ <span class="q">/^String$/i</span><span class="s">)</span> <span class="s">{</span>
 169     <span class="i">$This</span><span class="i">-&gt;_GenerateInputStringLexer</span><span class="s">(</span><span class="i">$Input</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
 170   <span class="s">}</span>
 171   <span class="k">else</span> <span class="s">{</span>
 172     <span class="w">croak</span> <span class="q">&quot;Error: ${ClassName}-&gt;new: Object can&#39;t be instantiated: Invalid input parameter type specified. Supported parameter types: a reference to input iterator function, a reference to an open file handle or a text string...&quot;</span><span class="sc">;</span>
 173   <span class="s">}</span>
 174 
 175   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
 176 <span class="s">}</span>
 177 
 178 <span class="c"># Generate a lexer using specified input iterator...</span>
 179 <span class="c">#</span>
<a name="_GenerateInputIteratorLexer-"></a> 180 <span class="k">sub </span><span class="m">_GenerateInputIteratorLexer</span> <span class="s">{</span>
 181   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$InputIteratorRef</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 182 
 183   <span class="i">$This</span><span class="i">-&gt;_GenerateChainedLexer</span><span class="s">(</span><span class="i">$InputIteratorRef</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
 184 
 185   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
 186 <span class="s">}</span>
 187 
 188 <span class="c"># Generate a lexer using specified input file stream reference...</span>
 189 <span class="c">#</span>
<a name="_GenerateInputFileStreamLexer-"></a> 190 <span class="k">sub </span><span class="m">_GenerateInputFileStreamLexer</span> <span class="s">{</span>
 191   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$FileHandleRef</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 192 
 193   <span class="c"># Iterator is an anonymous function reference and Perl keeps $FileHandleRef</span>
 194   <span class="c"># in scope during its execution.</span>
 195 
 196   <span class="i">$This</span><span class="i">-&gt;_GenerateChainedLexer</span><span class="s">(</span> <span class="k">sub</span> <span class="s">{</span> <span class="k">return</span> <span class="q">&lt;$FileHandleRef&gt;</span><span class="sc">;</span> <span class="s">}</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
 197 
 198   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
 199 <span class="s">}</span>
 200 
 201 <span class="c"># Generate a lexer using specified input string...</span>
 202 <span class="c">#</span>
<a name="_GenerateInputStringLexer-"></a> 203 <span class="k">sub </span><span class="m">_GenerateInputStringLexer</span> <span class="s">{</span>
 204   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Text</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 205   <span class="k">my</span><span class="s">(</span><span class="i">@InputText</span><span class="s">)</span> = <span class="s">(</span><span class="i">$Text</span><span class="s">)</span><span class="sc">;</span>
 206 
 207   <span class="c"># Iterator is an anonymous function reference and Perl keeps @InputText</span>
 208   <span class="c"># in scope during its execution.</span>
 209 
 210   <span class="i">$This</span><span class="i">-&gt;_GenerateChainedLexer</span><span class="s">(</span> <span class="k">sub</span> <span class="s">{</span> <span class="k">return</span> <span class="k">shift</span> <span class="i">@InputText</span><span class="sc">;</span> <span class="s">}</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span><span class="sc">;</span>
 211 
 212   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
 213 <span class="s">}</span>
 214 
 215 <span class="c"># Get next available token label and value pair as an array reference or unrecognized</span>
 216 <span class="c"># text from input stream by either removing it from the input or simply peeking ahead...</span>
 217 <span class="c">#</span>
 218 <span class="c"># Supported mode values: Peek, Next. Default: Next</span>
 219 <span class="c">#</span>
<a name="Lex-"></a> 220 <span class="k">sub </span><span class="m">Lex</span> <span class="s">{</span>
 221   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Mode</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 222 
 223   <span class="k">return</span> <span class="i">$This</span>-&gt;{<span class="w">ChainedLexer</span>}-&gt;<span class="s">(</span><span class="i">$Mode</span><span class="s">)</span>
 224 <span class="s">}</span>
 225 
 226 <span class="c"># Get next available token label and value pair as an array reference or unrecognized</span>
 227 <span class="c"># text from input stream by removing it from the input stream...</span>
 228 <span class="c">#</span>
<a name="Next-"></a> 229 <span class="k">sub </span><span class="m">Next</span> <span class="s">{</span>
 230   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 231 
 232   <span class="k">return</span> <span class="i">$This</span><span class="i">-&gt;Lex</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
 233 <span class="s">}</span>
 234 
 235 <span class="c"># Get next available token label and value pair as an array reference or unrecognized</span>
 236 <span class="c"># text from input stream by simply peeking ahead and without removing it from the input</span>
 237 <span class="c"># stream..</span>
 238 <span class="c">#</span>
<a name="Peek-"></a> 239 <span class="k">sub </span><span class="m">Peek</span> <span class="s">{</span>
 240   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 241 
 242   <span class="k">return</span> <span class="i">$This</span><span class="i">-&gt;Lex</span><span class="s">(</span><span class="q">&#39;Peek&#39;</span><span class="s">)</span>
 243 <span class="s">}</span>
 244 
 245 <span class="c"># Get a reference to lexer method to be used by the caller...</span>
 246 <span class="c">#</span>
<a name="GetLex-"></a> 247 <span class="k">sub </span><span class="m">GetLex</span> <span class="s">{</span>
 248   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 249 
 250   <span class="k">return</span> <span class="k">sub</span> <span class="s">{</span> <span class="i">$This</span><span class="i">-&gt;Lex</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span> <span class="s">}</span><span class="sc">;</span>
 251 <span class="s">}</span>
 252 
 253 <span class="c"># The chained lexer generation is implemented based on examples in Higher-order Perl</span>
 254 <span class="c"># [ Ref 126 ] book.</span>
 255 <span class="c">#</span>
 256 <span class="c"># Generate a lexer using specified input iterator and chaining it with other lexers generated</span>
 257 <span class="c"># for all token specifications. The lexer generated for first token specification uses input</span>
 258 <span class="c"># iterator to retrieve any available input text; the subsequent chained lexers for the rest</span>
 259 <span class="c"># of the tokens use lexers generated for previous token specifications to get next input</span>
 260 <span class="c"># which might be unmatched input text or a reference to an array containing token and</span>
 261 <span class="c"># matched text pair.</span>
 262 <span class="c">#</span>
<a name="_GenerateChainedLexer-"></a> 263 <span class="k">sub </span><span class="m">_GenerateChainedLexer</span> <span class="s">{</span>
 264   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$InputIteratorRef</span><span class="cm">,</span> <span class="i">@TokensSpec</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 265   <span class="k">my</span><span class="s">(</span><span class="i">$TokenSpecRef</span><span class="cm">,</span> <span class="i">$ChainedLexer</span><span class="s">)</span><span class="sc">;</span>
 266 
 267   <span class="i">$ChainedLexer</span> = <span class="k">undef</span><span class="sc">;</span>
 268   <span class="k">for</span> <span class="i">$TokenSpecRef</span> <span class="s">(</span><span class="i">@TokensSpec</span><span class="s">)</span> <span class="s">{</span>
 269     <span class="i">$ChainedLexer</span> = <span class="k">defined</span> <span class="i">$ChainedLexer</span> ? <span class="i">$This</span><span class="i">-&gt;_GenerateLexerForToken</span><span class="s">(</span><span class="i">$ChainedLexer</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$TokenSpecRef</span>}<span class="s">)</span> <span class="co">:</span> <span class="i">$This</span><span class="i">-&gt;_GenerateLexerForToken</span><span class="s">(</span><span class="i">$InputIteratorRef</span><span class="cm">,</span> <span class="i">@</span>{<span class="i">$TokenSpecRef</span>}<span class="s">)</span><span class="sc">;</span>
 270   <span class="s">}</span>
 271 
 272   <span class="i">$This</span>-&gt;{<span class="w">ChainedLexer</span>} = <span class="i">$ChainedLexer</span><span class="sc">;</span>
 273 
 274   <span class="k">return</span> <span class="i">$This</span><span class="sc">;</span>
 275 <span class="s">}</span>
 276 
 277 
 278 <span class="c"># Generate a lexer for the specified token specification using specified input or</span>
 279 <span class="c"># input retrieved using another token lexer. The lexer retrieving input from the</span>
 280 <span class="c"># specified input stream is at the bottom of the chain.</span>
 281 <span class="c">#</span>
<a name="_GenerateLexerForToken-"></a> 282 <span class="k">sub </span><span class="m">_GenerateLexerForToken</span> <span class="s">{</span>
 283   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$InputIteratorOrLexer</span><span class="cm">,</span> <span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$RegexPattern</span><span class="cm">,</span> <span class="i">$TokenMatchActionRef</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 284   <span class="k">my</span><span class="s">(</span><span class="i">$TokenMatchAndSplitRef</span><span class="cm">,</span> <span class="i">$InputBuffer</span><span class="cm">,</span> <span class="i">@ProcessedTokens</span><span class="s">)</span><span class="sc">;</span>
 285 
 286   <span class="c"># Input buffer for a specific lexer in chained lexers containing unprocessed</span>
 287   <span class="c"># text for token specifications retrieved from a downstream lexer or initial</span>
 288   <span class="c"># input...</span>
 289   <span class="c">#</span>
 290   <span class="i">$InputBuffer</span> = <span class="q">&quot;&quot;</span><span class="sc">;</span>
 291 
 292   <span class="c"># @ProcessedTokens contains either references to an array containing token label</span>
 293   <span class="c"># and matched text or any unmatched input text string...</span>
 294   <span class="c">#</span>
 295   <span class="i">@ProcessedTokens</span> = <span class="s">(</span><span class="s">)</span><span class="sc">;</span>
 296 
 297   <span class="c"># Setup a default anonymous function reference to generate an array reference</span>
 298   <span class="c"># containing $Token and text matched to $RegexPattern.</span>
 299   <span class="c">#</span>
 300   <span class="i">$TokenMatchActionRef</span> = <span class="k">defined</span> <span class="i">$TokenMatchActionRef</span> ? <span class="i">$TokenMatchActionRef</span> <span class="co">:</span> <span class="k">sub</span> <span class="s">{</span> <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$Label</span><span class="cm">,</span> <span class="i">$MatchedText</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> <span class="k">return</span> <span class="s">[</span><span class="i">$Label</span><span class="cm">,</span> <span class="i">$MatchedText</span><span class="s">]</span>  <span class="s">}</span><span class="sc">;</span>
 301 
 302   <span class="c"># Setup an anonymous function to match and split input text using $RegexPattern for</span>
 303   <span class="c"># a specific token during its lexer invocation in chained lexers.</span>
 304   <span class="c">#</span>
 305   <span class="c"># The usage of parenthesis around $RegexPattern during split allows capturing of matched</span>
 306   <span class="c"># text, which is subsequently processed to retrieve matched $Token values. The split function</span>
 307   <span class="c"># inserts a &quot;&quot; separator in the returned array as first entry whenever $InputText starts with</span>
 308   <span class="c"># $RegexPattern. $InputText is returned as the only element for no match.</span>
 309   <span class="c">#</span>
 310   <span class="i">$TokenMatchAndSplitRef</span> = <span class="k">sub</span> <span class="s">{</span> <span class="k">my</span><span class="s">(</span><span class="i">$InputText</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span> <span class="k">return</span> <span class="k">split</span> <span class="q">/($RegexPattern)/</span><span class="cm">,</span> <span class="i">$InputText</span><span class="sc">;</span> <span class="s">}</span><span class="sc">;</span>
 311 
 312   <span class="c"># Setup a lexer for $TokenLabel as an anonymous function and return its reference to caller</span>
 313   <span class="c"># which in turn chains the lexers for all $Tokens before returning a reference to a lexer</span>
 314   <span class="c"># at top of the lexer chain.</span>
 315   <span class="c">#</span>
 316   <span class="c"># Perl maintains scope of all variables defined within the scope of the current function</span>
 317   <span class="c"># during invocation of anonymous function even after the return call.</span>
 318   <span class="c">#</span>
 319   <span class="k">return</span> <span class="k">sub</span> <span class="s">{</span>
 320     <span class="k">my</span><span class="s">(</span><span class="i">$Mode</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 321 
 322     <span class="c"># Currently supported values for mode: Peek, Next</span>
 323     <span class="c">#</span>
 324     <span class="i">$Mode</span> = <span class="k">defined</span> <span class="i">$Mode</span> ? <span class="i">$Mode</span> <span class="co">:</span> <span class="q">&#39;Next&#39;</span><span class="sc">;</span>
 325 
 326     <span class="k">while</span> <span class="s">(</span><span class="i">@ProcessedTokens</span> == <span class="n">0</span> &amp;&amp; <span class="k">defined</span> <span class="i">$InputBuffer</span> <span class="s">)</span> <span class="s">{</span>
 327       <span class="c"># Get any new input....</span>
 328       <span class="k">my</span> <span class="i">$NewInput</span> = <span class="i">$InputIteratorOrLexer</span>-&gt;<span class="s">(</span><span class="s">)</span><span class="sc">;</span>
 329 
 330       <span class="k">if</span> <span class="s">(</span><span class="k">ref</span> <span class="i">$NewInput</span><span class="s">)</span> <span class="s">{</span>
 331         <span class="c"># Input is an array reference containing matched token and text returned by</span>
 332         <span class="c"># a chained downstream lexer...</span>
 333         <span class="c">#</span>
 334         <span class="c"># Match $RegexPattern in available buffer text to retrieve any matched text</span>
 335         <span class="c"># for current $Token. $Separator might be &quot;&quot;: $RegexPattern is at start of</span>
 336         <span class="c"># $InputBuffer</span>
 337         <span class="c">#</span>
 338         <span class="c"># Process input buffer containing text to be matched for the current lexer</span>
 339         <span class="c"># which didn&#39;t get processed earlier during @NewTokens &gt; 2  while loop:</span>
 340         <span class="c"># no match for current lexer or more input available. It maintains order</span>
 341         <span class="c"># of token matching in input stream.</span>
 342         <span class="c">#</span>
 343         <span class="k">my</span><span class="s">(</span><span class="i">$Separator</span><span class="cm">,</span> <span class="i">$MatchedTokenRefOrText</span><span class="s">)</span><span class="sc">;</span>
 344 
 345         <span class="s">(</span><span class="i">$Separator</span><span class="cm">,</span> <span class="i">$MatchedTokenRefOrText</span><span class="s">)</span> = <span class="i">$TokenMatchAndSplitRef</span>-&gt;<span class="s">(</span><span class="i">$InputBuffer</span><span class="s">)</span><span class="sc">;</span>
 346         <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$MatchedTokenRefOrText</span><span class="s">)</span> <span class="s">{</span>
 347           <span class="i">$MatchedTokenRefOrText</span> = <span class="i">$TokenMatchActionRef</span>-&gt;<span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$MatchedTokenRefOrText</span><span class="s">)</span><span class="sc">;</span>
 348         <span class="s">}</span>
 349 
 350         <span class="c"># Collect valid token references or text...</span>
 351         <span class="k">push</span> <span class="i">@ProcessedTokens</span><span class="cm">,</span> <span class="k">grep</span> <span class="s">{</span> <span class="k">defined</span> <span class="i">$_</span> &amp;&amp; <span class="i">$_</span> <span class="k">ne</span> <span class="q">&quot;&quot;</span> <span class="s">}</span> <span class="s">(</span><span class="i">$Separator</span><span class="cm">,</span> <span class="i">$MatchedTokenRefOrText</span><span class="cm">,</span> <span class="i">$NewInput</span><span class="s">)</span><span class="sc">;</span>
 352 
 353         <span class="c"># Empty input buffer...</span>
 354         <span class="i">$InputBuffer</span> = <span class="q">&quot;&quot;</span><span class="sc">;</span>
 355 
 356         <span class="c"># Get out of the loop as processed token references and/or text are available...</span>
 357         <span class="k">last</span><span class="sc">;</span>
 358       <span class="s">}</span>
 359 
 360       <span class="c"># Process input retrieved from downstream lexer or input iterator which hasn&#39;t</span>
 361       <span class="c"># been processed into tokens..</span>
 362       <span class="k">if</span> <span class="s">(</span><span class="k">defined</span> <span class="i">$NewInput</span><span class="s">)</span> <span class="s">{</span>
 363         <span class="i">$InputBuffer</span> .= <span class="i">$NewInput</span><span class="sc">;</span>
 364       <span class="s">}</span>
 365 
 366       <span class="c"># Retrieve any matched tokens from available input for the current lexer...</span>
 367       <span class="c">#</span>
 368       <span class="k">my</span><span class="s">(</span><span class="i">@NewTokens</span><span class="s">)</span> = <span class="i">$TokenMatchAndSplitRef</span>-&gt;<span class="s">(</span><span class="i">$InputBuffer</span><span class="s">)</span><span class="sc">;</span>
 369 
 370       <span class="k">while</span> <span class="s">(</span> <span class="i">@NewTokens</span> &gt; <span class="n">2</span> || <span class="i">@NewTokens</span> &amp;&amp; !<span class="k">defined</span> <span class="i">$NewInput</span><span class="s">)</span> <span class="s">{</span>
 371         <span class="c"># Scenario 1: Complete match</span>
 372         <span class="c">#   @NewTokens &gt; 2 : Availability of separator, matched token text, separator.</span>
 373         <span class="c">#   The separator might correspond to a token for upstream lexer followed</span>
 374         <span class="c">#   by matched token from current lexer. It ends up getting passed to upstream</span>
 375         <span class="c">#   lexer for processing.</span>
 376         <span class="c">#</span>
 377         <span class="c"># Scenario 2: No more input available from iterator or downstream lexer</span>
 378         <span class="c">#   @NewTokens &lt;= 2 and no more input implies any left over text in buffer. And</span>
 379         <span class="c">#   it ends up getting passed to upstream lexer for processing.</span>
 380         <span class="c">#</span>
 381 
 382         <span class="c"># Take off any unprocessed input text that doesn&#39;t match off the buffer: It&#39;ll be</span>
 383         <span class="c"># passed to upstream chained lexer for processing...</span>
 384         <span class="c">#</span>
 385         <span class="k">push</span> <span class="i">@ProcessedTokens</span><span class="cm">,</span> <span class="k">shift</span> <span class="i">@NewTokens</span><span class="sc">;</span>
 386 
 387         <span class="k">if</span> <span class="s">(</span><span class="i">@NewTokens</span><span class="s">)</span> <span class="s">{</span>
 388           <span class="k">my</span> <span class="i">$MatchedTokenText</span> = <span class="k">shift</span> <span class="i">@NewTokens</span><span class="sc">;</span>
 389           <span class="k">push</span> <span class="i">@ProcessedTokens</span><span class="cm">,</span> <span class="i">$TokenMatchActionRef</span>-&gt;<span class="s">(</span><span class="i">$This</span><span class="cm">,</span> <span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$MatchedTokenText</span><span class="s">)</span><span class="sc">;</span>
 390         <span class="s">}</span>
 391       <span class="s">}</span>
 392 
 393       <span class="c"># Retrieve any leftover text from NewTokens and put it back into InputBuffer for</span>
 394       <span class="c"># processing by current lexer. All token references have been taken out....</span>
 395       <span class="c">#</span>
 396       <span class="i">$InputBuffer</span> = <span class="q">&quot;&quot;</span><span class="sc">;</span>
 397       <span class="k">if</span> <span class="s">(</span><span class="i">@NewTokens</span><span class="s">)</span> <span class="s">{</span>
 398         <span class="i">$InputBuffer</span> = <span class="k">join</span> <span class="q">&quot;&quot;</span><span class="cm">,</span> <span class="i">@NewTokens</span><span class="sc">;</span>
 399       <span class="s">}</span>
 400 
 401       <span class="k">if</span> <span class="s">(</span>!<span class="k">defined</span> <span class="i">$NewInput</span><span class="s">)</span> <span class="s">{</span>
 402         <span class="c"># No more input from the downstream lexer...</span>
 403         <span class="i">$InputBuffer</span> = <span class="k">undef</span><span class="sc">;</span>
 404       <span class="s">}</span>
 405 
 406       <span class="c"># Clean up any empty strings from ProcessedTokens containing token</span>
 407       <span class="c"># references or text...</span>
 408       <span class="i">@ProcessedTokens</span> = <span class="k">grep</span> <span class="s">{</span> <span class="i">$_</span> <span class="k">ne</span> <span class="q">&quot;&quot;</span> <span class="s">}</span> <span class="i">@ProcessedTokens</span><span class="sc">;</span>
 409 
 410     <span class="s">}</span>
 411 
 412     <span class="c"># Return reference to an array containing token and matched text or just unmatched input text...</span>
 413     <span class="k">my</span> <span class="i">$TokenRefOrText</span> = <span class="k">undef</span><span class="sc">;</span>
 414 
 415     <span class="k">if</span> <span class="s">(</span><span class="i">@ProcessedTokens</span><span class="s">)</span> <span class="s">{</span>
 416       <span class="c"># Get first available reference either by just peeking or removing it from the list</span>
 417       <span class="c"># of available tokens...</span>
 418       <span class="i">$TokenRefOrText</span> = <span class="s">(</span><span class="i">$Mode</span> =~ <span class="q">/^Peek$/i</span><span class="s">)</span> ?  <span class="i">$ProcessedTokens</span>[<span class="n">0</span>] <span class="co">:</span> <span class="k">shift</span> <span class="i">@ProcessedTokens</span><span class="sc">;</span>
 419     <span class="s">}</span>
 420 
 421     <span class="k">return</span> <span class="i">$TokenRefOrText</span><span class="sc">;</span>
 422   <span class="s">}</span><span class="sc">;</span>
 423 <span class="s">}</span>
 424 
 425 <span class="c"># Is it a lexer object?</span>
<a name="_IsLexer-"></a> 426 <span class="k">sub </span><span class="m">_IsLexer</span> <span class="s">{</span>
 427   <span class="k">my</span><span class="s">(</span><span class="i">$Object</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 428 
 429   <span class="k">return</span> <span class="s">(</span><span class="i">Scalar::Util::blessed</span><span class="s">(</span><span class="i">$Object</span><span class="s">)</span> &amp;&amp; <span class="i">$Object</span><span class="i">-&gt;isa</span><span class="s">(</span><span class="i">$ClassName</span><span class="s">)</span><span class="s">)</span> ? <span class="n">1</span> <span class="co">:</span> <span class="n">0</span><span class="sc">;</span>
 430 <span class="s">}</span>
 431 
 432 <span class="c"># Return a string containing information about lexer...</span>
<a name="StringifyLexer-"></a> 433 <span class="k">sub </span><span class="m">StringifyLexer</span> <span class="s">{</span>
 434   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 435   <span class="k">my</span><span class="s">(</span><span class="i">$LexerString</span><span class="s">)</span><span class="sc">;</span>
 436 
 437   <span class="i">$LexerString</span> = <span class="q">&quot;Lexer: PackageName: $ClassName; &quot;</span> . <span class="i">$This</span><span class="i">-&gt;_GetLexerInfoString</span><span class="s">(</span><span class="s">)</span><span class="sc">;</span>
 438 
 439   <span class="k">return</span> <span class="i">$LexerString</span><span class="sc">;</span>
 440 <span class="s">}</span>
 441 
 442 <span class="c"># Return a string containing information about lexer...</span>
<a name="_GetLexerInfoString-"></a> 443 <span class="k">sub </span><span class="m">_GetLexerInfoString</span> <span class="s">{</span>
 444   <span class="k">my</span><span class="s">(</span><span class="i">$This</span><span class="s">)</span> = <span class="i">@_</span><span class="sc">;</span>
 445   <span class="k">my</span><span class="s">(</span><span class="i">$LexerInfoString</span><span class="cm">,</span> <span class="i">$TokensSpec</span><span class="cm">,</span> <span class="i">$TokenSpec</span><span class="cm">,</span> <span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$TokenMatchRegex</span><span class="cm">,</span> <span class="i">$TokenMatchAction</span><span class="s">)</span><span class="sc">;</span>
 446 
 447   <span class="i">$LexerInfoString</span> = <span class="q">&quot;InputType: $This-&gt;{InputType}&quot;</span><span class="sc">;</span>
 448 
 449   <span class="k">if</span> <span class="s">(</span><span class="i">$This</span>-&gt;{<span class="w">InputType</span>} =~ <span class="q">/^String$/i</span><span class="s">)</span> <span class="s">{</span>
 450     <span class="i">$LexerInfoString</span> .= <span class="q">&quot;; InputString: $This-&gt;{Input}&quot;</span><span class="sc">;</span>
 451   <span class="s">}</span>
 452 
 453   <span class="i">$TokensSpec</span> = <span class="q">&quot;TokensSpecifications: &lt;None&gt;&quot;</span><span class="sc">;</span>
 454   <span class="k">if</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$This</span>-&gt;{<span class="w">TokensSpec</span>}}<span class="s">)</span> <span class="s">{</span>
 455     <span class="i">$TokensSpec</span> = <span class="q">&quot;TokensSpecifications: &lt; [Label, MatchRegex, MatchAction]:&quot;</span><span class="sc">;</span>
 456     <span class="k">for</span> <span class="i">$TokenSpec</span> <span class="s">(</span><span class="i">@</span>{<span class="i">$This</span>-&gt;{<span class="w">TokensSpec</span>}}<span class="s">)</span> <span class="s">{</span>
 457       <span class="s">(</span><span class="i">$TokenLabel</span><span class="cm">,</span> <span class="i">$TokenMatchRegex</span><span class="s">)</span> = <span class="i">@</span>{<span class="i">$TokenSpec</span>}<span class="sc">;</span>
 458       <span class="i">$TokenMatchAction</span> = <span class="s">(</span><span class="i">@</span>{<span class="i">$TokenSpec</span>} == <span class="n">3</span><span class="s">)</span> ? <span class="q">&quot;$TokenSpec-&gt;[2]&quot;</span> <span class="co">:</span> <span class="q">&quot;undefined&quot;</span><span class="sc">;</span>
 459       <span class="i">$TokensSpec</span> .= <span class="q">&quot; [$TokenLabel, $TokenMatchRegex, $TokenMatchAction]&quot;</span><span class="sc">;</span>
 460     <span class="s">}</span>
 461     <span class="i">$TokensSpec</span> .= <span class="q">&quot; &gt;&quot;</span><span class="sc">;</span>
 462   <span class="s">}</span>
 463 
 464   <span class="i">$LexerInfoString</span> .= <span class="q">&quot;; $TokensSpec&quot;</span><span class="sc">;</span>
 465 
 466   <span class="k">return</span> <span class="i">$LexerInfoString</span><span class="sc">;</span>
 467 <span class="s">}</span>
 468 
<a name="EOF-"></a></pre>
<p>&nbsp;</p>
<br />
<center>
<img src="../../../images/h2o2.png">
</center>
</body>
</html>