Remove pre-X3 components
96
.gitattributes
vendored
@@ -1,96 +0,0 @@
|
||||
* text=auto !eol svneol=native#text/plain
|
||||
*.gitattributes text svneol=native#text/plain
|
||||
|
||||
# Scriptish formats
|
||||
*.bat text svneol=native#text/plain
|
||||
*.bsh text svneol=native#text/x-beanshell
|
||||
*.cgi text svneol=native#text/plain
|
||||
*.cmd text svneol=native#text/plain
|
||||
*.js text svneol=native#text/javascript
|
||||
*.php text svneol=native#text/x-php
|
||||
*.pl text svneol=native#text/x-perl
|
||||
*.pm text svneol=native#text/x-perl
|
||||
*.py text svneol=native#text/x-python
|
||||
*.sh eol=lf svneol=LF#text/x-sh
|
||||
configure eol=lf svneol=LF#text/x-sh
|
||||
|
||||
# Image formats
|
||||
*.bmp binary svneol=unset#image/bmp
|
||||
*.gif binary svneol=unset#image/gif
|
||||
*.ico binary svneol=unset#image/ico
|
||||
*.jpeg binary svneol=unset#image/jpeg
|
||||
*.jpg binary svneol=unset#image/jpeg
|
||||
*.png binary svneol=unset#image/png
|
||||
*.tif binary svneol=unset#image/tiff
|
||||
*.tiff binary svneol=unset#image/tiff
|
||||
*.svg text svneol=native#image/svg%2Bxml
|
||||
|
||||
# Data formats
|
||||
*.pdf binary svneol=unset#application/pdf
|
||||
*.avi binary svneol=unset#video/avi
|
||||
*.doc binary svneol=unset#application/msword
|
||||
*.dsp text svneol=crlf#text/plain
|
||||
*.dsw text svneol=crlf#text/plain
|
||||
*.eps binary svneol=unset#application/postscript
|
||||
*.gz binary svneol=unset#application/gzip
|
||||
*.mov binary svneol=unset#video/quicktime
|
||||
*.mp3 binary svneol=unset#audio/mpeg
|
||||
*.ppt binary svneol=unset#application/vnd.ms-powerpoint
|
||||
*.ps binary svneol=unset#application/postscript
|
||||
*.psd binary svneol=unset#application/photoshop
|
||||
*.rdf binary svneol=unset#text/rdf
|
||||
*.rss text svneol=unset#text/xml
|
||||
*.rtf binary svneol=unset#text/rtf
|
||||
*.sln text svneol=native#text/plain
|
||||
*.swf binary svneol=unset#application/x-shockwave-flash
|
||||
*.tgz binary svneol=unset#application/gzip
|
||||
*.vcproj text svneol=native#text/xml
|
||||
*.vcxproj text svneol=native#text/xml
|
||||
*.vsprops text svneol=native#text/xml
|
||||
*.wav binary svneol=unset#audio/wav
|
||||
*.xls binary svneol=unset#application/vnd.ms-excel
|
||||
*.zip binary svneol=unset#application/zip
|
||||
|
||||
# Text formats
|
||||
.htaccess text svneol=native#text/plain
|
||||
*.bbk text svneol=native#text/xml
|
||||
*.cmake text svneol=native#text/plain
|
||||
*.css text svneol=native#text/css
|
||||
*.dtd text svneol=native#text/xml
|
||||
*.htm text svneol=native#text/html
|
||||
*.html text svneol=native#text/html
|
||||
*.ini text svneol=native#text/plain
|
||||
*.log text svneol=native#text/plain
|
||||
*.mak text svneol=native#text/plain
|
||||
*.qbk text svneol=native#text/plain
|
||||
*.rst text svneol=native#text/plain
|
||||
*.sql text svneol=native#text/x-sql
|
||||
*.txt text svneol=native#text/plain
|
||||
*.xhtml text svneol=native#text/xhtml%2Bxml
|
||||
*.xml text svneol=native#text/xml
|
||||
*.xsd text svneol=native#text/xml
|
||||
*.xsl text svneol=native#text/xml
|
||||
*.xslt text svneol=native#text/xml
|
||||
*.xul text svneol=native#text/xul
|
||||
*.yml text svneol=native#text/plain
|
||||
boost-no-inspect text svneol=native#text/plain
|
||||
CHANGES text svneol=native#text/plain
|
||||
COPYING text svneol=native#text/plain
|
||||
INSTALL text svneol=native#text/plain
|
||||
Jamfile text svneol=native#text/plain
|
||||
Jamroot text svneol=native#text/plain
|
||||
Jamfile.v2 text svneol=native#text/plain
|
||||
Jamrules text svneol=native#text/plain
|
||||
Makefile* text svneol=native#text/plain
|
||||
README text svneol=native#text/plain
|
||||
TODO text svneol=native#text/plain
|
||||
|
||||
# Code formats
|
||||
*.c text svneol=native#text/plain
|
||||
*.cpp text svneol=native#text/plain
|
||||
*.h text svneol=native#text/plain
|
||||
*.hpp text svneol=native#text/plain
|
||||
*.ipp text svneol=native#text/plain
|
||||
*.tpp text svneol=native#text/plain
|
||||
*.jam text svneol=native#text/plain
|
||||
*.java text svneol=native#text/plain
|
||||
1
.github/workflows/ci.yml
vendored
@@ -203,7 +203,6 @@ jobs:
|
||||
set -xe
|
||||
cp -rp $GITHUB_WORKSPACE libs/spirit
|
||||
cd libs/spirit/test
|
||||
ln -s ../repository/test repository # workaround legacy directory structure
|
||||
cd ${{ matrix.spirit_component }}
|
||||
|
||||
$BOOST_ROOT/b2 -d1 -j$BOOST_SPIRIT_BUILD_JOBS link=shared threading=multi variant=${{ matrix.build_type.lowercase }} toolset=${{ matrix.compiler.toolset }}-${{ matrix.compiler.version }} cxxstd=${{ matrix.cpp_version.number }} cxxflags="${{ matrix.compiler.cxxflags }}" $BOOST_SPIRIT_STDLIB warnings=extra warnings-as-errors=off
|
||||
|
||||
5
.gitignore
vendored
@@ -1,7 +1,2 @@
|
||||
|
||||
.DS_Store
|
||||
/build*/
|
||||
|
||||
test/lex/matlib_static_switch.h
|
||||
|
||||
test/lex/matlib_static.h
|
||||
|
||||
@@ -1,294 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Spirit Change Log</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="doc/theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
<h2>Spirit Change Log</h2>
|
||||
<h2>1.8.11</h2>
|
||||
<ul>
|
||||
<li>Fixed <tt>position_iterator</tt> forming reference to local when the
|
||||
underlying iterator dereference operator returns a non-reference type.
|
||||
<a href="https://github.com/boostorg/spirit/pull/422">PR#422</a>
|
||||
<a href="https://svn.boost.org/trac10/ticket/9737">TRAC#9737</a></li>
|
||||
<li>Removed use of deprecated <tt>boost/detail/iterator.hpp</tt> header.
|
||||
<a href="https://github.com/boostorg/spirit/pull/432">GH#432</a></li>
|
||||
</ul>
|
||||
<h2>1.8.10</h2>
|
||||
<ul>
|
||||
<li>Missing visibility mark on exception types.
|
||||
<a href="https://github.com/boostorg/spirit/pull/409">GH#409</a></li>
|
||||
</ul>
|
||||
<h2>1.8.9</h2>
|
||||
<ul>
|
||||
<li>Fixed a regression introduced in
|
||||
<a href="https://github.com/boostorg/spirit/pull/336">GH#336</a>.
|
||||
<a href="https://github.com/boostorg/spirit/pull/386">GH#386</a></li>
|
||||
<li>Minor code improvements.
|
||||
<a href="https://github.com/boostorg/spirit/pull/367">GH#367</a></li>
|
||||
</ul>
|
||||
<h2>1.8.8</h2>
|
||||
<ul>
|
||||
<li>Fixed <tt>remove_reference</tt> usage without a namespace in Phoenix.
|
||||
<a href="https://github.com/boostorg/spirit/pull/274">GH#274</a></li>
|
||||
<li>Fixed <tt>std::complex</tt> usage without the include.
|
||||
<a href="https://github.com/boostorg/spirit/pull/273">GH#273</a></li>
|
||||
<li>Fixed compilation of <tt>match<T&></tt>.
|
||||
<a href="https://github.com/boostorg/spirit/pull/275">GH#275</a></li>
|
||||
<li>Fixed compilation with <tt>BOOST_DISABLE_THREADS</tt> defined.
|
||||
<a href="https://github.com/boostorg/spirit/pull/323">GH#323</a>
|
||||
<a href="https://svn.boost.org/trac10/ticket/12639">#12639</a></li>
|
||||
<li>Increment scanner through iterator policy.
|
||||
<a href="https://github.com/boostorg/spirit/pull/336">GH#336</a>
|
||||
<a href="https://svn.boost.org/trac10/ticket/7371">TRAC#7371</a></li>
|
||||
<li>Removed deprecated in C++17 <tt>std::iterator</tt> usage.
|
||||
<a href="https://github.com/boostorg/spirit/pull/345">GH#345</a></li>
|
||||
</ul>
|
||||
<h2>1.8.7</h2>
|
||||
<ul>
|
||||
<li>Integrated the Spirit V1.8.x code base with Spirit V2. Spirit V1.8.x is
|
||||
now called
|
||||
<strong>Spirit Classic.</strong> Even if the directory
|
||||
structure has changed (the
|
||||
<strong>Spirit Classic</strong> headers are now moved to the
|
||||
'$BOOST_ROOT/boost/spirit/home/classic' directory), we created forwarding
|
||||
headers allowing to compile existing applications without any change.
|
||||
These forwarding headers are deprecated, though, which will result in
|
||||
corresponding warnings generated for each of the headers. The forwarding
|
||||
headers are expected to be removed in the future.
|
||||
<br />
|
||||
The recommended way of using Spirit Classic is now to include header
|
||||
files from the directory '$BOOST_ROOT/boost/spirit/include'. All files of
|
||||
<strong>Spirit Classic</strong>
|
||||
now have a 'classic_' prefix in their names. For example, the include
|
||||
<br/>
|
||||
<br><code> #include <boost/spirit/core/core.hpp></code><br/>
|
||||
<br/>
|
||||
now should be written as:
|
||||
<br/>
|
||||
<br/><code> #include <boost/spirit/include/classic_core.hpp></code><br/>
|
||||
<br/>
|
||||
To avoid namespace conflicts with the new Spirit V2 library we moved <strong>Spirit
|
||||
Classic</strong> into the <tt>namespace boost::spirit::classic</tt>. This change will be automatically deactivated whenever the deprecated include files are
|
||||
being used. This ensures full backwards compatibility for existing applications.
|
||||
<br />
|
||||
For more details about this change please consult the documentation.</li>
|
||||
</ul>
|
||||
<h2>1.8.6</h2>
|
||||
<ul>
|
||||
<li>Fixed an integer overflow bug that prevented parsing from failing on certain large integers. This bug was reported and fixed by Michael Andersen Nexø.</li>
|
||||
</ul>
|
||||
<h2>1.8.5</h2>
|
||||
<ul>
|
||||
<li>For performance reasons, leaf_node_d/token_node_d have been changed to implicit lexemes that create leaf nodes in one shot. The old token_node_d is still available and is now called reduced_node_d.</li>
|
||||
<li>It's now possible to phoenix::bind (version 1) symbols::add.</li>
|
||||
</ul>
|
||||
<h2>1.8.4</h2>
|
||||
<ul>
|
||||
<li>Fixed no_actions bug where no_action is applied recursively.</li>
|
||||
<li>Fixed the regex_p parser for Boost >= V1.33.0 </li>
|
||||
<li>Implemented a workaround for namespace issues VC++ has with Spirit's file_iterators</li>
|
||||
<li>Fixed bug in tree match policies that prevented using gen_pt/ast_node_d,
|
||||
<a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/9013">reported
|
||||
by Jascha Wetzel</a>.</li>
|
||||
<li>Made position_iterator usable with wchar_t based strings. </li>
|
||||
</ul>
|
||||
<h2>1.8.3</h2>
|
||||
<ul>
|
||||
<li>Config correction for Sun C++ by
|
||||
|
||||
|
||||
Steve Clamage (see <a href="https://sourceforge.net/tracker/?func=detail&atid=107586&aid=1220782&group_id=7586">this link</a>). </li>
|
||||
<li>Fixed multi_pass_iterator for 64-bit platforms, where sizeof(int) != sizeof(ptr_type). Fixed bug that prevents the use of closures with grammars with multiple entry points, <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/8868">reported by David Pierre</a></li>
|
||||
<li>Fixed bug that prevented embedding of grammars with multiple entry points, <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/8860">reported by David Pierre</a></li>
|
||||
<li>Added '\0' to the set of valid escaped characters for escape_ch_p.</li>
|
||||
<li>Fixed a switch_p bug when used with a phoenix::actor as the conditional expression.</li>
|
||||
<li>__LINE__ macro now gets expanded in BOOST_SPIRIT_ASSERT_EXCEPTION</li>
|
||||
<li>Fixed a bug in the intersection parser <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/8544">reported by Yusaku Sugai</a></li>
|
||||
<li>The symbol parser uses the null character internally. Checks were added so that:
|
||||
<ul>
|
||||
<li>tst.add asserts if string contains the null character</li>
|
||||
<li>tst.find doesn't match null characters in the input</li>
|
||||
</ul></li>
|
||||
<li>Fixed match_attr_traits.ipp to allow non-PODs to pass through. The previous version, which took the ellipsis "...", did not allow non-PODs to pass through.</li>
|
||||
<li>Allow evaluation to int as condition to if_p parser.</li>
|
||||
<li>Applied performance improvement changes to the PT/AST code as suggested by Stefan Slapeta. </li>
|
||||
<li>Fixed several problems with AST tree node directives (inner_node_d[], discard_first_node[], discard_last_node[] and infix_node_d[]). </li>
|
||||
</ul>
|
||||
<h2>1.8.2</h2>
|
||||
<p>Maintenance release (almost the same as 1.8.1 plus a few fixes here and there)</p>
|
||||
<ul>
|
||||
<li>Added specializations to str_p and ch_p to allow str_p('c') and ch_p("c") thus fixing some non-bugs</li>
|
||||
<li>Fixed bug where a match<T> is a variant.</li>
|
||||
<li>added Jamfile/Jamrules from CVS to spirit-1.8.1/</li>
|
||||
<li>added boost-build.jam from boost to spirit-1.8.1/</li>
|
||||
<li>disabled template multi-threading in libs/spirit/test/Jamfile</li>
|
||||
<li>added a boost-header-include rule (from spirit-header-include) pointing to miniboost in libs/spirit/test/Jamfile</li>
|
||||
<li>Fixed if_p inconsistency</li>
|
||||
</ul>
|
||||
<h2>1.6.2</h2>
|
||||
<p>The Spirit 1.6.2 release is a bug-fix release only, no new features were introduced.</p>
|
||||
<ul>
|
||||
<li>wchar_t friendly implementation of graph_p</li>
|
||||
<li>Modified escape_char_parser::parse() to use a static parser instead of a rule. This will make it more friendly to use in trees. It should also be a little more efficient.</li>
|
||||
<li>Moved to
|
||||
|
||||
|
||||
Boost Software license 1.0. </li>
|
||||
<li> workaround for Error 322 name lookup in base class specialization finds type</li>
|
||||
<li> fixed limit_d bug</li>
|
||||
<li> [numerics] Workaround for aC++</li>
|
||||
<li> Fixed a bug in the switch_p parser.</li>
|
||||
<li> Fixed a EOI problem in multi_pass</li>
|
||||
<li>added Jamfile/Jamrules from CVS to spirit-1.6.1/</li>
|
||||
<li>added boost-build.jam from boost to spirit-1.6.1/</li>
|
||||
<li>disabled template multi-threading in libs/spirit/test/Jamfile</li>
|
||||
<li>added a boost-header-include rule (from spirit-header-include) pointing to miniboost in libs/spirit/test/Jamfile</li>
|
||||
</ul>
|
||||
<h2>1.8.1 (Released with Boost 1.32.0)</h2>
|
||||
<p>The Spirit 1.8.1 release is a bug-fix release only, no new features were introduced.</p>
|
||||
<ul>
|
||||
<li>Spirit now requires at least Boost 1.32.0 to compile correctly</li>
|
||||
<li>Removed the support for the older iterator adaptor library</li>
|
||||
<li>Moved to use the new MPL library</li>
|
||||
<li>Spirit was moved to use the Boost Software License 1.0.</li>
|
||||
<li>Fixed several parsers to support post-skips more correctly.</li>
|
||||
<li>Fixed a no_node_d[] bug.</li>
|
||||
<li>Fixed a bug in shortest_d[].</li>
|
||||
<li>Fixed a bug in limit_d[].</li>
|
||||
<li>Fixed parser traversal meta code.</li>
|
||||
<li>Fixed several bugs in switch_p.</li>
|
||||
<li>Fixed AST generating problems, in particular with the loops related parsers.</li>
|
||||
<li>Fixed several bugs in the multi_pass iterator.
|
||||
<ul>
|
||||
<li>Fixed a problem, when the used base iterator returned a value_type and not a reference from its dereferencing operator.</li>
|
||||
<li>Fixed iterator_traits problem</li>
|
||||
<li>Fixed an EOI problem</li>
|
||||
<li>Fixed a bug, when used with std::cin</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Found a bug in grammar.ipp when BOOST_SPIRIT_SINGLE_GRAMMAR_INSTANCE is defined</li>
|
||||
<li>Rewritten safe_bool to use CRTP - now works also on MWCW, fixed several bugs with the implementation.</li>
|
||||
<li>Fixed and extended the debug diagnostics printed by the parse tree code.</li>
|
||||
</ul>
|
||||
<h2>1.8.0 (Released with Boost 1.31.0; Includes unreleased 1.7.1)</h2>
|
||||
<ul>
|
||||
<li>Fixed a wchar_t problem in the regex_p parser.</li>
|
||||
<li>removed code and workarounds for old compilers (VC6/7 and Borland)</li>
|
||||
<li> Changed license to the new boost license.</li>
|
||||
<li> Modified escape_char_parser::parse() to use a static parser instead of a rule. This will make it more friendly to use in trees. It should also be a little more efficient.</li>
|
||||
</ul>
|
||||
<h2>1.7.1 (Unreleased; becomes 1.8.0)</h2>
|
||||
<ul>
|
||||
<li>Added a full suite of predefined actors.</li>
|
||||
<li>Moved rule_alias and stored_rule from core/non-terminal to dynamic.<br>
|
||||
Made as_parser a public API in meta/as_parser.hpp</li>
|
||||
<li>Separated Core.Meta into its own module</li>
|
||||
<li>Refactored Utility module<br>
|
||||
Moved some files into Utility.Parsers</li>
|
||||
</ul>
|
||||
<blockquote>
|
||||
<ul>
|
||||
<li>utilities
|
||||
<ul>
|
||||
<li>parsers
|
||||
<ul>
|
||||
<li>chset, regex, escape_char<br>
|
||||
confix, list, distinct<br>
|
||||
functor_parser</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li> support
|
||||
<ul>
|
||||
<li>scoped_lock<br>
|
||||
flush_multi_pass<br>
|
||||
grammar_def</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li> actors
|
||||
<ul>
|
||||
<li>assign</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</blockquote>
|
||||
<ul>
|
||||
<li>Stored rules</li>
|
||||
<li>Added the switch_p and select_p dynamic parsers.</li>
|
||||
<li>Multiple scanner support for rules.</li>
|
||||
<li>The Rule's Scanner, Context and Tag template parameters can be specified in any order now. If a template parameter is missing, it will assume the defaults. See test/rule_tests.cpp.</li>
|
||||
<li>Introduced the possibility to specify more than one start rule from a grammar.</li>
|
||||
<li>Added an implementation of the file_iterator iterator based on the new Boost iterator_adaptors (submitted originally by Thomas Witt).</li>
|
||||
</ul>
|
||||
<p><em> [The transition to the new iterator_adaptors should be complete now.]</em></p>
|
||||
<ul>
|
||||
<li>Added an implementation of the fixed_size_queue iterator based on the new Boost iterator_adaptors.</li>
|
||||
<li> wchar_t friendly implementation of graph_p</li>
|
||||
<li>made the copy-constructor and assignment-operator of parser_error_base public to clear VC7.1 C4673 warning. Added copy-constructor and assignment operator to parser_error for clarity of intent.</li>
|
||||
</ul>
|
||||
<h2>1.7.0</h2>
|
||||
<ul>
|
||||
<li> assign(string) semantic action now works in VC6</li>
|
||||
<li>parsers need not be default constructible </li>
|
||||
<li>simplified aggregation of binary and unary parsers (more compiler friendly)</li>
|
||||
<li>epsilon workarounds for VC++</li>
|
||||
<li>match's attribute now uses boost.optional</li>
|
||||
<li>subrules can now have closures</li>
|
||||
<li>project wide 64 bit compatibility</li>
|
||||
<li>dynamic_parser_tag, reissue of rule.set_id(id);</li>
|
||||
<li>numerous primitives improvements and workarounds for problematic compilers</li>
|
||||
<li>proper complement (~) of single char parser primitives and chsets</li>
|
||||
<li>intuitive handling of lexeme_d </li>
|
||||
<li>wide_phrase_scanner_t typedef</li>
|
||||
<li>dynamic parser improvements (better support for more compilers)</li>
|
||||
<li>complete rewrite of the file_iterator (using boost.iterator_adapters). Supports
|
||||
memory maps wherever available</li>
|
||||
<li>multi_pass updates (compatibility with more compilers (e.g VC7) and more)</li>
|
||||
<li>position_iterator improvements</li>
|
||||
<li>better phoenix support for more compilers</li>
|
||||
<li>phoenix new_(...) construct</li>
|
||||
<li>new lazy_p parser</li>
|
||||
<li>utility.distinct parser (undocumented)</li>
|
||||
<li>chset operators improvements </li>
|
||||
<li>confix_p streamlining and improvements</li>
|
||||
<li>numerous Boost integration improvements</li>
|
||||
</ul>
|
||||
<h2>Bug fixes (1.7.0 and 1.6.0)</h2>
|
||||
<ul>
|
||||
<li> Fixed. Using MSVC++6 (SP5), calling the assign action with a string value
|
||||
on parsers using the file_iterator will not work. </li>
|
||||
<li> Fixed: using assign semantic action in a grammar with a multi_pass iterator
|
||||
adaptor applied to an std::istream_iterator resulted in a failure to compile
|
||||
under msvc 7.0. </li>
|
||||
<li> Fixed: There is a bug in the "range_run<CharT>::set (range<CharT>
|
||||
const& r)" function in the "boost\spirit\utility\impl\chset\range_run.ipp".
|
||||
</li>
|
||||
<li> Fixed: handling of trailing whitespace bug (ast_parse/pt_parse related)</li>
|
||||
<li> Fixed: comment_p and end of data bug</li>
|
||||
<li> Fixed: <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/4029">Most
|
||||
trailing space bug</a>:</li>
|
||||
<li> Fixed:<br>
|
||||
chset<>::operator~(range<>) bug<br>
|
||||
operator&(chset<>, range<>) bug<br>
|
||||
operator&(range<>, chset<>) bug</li>
|
||||
<li> Fixed: <a href="http://sourceforge.net/mailarchive/forum.php?thread_id=2008510&forum_id=25901">impl::detach_clear
|
||||
bug</a></li>
|
||||
<li> Fixed: <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/3678">mismatch
|
||||
closure return type bug</a></li>
|
||||
<li> Fixed: <a href="http://sf.net/mailarchive/forum.php?thread_id=1963157&forum_id=1595">access_node_d[]</a>
|
||||
and <a href="http://sf.net/mailarchive/forum.php?thread_id=1966224&forum_id=1595">access_match_d[]</a>
|
||||
iterator bugs</li>
|
||||
<li> Fixed a bug regarding threadsafety of Phoenix/Spirit closures.</li>
|
||||
<li> Added missing include files to miniboost</li>
|
||||
</ul>
|
||||
<p> <font size="2" color="#666666">Copyright © 1998-2005 Joel de Guzman, Hartmut Kaiser</font><br>
|
||||
<font size="2"><font color="#666666">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </font></p>
|
||||
<p> </p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,146 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Acknowledgments</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b><b>Acknowledgments</b></b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="rationale.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="references.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Special thanks to </p>
|
||||
<p><b>Dan Nuffer</b> for his work on lexers, parse trees, ASTs, XML parsers, the
|
||||
multi-pass iterator as well as administering Spirit's site, editing, maintaining
|
||||
the CVS and doing the releases plus a zillion of other chores that were almost
|
||||
taken for granted. </p>
|
||||
<p><b>Hartmut Kaiser</b> for his work on the C parser, the work on the C/C++ preprocessor,
|
||||
utility parsers, the original port to Intel 5.0, various work on Phoenix, porting
|
||||
to v1.5, the meta-parsers, the grouping-parsers, extensive testing and painstaking
|
||||
attention to details.</p>
|
||||
<p><b>Martin Wille</b> who improved grammar multi thread safety, contributed the
|
||||
eol_p parser, the dynamic parsers, documentation and for taking an active role
|
||||
in almost every aspect from brainstorming and design to coding. And, as always, helps keep the regression tests for g++ on Linux as green as ever :-). </p>
|
||||
<p><b>Martijn W. Van Der Lee</b> our Web site administrator and for contributing
|
||||
the RFC821 parser<b>.</b></p>
|
||||
<p><b>Giovanni Bajo</b> for last minute tweaks of Spirit 1.8.0 for CodeWarrior
|
||||
8.3. Actually, I'm ashamed Giovanni was not in this list already. He's done
|
||||
a lot since Spirit 1.5, the first Boost.Spirit release. He's instrumental in
|
||||
the porting of the Spirit iterators stuff to the new Boost Iterators Library
|
||||
(version 2). He also did various bug fixes and wrote some tests here and there.
|
||||
</p>
|
||||
<p><b>Juan Carlos Arevalo-Baeza (JCAB) </b>for his work on the C++ parser, the
|
||||
position iterator, ports to v1.5 and keeping the mailing list discussions alive
|
||||
and kicking.</p>
|
||||
<p><strong>Vaclav Vesely, </strong>lots of stuff, the no_actions directive, various patches and fixes, the distinct parsers, the lazy parser, some phoenix tweaks and add-ons (e.g. <tt>new_</tt>). Also, <strong>Stefan Slapeta</strong> and <strong>wife</strong> for editing Vaclav's distinct parser doc. </p>
|
||||
<p><b>Raghavendra Satish </b>for doing the original v1.3 port to VC++ and his
|
||||
work on Phoenix.</p>
|
||||
<p><b>Noah Stein</b> for following up and helping Ragav on the VC++ ports.</p>
|
||||
<p><b>Hakki Dogusan</b>, for his original v1.0 Pascal parser.</p>
|
||||
<p><b>John (EBo) David</b> for his work on the VM and watching over my shoulder
|
||||
as I code giving the impression of distance eXtreme programming.</p>
|
||||
<p><b>Chris Uzdavinis</b> for feeding in comments and valuable suggestions as
|
||||
well as editing the documentation.</p>
|
||||
<p><b>Carsten Stoll</b>, for his work on dynamic parsers.</p>
|
||||
<p><b>Andy Elvey</b> and his conifer parser.</p>
|
||||
<p><b>Bruce Florman</b>, who did the original v1.0 port to VC++.</p>
|
||||
<p><b>Jeff Westfahl </b>for porting the loop parsers to v1.5 and contributing
|
||||
the file iterator.</p>
|
||||
<p><b>Peter Simons</b> for the RFC date parser example and tutorial plus helping
|
||||
out with some nitty gritty details.</p>
|
||||
<p><b>Markus Schöpflin</b> for suggesting the end_p parser and lots of other
|
||||
nifty things and his active presence in the mailing list.</p>
|
||||
<p><b>Doug Gregor</b> for mentoring and his ability to see things that others
|
||||
don't. </p>
|
||||
<p><strong>David Abrahams</strong> for giving me a job that allows me to still
|
||||
work on Spirit, plus countless advice and help on C++ and specifically template
|
||||
metaprogramming.</p>
|
||||
<p><strong>Aleksey Gurtovoy</strong> for his MPL library from which I stole many
|
||||
metaprogramming tricks especially for less conforming compilers such as Borland
|
||||
and VC6/7.</p>
|
||||
<p><strong>Gustavo Guerra</strong> for his last minute review of Spirit and constant
|
||||
feedback, plus patches here and there (e.g. proposing the new dot behavior of
|
||||
the real numerics parsers).</p>
|
||||
<p><strong>Nicola Musatti, Paul Snively, Alisdair Meredith </strong>and<strong>
|
||||
Hugo Duncan </strong> for testing and sending in various patches.</p>
|
||||
<p><strong>Steve Rowe</strong> for his splendid work on the TSTs that will soon
|
||||
be taken into Spirit.</p>
|
||||
<p><strong>Jonathan de Halleux</strong> for his work on actors.</p>
|
||||
<p><strong>Angus Leeming</strong> for last minute editing work on the 1.8.0 release documentation, his work on Phoenix and his active presence in the Spirit mailing list.</p>
|
||||
<p> <strong>Joao Abecasis</strong> for his active presence in the Spirit mailing list, providing user support, participating in the discussions and so on. </p>
|
||||
<p> <strong>Guillaume Melquiond</strong> for a last minute patch to <tt>multi_pass</tt> for 1.8.1. </p>
|
||||
<p> <strong>Peder Holt</strong> for his porting work on Phoenix, Fusion and Spirit to VC6. </p>
|
||||
<p>To my wife <b>Mariel</b> who did the graphics in this document.</p>
|
||||
<p>My, there's a lot in this list! And it's a continuing list. I add people to this list every time. I hope I did not forget anyone. If I missed<br>
|
||||
someone you know who has helped in any way, please inform me.</p>
|
||||
<p> Special thanks also to people who gave feedback and valuable comments, particularly
|
||||
members of Boost and Spirit mailing lists. This includes all those who participated
|
||||
in the review:<br>
|
||||
<br>
|
||||
<strong>John Maddock</strong>, our review manager<br>
|
||||
<strong>Aleksey Gurtovoy<br>
|
||||
Andre Hentz<br>
|
||||
Beman Dawes<br>
|
||||
Carl Daniel<br>
|
||||
Christopher Currie<br>
|
||||
Dan Gohman<br>
|
||||
Dan Nuffer<br>
|
||||
Daryle Walker<br>
|
||||
David Abrahams<br>
|
||||
David B. Held<br>
|
||||
Dirk Gerrits<br>
|
||||
Douglas Gregor<br>
|
||||
Hartmut Kaiser<br>
|
||||
Iain K.Hanson<br>
|
||||
Juan Carlos Arevalo-Baeza<br>
|
||||
Larry Evans<br>
|
||||
Martin Wille<br>
|
||||
Mattias Flodin<br>
|
||||
Noah Stein<br>
|
||||
Nuno Lucas<br>
|
||||
Peter Dimov<br>
|
||||
Peter Simons<br>
|
||||
Petr Kocmid<br>
|
||||
Ross Smith<br>
|
||||
Scott Kirkwood<br>
|
||||
Steve Cleary<br>
|
||||
Thorsten Ottosen<br>
|
||||
Tom Wenisch<br>
|
||||
Vladimir Prus</strong></p>
|
||||
<p>Finally thanks to <a href="http://sourceforge.net">SourceForge</a> for hosting
|
||||
the Spirit project and <a href="http://www.boost.org/">Boost</a>: a C++ community
|
||||
comprised of extremely talented library authors who participate in the discussion
|
||||
and peer review of well crafted C++ libraries.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="rationale.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="references.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
</font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,354 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<meta content=
|
||||
"HTML Tidy for Windows (vers 1st February 2003), see www.w3.org"
|
||||
name="generator">
|
||||
<title>
|
||||
Basic Concepts
|
||||
</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Basic
|
||||
Concepts</b></font>
|
||||
</td>
|
||||
<td width="112">
|
||||
<a href="http://spirit.sf.net"><img src="theme/spirit.gif"
|
||||
width="112" height="48" align="right" border="0"></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table><br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30">
|
||||
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="quick_start.html"><img src="theme/l_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="organization.html"><img src="theme/r_arr.gif" border="0">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
There are a few fundamental concepts that need to be understood well: 1)
|
||||
The <strong>Parser</strong>, 2) <strong>Match</strong>, 3) The
|
||||
<strong>Scanner</strong>, and 4) <strong>Semantic Actions</strong>. These
|
||||
basic concepts interact with one another, and the functionalities of each
|
||||
interweave throughout the framework to make it one coherent whole.
|
||||
</p>
|
||||
<table width="48%" border="0" align="center">
|
||||
<tr>
|
||||
<td height="211">
|
||||
<img src="theme/intro1.png">
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>
|
||||
The Parser
|
||||
</h2>
|
||||
<p>
|
||||
Central to the framework is the parser. The parser does the actual work
|
||||
of recognizing a linear input stream of data read sequentially from start
|
||||
to end by the scanner. The parser attempts to match the input following a
|
||||
well-defined set of specifications known as grammar rules. The parser
|
||||
reports the success or failure to its client through a match object. When
|
||||
successful, the parser calls a client-supplied semantic action. Finally,
|
||||
the semantic action extracts structural information depending on the data
|
||||
passed by the parser and the hierarchical context of the parser it is
|
||||
attached to.
|
||||
</p>
|
||||
<p>
|
||||
Parsers come in different flavors. The Spirit framework comes bundled
|
||||
with an extensive set of pre-defined parsers that perform various parsing
|
||||
tasks from the trivial to the complex. The parser, as a concept, has a
|
||||
public conceptual interface contract. Following the contract, anyone can
|
||||
write a conforming parser that will play along well with the framework's
|
||||
predefined components. We shall provide a blueprint detailing the
|
||||
conceptual interface of the parser later.
|
||||
</p>
|
||||
<p>
|
||||
Clients of the framework generally do not need to write their own
|
||||
hand-coded parsers at all. Spirit has an immense repertoire of
|
||||
pre-defined parsers covering all aspects of syntax and semantic analysis.
|
||||
We shall examine this repertoire of parsers in the following sections. In
|
||||
the rare case where a specific functionality is not available, it is
|
||||
extremely easy to write a user-defined parser. The ease in writing a
|
||||
parser entity is the main reason for Spirit's extensibility.
|
||||
</p>
|
||||
<h2>
|
||||
Primitives and Composites
|
||||
</h2>
|
||||
<p>
|
||||
Spirit parsers fall into two categories: <b>primitives</b> and
|
||||
<b>composites</b>. These two categories are more or less synonymous with
|
||||
terminals and non-terminals in parsing lingo. Primitives are
|
||||
non-decomposable atomic units. Composites on the other hand are parsers
|
||||
that are composed of other parsers which can in turn be a primitive or
|
||||
another composite. To illustrate, consider the Spirit expression:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><span class="identifier">real_p</span> <span class=
|
||||
"special">>></span> <span class="special">*(</span><span class=
|
||||
"literal">','</span> <span class="special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">)</span></code>
|
||||
</pre>
|
||||
<p>
|
||||
<tt>real_p</tt> is a primitive parser that can parse real
|
||||
numbers. The quoted comma <tt class="quotes">','</tt> in the expression
|
||||
is a shortcut and is equivalent to <tt>ch_p<span class=
|
||||
"operators">(</span><span class="quotes">','</span><span class=
|
||||
"operators">)</span></tt>, which is another primitive parser that
|
||||
recognizes single characters.
|
||||
</p>
|
||||
<p>
|
||||
The expression above corresponds to the following parse tree:
|
||||
</p>
|
||||
<table width="29%" border="0" align="center">
|
||||
<tr>
|
||||
<td>
|
||||
<img src="theme/intro7.png">
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
The expression:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><span class=
|
||||
"literal">','</span> <span class="special">>></span> <span class=
|
||||
"identifier">real_p</span>
|
||||
</pre>
|
||||
<p>
|
||||
composes a <b>sequence</b> parser. The <tt>sequence</tt> parser is a
|
||||
composite parser comprising two parsers: the one on its left hand side
|
||||
(lhs), <tt>ch_p<span class="operators">(</span><span class=
|
||||
"quotes">','</span><span class="operators">)</span></tt> ; and the other
|
||||
on its right hand side (rhs), <tt>real_p</tt>. This composite parser,
|
||||
when called, calls its lhs and rhs in sequence and reports a successful
|
||||
match only if both are successful.
|
||||
</p>
|
||||
<table width="14%" border="0" align="center">
|
||||
<tr>
|
||||
<td>
|
||||
<img src="theme/intro2.png">
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
The <tt>sequence</tt> parser is a binary composite. It is composed of two
|
||||
parsers. There are unary composites as well. Unary composites hold only a
|
||||
single subject. Like the binary composite, the unary composite may change
|
||||
the behavior of its embedded subject. One particular example is the
|
||||
<b>Kleene star</b>. The Kleene star, when called to parse, calls its sole
|
||||
subject zero or more times. "Zero or more" implies that the Kleene star
|
||||
always returns a successful match, possibly matching the null string: "".
|
||||
</p>
|
||||
<p>
|
||||
The expression:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><span class=
|
||||
"special">*(</span><span class="literal">','</span> <span class=
|
||||
"special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">)</span></code>
|
||||
</pre>
|
||||
<p>
|
||||
wraps the whole sequence composite above inside a <tt>kleene_star</tt>.
|
||||
</p>
|
||||
<table width="17%" border="0" align="center">
|
||||
<tr>
|
||||
<td>
|
||||
<img src="theme/intro3.png">
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
Finally, the full expression composes a <tt>real_p</tt> primitive parser
|
||||
and the <tt>kleene_star</tt> we have above into another higher level
|
||||
<tt>sequence</tt> parser composite.
|
||||
</p>
|
||||
<table width="34%" border="0" align="center">
|
||||
<tr>
|
||||
<td>
|
||||
<img src="theme/intro4.png">
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
A few simple classes, when composed and structured in a hierarchy, form a
|
||||
very powerful object-oriented recursive-descent parsing engine. These
|
||||
classes provide the infrastructure needed for the construction of
|
||||
more-complex parsers. The final parser composite is a non-deterministic
|
||||
recursive-descent parser with infinite look-ahead.
|
||||
</p>
|
||||
<p>
|
||||
Top-down descent traverses the hierarchy. The outer <tt>sequence</tt>
|
||||
calls the leftmost <tt>real_p</tt> parser. If successful, the
|
||||
<tt>kleene_star</tt> is called next. The <tt>kleene_star</tt> calls the
|
||||
inner <tt>sequence</tt> repeatedly in a loop until it fails to match, or
|
||||
the input is exhausted. Inside, <tt>ch_p(',')</tt> and then
|
||||
<tt>real_p</tt> are called in sequence. The following diagram illustrates
|
||||
what is happening, somewhat reminiscent of Pascal syntax diagrams.
|
||||
</p>
|
||||
<table width="37%" border="0" align="center">
|
||||
<tr>
|
||||
<td>
|
||||
<img src="theme/intro5.png">
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
The flexibility of object embedding and composition combined with
|
||||
recursion opens up a unique approach to parsing. Subclasses are free to
|
||||
form aggregates and algorithms of arbitrary complexity. Complex parsers
|
||||
can be created with the composition of only a few primitive classes.
|
||||
</p>
|
||||
<p>
|
||||
The framework is designed to be fully open-ended and extensible. New
|
||||
primitives or composites, from the trivial to the complex, may be added
|
||||
any time. Composition happens (statically) at compile time. This is
|
||||
possible through the expressive flexibility of C++ expression templates
|
||||
and template meta-programming.
|
||||
</p>
|
||||
<p>
|
||||
The result is a composite composed of primitives and smaller composites.
|
||||
This embedding strategy gives us the ability to build hierarchical
|
||||
structures that fully model EBNF expressions of arbitrary complexity.
|
||||
Later on, we shall see more primitive and composite building blocks.
|
||||
</p>
|
||||
<h2>
|
||||
The Scanner
|
||||
</h2>
|
||||
<p>
|
||||
Like the parser, the scanner is also an abstract concept. The task of the
|
||||
scanner is to feed the sequential input data stream to the parser. The
|
||||
scanner is composed of two STL conforming forward iterators, first and
|
||||
last, where first is held by reference and last, by value. The first
|
||||
iterator is held by reference to allow re-positioning by the parser. A
|
||||
set of policies governs how the scanner behaves. Parsers extract data
|
||||
from the scanner and position the iterator appropriately through its
|
||||
member functions.
|
||||
</p>
|
||||
<p>
|
||||
Knowledge of the intricacies of these policies is not required at all in
|
||||
most cases. However, knowledge of the scanner's basic API is required to
|
||||
write fully-conforming Spirit parsers. The scanner's API will be outlined
|
||||
in a separate section. In addition, for the power users and the
|
||||
adventurous among us, a full section will be devoted to covering the
|
||||
scanner policies. The scanner policies make Spirit very flexible and
|
||||
extensible. For instance, some of the policies may be modified to filter
|
||||
data. A practical example is a scanner policy that does not distinguish
|
||||
upper and lower case, thereby making it useful for parsing case
|
||||
insensitive input. Another example is a scanner policy that strips white
|
||||
spaces from the input.
|
||||
</p>
|
||||
<h2>
|
||||
The Match
|
||||
</h2>
|
||||
<p>
|
||||
The parser has a conceptual parse member function taking in a scanner and
|
||||
returning a match object. The primary function of the match object is to
|
||||
report parsing success (or failure) back to the parser's caller; i.e., it
|
||||
evaluates to true if the parse function is successful, false otherwise.
|
||||
If the parse is successful, the match object may also be queried to
|
||||
report the number of characters matched (using <tt>match.length()</tt>).
|
||||
The length is non-negative if the match is successful, and the typical
|
||||
length of a parse failure is -1. A zero length is perfectly valid and
|
||||
still represents a successful match.
|
||||
</p>
|
||||
<p>
|
||||
Parsers may have attribute data associated with them. For example, the
|
||||
real_p parser has a numeric datum associated with it. This attribute is
|
||||
the parsed number. This attribute is passed on to the returned match
|
||||
object. The match object may be queried to get this attribute. This datum
|
||||
is valid only when the match is successful.
|
||||
</p>
|
||||
<h2>
|
||||
Semantic Actions
|
||||
</h2>
|
||||
<p>
|
||||
A composite parser forms a hierarchy. Parsing proceeds from the topmost
|
||||
parent parser which delegates and apportions the parsing task to its
|
||||
children recursively to its children's children and so on until a
|
||||
primitive is reached. By attaching semantic actions to various points in
|
||||
this hierarchy, in effect we can transform the flat linear input stream
|
||||
into a structured representation. This is essentially what parsers do.
|
||||
</p>
|
||||
<p>
|
||||
Recall our example above:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><span class=
|
||||
"identifier">real_p</span> <span class=
|
||||
"special">>></span> <span class="special">*(</span><span class=
|
||||
"literal">','</span> <span class="special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">)</span></code>
|
||||
</pre>
|
||||
<p>
|
||||
By hooking a function (or functor) into the real_p parsers, we can
|
||||
extract the numbers from the input:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><span class=
|
||||
"identifier">real_p</span><span class="special">[&</span><span class=
|
||||
"identifier">f</span><span class="special">]</span> <span class=
|
||||
"special">>></span> <span class="special">*(</span><span class=
|
||||
"literal">','</span> <span class="special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">[&</span><span class=
|
||||
"identifier">f</span><span class="special">])</span>
|
||||
</pre>
|
||||
<table width="41%" border="0" align="center">
|
||||
<tr>
|
||||
<td>
|
||||
<img src="theme/intro6.png">
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<p> where <tt>f</tt> is a function that takes in a single argument. The <tt><span class="operators">[&</span>f<span class=
|
||||
"operators">]</span></tt> hooks the parser with the function such that when
|
||||
<tt>real_p</tt> recognizes a valid number, the function <tt>f</tt> is called.
|
||||
It is up to the function then to do what is appropriate. For example, it can
|
||||
stuff the numbers in a vector. Or perhaps, if the grammar is changed slightly
|
||||
by replacing <tt class="quotes">','</tt> with <tt class="quotes">'+'</tt>, then
|
||||
we have a primitive calculator that computes sums. The function <tt>f</tt>
|
||||
can then be made to add all incoming numbers.<br>
|
||||
</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30">
|
||||
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="quick_start.html"><img src="theme/l_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="organization.html"><img src="theme/r_arr.gif" border="0">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table><br>
|
||||
<hr size="1">
|
||||
<p class="copyright">
|
||||
Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost
|
||||
Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or
|
||||
copy at http://www.boost.org/LICENSE_1_0.txt)</font>
|
||||
</p>
|
||||
<p>
|
||||
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,158 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Character Sets</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Character Sets</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="loops.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="confix.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The character set <tt>chset</tt> matches a set of characters over a finite
|
||||
range bounded by the limits of its template parameter <tt>CharT</tt>. This class
|
||||
is an optimization of a parser that acts on a set of single characters. The
|
||||
template class is parameterized by the character type <tt>CharT</tt> and can
|
||||
work efficiently with 8, 16, 32 and even 64 bit characters.</p>
|
||||
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>CharT </span><span class=special>= </span><span class=keyword>char</span><span class=special>>
|
||||
</span><span class=keyword>class </span><span class=identifier>chset</span><span class=special>;</span></pre>
|
||||
<p>The <tt>chset</tt> is constructed from literals (e.g. <tt>'x'</tt>), <tt>ch_p</tt>
|
||||
or <tt>chlit<></tt>, <tt>range_p</tt> or <tt>range<></tt>, <tt>anychar_p</tt>
|
||||
and <tt>nothing_p</tt> (see <a href="primitives.html">primitives</a>) or copy-constructed
|
||||
from another <tt>chset</tt>. The <tt>chset</tt> class uses a copy-on-write scheme
|
||||
that enables instances to be passed along easily by value.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>Sparse
|
||||
bit vectors</b><br>
|
||||
<br>
|
||||
To accommodate 16/32 and 64 bit characters, the <tt>chset</tt> class
|
||||
statically switches from a <tt>std::bitset</tt> implementation when the
|
||||
character type is not greater than 8 bits, to a sparse bit/boolean set which
|
||||
uses a sorted vector of disjoint ranges (<tt>range_run</tt>). The set is
|
||||
constructed from ranges such that adjacent or overlapping ranges are coalesced.<br>
|
||||
<br>
|
||||
range_runs are very space-economical in situations where there are lots
|
||||
of ranges and a few individual disjoint values. Searching is O(log n) where
|
||||
n is the number of ranges.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> Examples:<br>
|
||||
</p>
|
||||
<pre><span class=identifier> </span><span class=identifier>chset</span><span class=special><> </span><span class=identifier>s1</span><span class=special>(</span><span class=literal>'x'</span><span class=special>);
|
||||
</span><span class=identifier>chset</span><span class=special><> </span><span class=identifier>s2</span><span class=special>(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>s1</span><span class=special>);</span></pre>
|
||||
<p>Optionally, character sets may also be constructed using a definition string
|
||||
following a syntax that resembles posix style regular expression character sets,
|
||||
except that double quotes delimit the set elements instead of square brackets
|
||||
and there is no special negation <tt>^</tt> character.</p>
|
||||
<pre> <span class=identifier>range </span><span class=special>= </span><span class=identifier>anychar_p </span><span class=special>>> </span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>anychar_p</span><span class=special>;
|
||||
</span><span class=identifier>set </span><span class=special>= *(</span><span class=identifier>range_p </span><span class=special>| </span><span class=identifier>anychar_p</span><span class=special>);</span></pre>
|
||||
<p>Since we are defining the set using a C string, the usual C/C++ literal string
|
||||
syntax rules apply. Examples:<br>
|
||||
</p>
|
||||
<pre> <span class=identifier>chset</span><span class=special><> </span><span class=identifier>s1</span><span class=special>(</span><span class=string>"a-zA-Z"</span><span class=special>); </span><span class=comment>// alphabetic characters
|
||||
</span><span class=identifier>chset</span><span class=special><> </span><span class=identifier>s2</span><span class=special>(</span><span class=string>"0-9a-fA-F"</span><span class=special>); </span><span class=comment>// hexadecimal characters
|
||||
</span><span class=identifier>chset</span><span class=special><> </span><span class=identifier>s3</span><span class=special>(</span><span class=string>"actgACTG"</span><span class=special>); </span><span class=comment>// DNA identifiers
|
||||
</span><span class=identifier>chset</span><span class=special><> </span><span class=identifier>s4</span><span class=special>(</span><span class=string>"\x7f\x7e"</span><span class=special>); </span><span class=comment>// Hexadecimal 0x7F and 0x7E</span></pre>
|
||||
<p>The standard Spirit set operators apply (see <a href="operators.html">operators</a>)
|
||||
plus an additional character-set-specific inverse (negation <tt>~</tt>) operator:<span class=comment></span></p>
|
||||
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="2">Character set operators</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="28%"><b>~a</b></td>
|
||||
<td class="table_cells" width="72%">Set inverse</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="28%"><b>a | b</b></td>
|
||||
<td class="table_cells" width="72%">Set union</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="28%"><b>a & b</b></td>
|
||||
<td class="table_cells" width="72%">Set intersection</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="28%"><b>a - b</b></td>
|
||||
<td class="table_cells" width="72%">Set difference</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="28%"><b>a ^ b</b></td>
|
||||
<td class="table_cells" width="72%">Set xor</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p>where operands a and b are both <tt>chsets</tt> or one of the operands is either
|
||||
a literal character, <tt>ch_p</tt> or <tt>chlit</tt>, <tt>range_p</tt> or <tt>range</tt>,
|
||||
<tt>anychar_p</tt> or <tt>nothing_p</tt>. Special optimized overloads are provided
|
||||
for <tt>anychar_p</tt> and <tt>nothing_p</tt> operands. A <tt>nothing_p</tt>
|
||||
operand is converted to an empty set, while an <tt>anychar_p</tt> operand is
|
||||
converted to a set having elements of the full range of the character type used
|
||||
(e.g. 0-255 for unsigned 8 bit chars).</p>
|
||||
<p>A special case is <tt>~anychar_p</tt> which yields <tt>nothing_p</tt>, but
|
||||
<tt>~nothing_p</tt> is illegal. Inversion of <tt>anychar_p</tt> is asymmetrical,
|
||||
a one-way trip comparable to converting <tt>T*</tt> to a <tt>void*</tt>.</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="2">Special conversions</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="28%"><b>chset<CharT>(nothing_p)</b></td>
|
||||
<td class="table_cells" width="72%">empty set</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="28%"><b>chset<CharT>(anychar_p)</b></td>
|
||||
<td class="table_cells" width="72%">full range of CharT (e.g. 0-255 for unsigned
|
||||
8 bit chars)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="28%"><b>~anychar_p</b></td>
|
||||
<td class="table_cells" width="72%">nothing_p</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="28%"><b>~nothing_p</b></td>
|
||||
<td class="table_cells" width="72%">illegal</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<p></p><table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="loops.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="confix.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,338 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Closures</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
<style type="text/css">
|
||||
<!--
|
||||
.style1 {font-family: "Courier New", Courier, mono}
|
||||
-->
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Closures</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="phoenix.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="dynamic_parsers.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>Overview</h2>
|
||||
<p>Using phoenix, in the previous chapter, we've seen how we can get data from our parsers using <tt>var</tt>:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>int </span><span class=identifier>i</span><span class=special>;
|
||||
</span><span class=identifier> integer </span><span class=special>= </span><span class=identifier>int_p</span><span class=special>[</span><span class="identifier">var</span><span class=special>(</span><span class=identifier>i</span><span class=special>) = </span><span class="identifier">arg1</span><span class=special>];</span></font></code></pre>
|
||||
<p>Nifty! Our rule <tt>integer</tt>, if successful, passes the parsed integer
|
||||
to the variable <tt>i</tt>. Every time we need to parse an integer, we can call
|
||||
our rule <tt>integer</tt> and simply extract the parsed number from the variable
|
||||
<tt>i</tt>. There's something you should be aware of though. In the viewpoint
|
||||
of the grammar, the variable <tt>i</tt> is global. When the grammar gets more
|
||||
complex, it's hard to keep track of the current state of <tt>i</tt>. And, with
|
||||
recursive rules, global variables simply won't be adequate. </p>
|
||||
<p>Closures are needed if you need your rules (or grammars) to be reentrant. For example, a rule (or grammar) might be called recursively indirectly or directly by itself. The calculator is a good example. The expression rule recursively calls itself indirectly when it invokes the factor rule. </p>
|
||||
<p>Closures provide named (lazy) variables associated with each parse rule invocation. A closure variable is addressed using member syntax:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span>rulename<span class="special">.</span>varname</font></code></pre>
|
||||
<p>A closure variable <tt>R.x</tt> may be addressed in the semantic action of any other rule invoked by <tt>R</tt>; it refers to the innermost enclosing invocation of <tt>R</tt>. If no such invocation exists, an assertion fails at runtime. </p>
|
||||
<p>Closures provide an environment, a stack frame, for local variables.
|
||||
Most importantly, the closure variables are accessible from the EBNF grammar
|
||||
specification and can be used to pass parser information upstream or downstream
|
||||
from the topmost rule down to the terminals in a top-down recursive descent.
|
||||
Closures facilitate dynamic scoping in C++.
|
||||
Spirit's closure implementation is based on <em>Todd Veldhuizen</em>'s <strong>Dynamic
|
||||
scoping in C++</strong> technique that he presented in his paper <a href="ftp://ftp.cs.indiana.edu/pub/techreports/TR542.pdf">Techniques
|
||||
for Scientific C++</a>. </p>
|
||||
<p>When a rule is given a closure, the closure's local variables are created prior
|
||||
to entering the parse function and destructed after exiting the parse function.
|
||||
These local variables are true local variables that exist on the hardware stack.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> <strong>Closures</strong>
|
||||
<strong>and Phoenix</strong><br> <br>
|
||||
Spirit v1.8 closure support requires <a href="../phoenix/index.html">Phoenix</a>.
|
||||
In the future, Spirit will fully support <a href="../../../../libs/lambda/index.html">BLL</a>.
|
||||
Currently, work is underway to merge the features of both libraries.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>Example</h2>
|
||||
<p>Let's go back to the calculator grammar introduced in the <a href="functional.html">Functional</a> chapter. Here's the full grammar again, plus the closure declarations:</p>
|
||||
<pre><span class=special> </span><span class=keyword>struct </span><span class=identifier>calc_closure </span><span class=special>: </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>closure</span><span class=special><</span><span class=identifier>calc_closure</span><span class=special>, </span><span class=keyword>double</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>member1 </span><span class=identifier>val</span><span class=special>;
|
||||
</span><span class=special>};
|
||||
|
||||
</span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>calculator</span><span class=special>, </span><span class=identifier>calc_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>top </span><span class=special>= </span><span class=identifier>expression</span><span class=special>[</span><span class=identifier>self</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>];
|
||||
|
||||
</span><span class=identifier>expression
|
||||
</span><span class=special>= </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=special>(</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>+= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>-= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>)
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>term
|
||||
</span><span class=special>= </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=special>(</span><span class=literal>'*' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>*= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>/= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>)
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>factor
|
||||
</span><span class=special>= </span><span class=identifier>ureal_p</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
|
||||
</span><span class=special>| </span><span class=literal>'(' </span><span class=special>>> </span><span class=identifier>expression</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>] </span><span class=special>>> </span><span class=literal>')'
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=special>-</span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>;
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=keyword>typedef </span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>calc_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>> </span><span class=identifier>rule_t</span><span class=special>;
|
||||
</span><span class=identifier>rule_t </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=identifier>top</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=keyword>const</span><span class=special>&
|
||||
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>top</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=special>};
|
||||
</span><span class=special>};</span></pre>
|
||||
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/phoenix_calc.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
|
||||
<p>Surely, we've come a long way from the original version of this calculator. With inline <a href="phoenix.html#lambda">lambda expressions</a>, we were able to write self-contained grammars complete with semantic actions. </p>
|
||||
<p>The first thing to notice is the declaration of <tt>calc_closure</tt>. </p>
|
||||
<p> <strong>Declaring closures</strong></p>
|
||||
<p> The general closure declaration syntax is:</p>
|
||||
<pre><code> <span class=keyword>struct </span><span class=identifier>name</span><span class=special></span> <span class=special>: </span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>closure</span><span class=special><</span><span class=identifier>name</span><span class=special>, </span><span class=keyword>type1, type2, type3,... typeN</span><span class=special>>
|
||||
{
|
||||
</span><span class=identifier>member1 m_name1</span><span class=special>;
|
||||
</span><span class=identifier>member2 m_name2</span><span class=special>;
|
||||
</span><span class=identifier>member3 m_name3</span><span class=special>;
|
||||
...
|
||||
</span><span class=identifier>memberN m_nameN</span><span class=special>;
|
||||
};</span></code></pre>
|
||||
<p> <tt>member1</tt>... <tt>memberN</tt> are indirect links to the actual closure variables. Their indirect types correspond to <code><tt>type1</tt></code>... <code><tt>typeN</tt></code>. In our example, we declared <tt>calc_closure</tt>:</p>
|
||||
<pre><span class=number> </span><span class=keyword>struct </span><span class=identifier>calc_closure </span><span class=special>: </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>closure</span><span class=special><</span><span class=identifier>calc_closure</span><span class=special>, </span><span class=keyword>double</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>member1 </span><span class=identifier>val</span><span class=special>;
|
||||
</span><span class=special>};</span></pre>
|
||||
<p><tt>calc_closure</tt> has a single variable <tt>val</tt> of type <span class=keyword>double</span><span class=special></span>.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><p><img src="theme/alert.gif" width="16" height="16"> <tt>BOOST_SPIRIT_CLOSURE_LIMIT</tt><br>
|
||||
<br>
|
||||
Spirit predefined maximum closure limit. This limit defines the maximum number of elements a closure can hold. This number defaults to 3. The actual maximum is rounded up in multiples of 3. Thus, if this value is 4, the actual limit is 6. The ultimate maximum limit in this implementation is 15. It should <strong>NOT</strong> be greater than <tt>PHOENIX_LIMIT</tt> (see <a href="../phoenix/index.html">phoenix</a>). Example:<br>
|
||||
<br>
|
||||
<span class="comment style1">// Define these before including anything else <br>
|
||||
</span><span class="preprocessor style1">#define</span><span class="style1"> PHOENIX_LIMIT 10<br>
|
||||
</span><span class="preprocessor">#define</span><span class="style1"> BOOST_SPIRIT_CLOSURE_LIMIT 10</span></p> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><strong>Attaching closures</strong></p>
|
||||
<p>Closures can be applied to rules, subrules and grammars (non-terminals). The closure has a
|
||||
special <a href="indepth_the_parser_context.html">parser context</a> that can be used with these non-terminals. The closure's
|
||||
context is its means to hook into the non-terminal. The context of the closure <tt>C</tt> is <tt>C::context_t</tt>. </p>
|
||||
<p>We can see in the example that we attached <tt>calc_closure</tt> to the <tt>expression</tt>, <tt>term</tt> and <tt>factor</tt> rules in our grammar:</p>
|
||||
<pre><span class=special> </span><span class=keyword>typedef </span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>calc_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>> </span><span class=identifier>rule_t</span><span class=special>;
|
||||
</span><span class=identifier>rule_t </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor</span><span class=special>;</span> </pre>
|
||||
<p>as well as the grammar itself:</p>
|
||||
<pre><span class=special> </span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>calculator</span><span class=special>, </span><span class=identifier>calc_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>></span></pre>
|
||||
<p><strong>Closure return value</strong></p>
|
||||
<p>The closure <tt>member1</tt> is the closure's return value. This return value, like the one returned by <tt>anychar_p</tt>, for example, can be used to propagate data up the parser hierarchy or passed to semantic actions. Thus, <tt>expression</tt>, <tt>term</tt> and <tt>factor</tt>, as well as the <tt>calculator</tt> grammar itself, all return a <tt>double</tt>. </p>
|
||||
<p><strong>Accessing closure variables</strong></p>
|
||||
<p>Closure variables can be accessed from within semantic actions just like you
|
||||
would struct members: by qualifying the member name with its owner rule, subrule
|
||||
or grammar. In our example above, notice how we referred to the closure member val. Example:</p>
|
||||
<pre class="identifier"><code> expression<span class=special>.</span>val <span class="comment">// refer to expression's closure member val</span></code></pre>
|
||||
<p><strong>Initializing closure variables </strong></p>
|
||||
<p>We didn't use this feature in the example, yet, for completeness... </p>
|
||||
<p>Sometimes, we need to initialize our closure variables upon entering a non-terminal (rule, subrule or grammar). Closure enabled non-terminals, by default, default-construct variables upon entering the parse member function.
|
||||
If this is not desirable, we can pass constructor arguments to the non-terminal. The syntax mimics a
|
||||
function call. </p>
|
||||
<p>For (<em>a contrived</em>) example, if you wish to construct <tt>calc_closure</tt>'s variables
|
||||
to <tt>3.6</tt>, when we invoke the rule <tt>expression</tt>, we write:</p>
|
||||
<pre class="identifier"><code> expression<span class="special">(</span><span class="keyword">3.6</span><span class="special">) </span><span class="comment">// invoke rule expression and set its closure variable to 3.6</span></code></pre>
|
||||
<p>The constructor arguments are actually Phoenix lambda expressions, so you can
|
||||
use arbitrarily complex expressions. Here's another <em>contrived example<strong>: </strong></em></p>
|
||||
<pre class="identifier"><code> <span class="comment">// call rule factor and set its closure variable to (expression.x / 8) * term.y
|
||||
</span> <code>factor</code><span class="special">((</span>expression<span class="special">.</span>x<span class="keyword"> </span><span class="special">/</span><span class="keyword"> 8</span><span class="special">) *</span><span class="keyword"> </span>term<span class="special">.</span>y<span class="special">)</span></code></pre>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> We can pass fewer arguments than the actual number of variables in the closure.
|
||||
The variables at the right with no corresponding constructor arguments are default
|
||||
constructed. Passing more arguments than there are closure variables is an error.</p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/intermediate/parameters.cpp">parameters.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
|
||||
<h2>Closures and Dynamic parsing</h2>
|
||||
<p>Let's write a very simple parser for an XML/HTML like language with arbitrarily nested tags. The typical approach to this type of nested tag parsing is to delegate the actual tag matching to semantic actions, perhaps using a symbol table. For example, the semantic actions are responsible for ensuring that the tags are nested (e.g. this code: <tt><p><table></p></table></tt> is erroneous).</p>
|
||||
<p>Spirit allows us to dynamically modify the parser at runtime. The ability to guide parser behavior through semantic actions makes it possible to ensure the nesting of tags directly in the parser. We shall see how this is possible. Here's the grammar in its simplest form:</p>
|
||||
<pre><span class=identifier> element </span><span class=special>= </span><span class=identifier>start_tag </span><span class=special>>> </span><span class=special>*</span><span class=identifier>element </span><span class=special>>> </span><span class=identifier>end_tag</span><span class=special>;</span>
|
||||
</pre>
|
||||
<p>An element is a <tt>start_tag</tt> (e.g. <tt><font></tt>) followed by zero or more elements, and ended by an <tt>end_tag</tt> (e.g. <tt></font></tt>). Now, here's a first shot at our <tt>start_tag</tt>:</p>
|
||||
<pre><span class=special> </span><span class=identifier>start_tag </span><span class=special>= </span><span class=literal>'<' </span><span class=special>>> </span><span class=identifier>lexeme_d</span><span class=special>[</span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)</span><span class=special>] </span><span class=special>>> </span><span class=literal>'>'</span><span class=special>;</span></pre>
|
||||
<p>Notice that the <tt>end_tag</tt> is just the same as <tt>start_tag</tt> with the addition of a slash:</p>
|
||||
<pre><span class=special> </span><span class=identifier>end_tag </span><span class=special>= </span><span class=literal>"</" </span><span class=special>>> </span>what_we_got_in_the_start_tag <span class=special></span><span class=special>>> </span><span class=literal>'>'</span><span class=special>;</span>
|
||||
</pre>
|
||||
<p>What we need to do is to temporarily store what we got in our <tt>start_tag</tt> and use that later to parse our <tt>end_tag</tt>. Nifty, we can use the <a href="parametric_parsers.html">parametric parser</a> primitives to parse our <tt>end_tag</tt>: </p>
|
||||
<pre><span class=special> </span><span class=identifier>end_tag </span><span class=special>= </span><span class=string>"</" </span><span class=special>>> </span><span class=identifier>f_str_p</span><span class=special>(</span>tag<span class=special>) </span><span class=special>>> </span><span class=literal>'>'</span><span class=special>;</span></pre>
|
||||
<p>where we parameterize <tt>f_str_p</tt> with what we stored (tag). </p>
|
||||
<p>Be reminded though that our grammar is recursive. The element rule calls itself. Hence, we can't just use a variable and use <tt>phoenix::var</tt> or <tt>boost::ref</tt>. Nested recursion will simply gobble up the variable. Each invocation of element must have a closure variable <tt>tag</tt>. Here now is the complete grammar:</p>
|
||||
<pre><span class=number> </span><span class=keyword>struct </span><span class=identifier>tags_closure </span><span class=special>: </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>closure</span><span class=special><</span><span class=identifier>tags_closure</span><span class=special>, </span><span class=identifier>string</span><span class=special>> </span><span class=special>
|
||||
{
|
||||
</span><span class=identifier>member1 </span><span class=identifier>tag</span><span class=special>;
|
||||
</span><span class=special>};
|
||||
|
||||
</span><span class=keyword>struct </span><span class=identifier>tags </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>tags</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition </span><span class=special>{
|
||||
|
||||
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>tags </span><span class=keyword>const</span><span class=special>& </span><span class=comment>/*self*/</span><span class=special>)
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>element </span><span class=special>= </span><span class=identifier>start_tag </span><span class=special>>> </span><span class=special>*</span><span class=identifier>element </span><span class=special>>> </span><span class=identifier>end_tag</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>start_tag </span><span class=special>=
|
||||
</span><span class=literal>'<'
|
||||
</span><span class=special>>> </span><span class=identifier>lexeme_d
|
||||
</span><span class=special>[
|
||||
</span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)
|
||||
</span><span class=special>[
|
||||
</span><span class=comment>// construct string from arg1 and arg2 lazily
|
||||
</span><span class=comment>// and assign to element.tag
|
||||
|
||||
</span><span class=identifier>element</span><span class=special>.</span><span class=identifier>tag </span><span class=special>= </span><span class=identifier>construct_</span><span class=special><</span><span class=identifier>string</span><span class=special>>(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>)
|
||||
</span><span class=special>]
|
||||
</span><span class=special>]
|
||||
</span><span class=special>>> </span><span class=literal>'>'</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>end_tag </span><span class=special>= </span><span class=string>"</" </span><span class=special>>> </span><span class=identifier>f_str_p</span><span class=special>(</span><span class=identifier>element</span><span class=special>.</span><span class=identifier>tag</span><span class=special>) </span><span class=special>>> </span><span class=literal>'>'</span><span class=special>;
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>tags_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>> </span><span class=identifier>element</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=identifier>start_tag</span><span class=special>, </span><span class=identifier>end_tag</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>tags_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>> </span><span class=keyword>const</span><span class=special>&
|
||||
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>element</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=special>};
|
||||
</span><span class=special>};</span></pre>
|
||||
<p>We attached a semantic action to the <tt>(+alpha_p)</tt> part of the start_tag. There, we stored the parsed tag in the <tt>element</tt>'s closure variable <tt>tag</tt>. Later, in the <tt>end_tag</tt>, we simply used the <tt>element</tt>'s closure variable <tt>tag</tt> to parameterize our <tt>f_str_p</tt> parser. Simple and elegant. If some of the details begin to look like Greek (e.g. what is <tt>construct_</tt>?), please consult the <a href="phoenix.html">Phoenix</a> chapter. </p>
|
||||
<p><img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/matching_tags.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
|
||||
<h2><img src="theme/lens.gif" width="15" height="16"> Closures in-depth</h2>
|
||||
<p><strong>What are Closures?</strong></p>
|
||||
<p>The closure is an object that <span class="quotes">"closes"</span>
|
||||
over the local variables of a function making them visible and accessible outside
|
||||
the function. What is more interesting is that the closure actually packages
|
||||
a local context (stack frame where some variables reside) and makes it available
|
||||
outside the scope in which they actually exist. The information is essentially
|
||||
<span class="quotes">"captured"</span> by the closure allowing it
|
||||
to be referred to anywhere and anytime, even prior to the actual creation of
|
||||
the variables. </p>
|
||||
<p>The following diagram depicts the situation where a function <tt>A</tt> (or
|
||||
rule) exposes its closure and another function <tt>B</tt> references <tt>A</tt>'s
|
||||
variables through its closure.</p>
|
||||
<table width="40%" border="0" align="center">
|
||||
<tr>
|
||||
<td><img src="theme/closure1.png"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> <div align="center"><b><font face="Geneva, Arial, Helvetica, san-serif" size="+1" color="#003399">The
|
||||
closure as an object that <i>"closes"</i> over the local variables
|
||||
of a function making them visible and accessible outside the function</font></b></div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Of course, function <tt>A</tt> should be active when <tt>A.x</tt> is referenced.
|
||||
What this means is that function <tt>B</tt> is reliant on function <tt>A</tt>
|
||||
(If <tt>B</tt> is a nested function of <tt>A</tt>, this will always be the case).
|
||||
The free form nature of Spirit rules allows access to a closure variable anytime,
|
||||
anywhere. Accessing <tt>A.x</tt> is equivalent to referring to the topmost stack
|
||||
variable <tt>x</tt> of function <tt>A</tt>. If function <tt>A</tt> is not active
|
||||
when <tt>A.x</tt> is referenced, a runtime exception will be thrown.</p>
|
||||
<p><strong>Nested Functions</strong></p>
|
||||
<p>To fully understand the importance of closures, it is best to look at a language
|
||||
such as Pascal which allows nested functions. Since we are dealing with C++,
|
||||
let us assume for the moment that C++ allows nested functions. Consider the
|
||||
following <b><i>pseudo</i></b> C++ code:</p>
|
||||
<pre><span class=identifier> </span><span class=keyword>void </span><span class=identifier>a</span><span class=special>()
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>int </span><span class=identifier>va</span><span class=special>;
|
||||
</span><span class=keyword>void </span><span class=identifier>b</span><span class=special>()
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>int </span><span class=identifier>vb</span><span class=special>;
|
||||
</span> <span class=keyword>void </span><span class=identifier>c</span><span class=special>()
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>int </span><span class=identifier>vc</span><span class=special>;
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=identifier>c</span><span class=special>()</span><span class=special>;
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=identifier>b</span><span class=special>();
|
||||
</span><span class=special>}</span></pre>
|
||||
<p>We have three functions <tt>a</tt>, <tt>b</tt> and <tt>c</tt> where <tt>c</tt>
|
||||
is nested in <tt>b</tt> and <tt>b</tt> is nested in <tt>a</tt>. We also have
|
||||
three variables <tt>va</tt>, <tt>vb</tt> and <tt>vc</tt>. The lifetime of each
|
||||
of these local variables starts when the function where it is declared is entered
|
||||
and ends when the function exits. The scope of a local variable spans all nested
|
||||
functions inside the enclosing function where the variable is declared.</p>
|
||||
<p>Going downstream from function <tt>a</tt> to function <tt>c</tt>, when function
|
||||
<tt>a</tt> is entered, the variable <tt>va</tt> will be created on the stack. When function
|
||||
<tt>b</tt> is entered (called by <tt>a</tt>), <tt>va</tt> is very well in scope
|
||||
and is visible in <tt>b</tt>. At which point a fresh variable, <tt>vb</tt>, is
|
||||
created on the stack. When function <tt>c</tt> is entered, both <tt>va</tt>
|
||||
and <tt>vb</tt> are visibly in scope, and a fresh local variable <tt>vc</tt>
|
||||
is created. </p>
|
||||
<p>Going upstream, <tt>vc</tt> is not and cannot be visible outside the function
|
||||
<tt>c</tt>. <tt>vc</tt>'s life has already expired once <tt>c</tt> exits. The
|
||||
same is true with <tt>vb</tt>; <tt>vb</tt> is accessible in function <tt>c</tt> but not
|
||||
in function <tt>a</tt>. </p>
|
||||
<strong>Nested Mutually Recursive Rules</strong>
|
||||
<p>Now consider that <tt>a</tt>, <tt>b</tt> and <tt>c</tt> are rules:</p>
|
||||
<pre><span class=identifier> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>b </span><span class=special>>> </span><span class=special>*((</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>b</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>b</span><span class=special>));
|
||||
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>c </span><span class=special>>> </span><span class=special>*((</span><span class=literal>'*' </span><span class=special>>> </span><span class=identifier>c</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>>> </span><span class=identifier>c</span><span class=special>));
|
||||
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>int_p </span><span class=special>| </span><span class=literal>'(' </span><span class=special>>> </span><span class=identifier>a </span><span class=special>>> </span><span class=literal>')' </span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>c</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>c</span><span class=special>);</span></pre>
|
||||
<p>We can visualize <tt>a</tt>, <tt>b</tt> and <tt>c</tt> as mutually recursive
|
||||
functions where <tt>a</tt> calls <tt>b</tt>, <tt>b</tt> calls <tt>c</tt> and
|
||||
<tt>c</tt> recursively calls <tt>a</tt>. Now, imagine if <tt>a</tt>, <tt>b</tt>
|
||||
and <tt>c</tt> each has a local variable named <tt>value</tt> that can be referred
|
||||
to in our grammar by explicit qualification:</p>
|
||||
<pre><span class=special> </span><span class=identifier>a</span><span class=special>.</span><span class=identifier>value </span><span class=comment>// refer to a's value local variable
|
||||
</span><span class=identifier>b</span><span class=special>.</span><span class=identifier>value </span><span class=comment>// refer to b's value local variable
|
||||
</span><span class=identifier>c</span><span class=special>.</span><span class=identifier>value </span><span class=comment>// refer to c's value local variable</span>
|
||||
</pre>
|
||||
<p>Like above, when <tt>a</tt> is entered, a local variable <tt>value</tt> is
|
||||
created on the stack. This variable can be referred to by both <tt>b</tt> and
|
||||
<tt>c</tt>. Again, when <tt>b</tt> is called by <tt>a</tt>, <tt>b</tt> creates
|
||||
a local variable <tt>value</tt>. This variable is accessible by <tt>c</tt> but
|
||||
not by <tt>a</tt>. </p>
|
||||
<p>Here now is where the analogy with nested functions ends: when <tt>c</tt> is
|
||||
called, a fresh variable <tt>value</tt> is created which, as usual, lasts the
|
||||
whole lifetime of <tt>c</tt>. Pay close attention however that <tt>c</tt> may
|
||||
call <tt>a</tt> recursively. When this happens, <tt>a</tt> may now refer to
|
||||
the local variable of <tt>c</tt><code><span class=special>.</span></code></p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="phoenix.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="dynamic_parsers.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,185 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Confix Parsers</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b> </b></font></td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Confix Parsers</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="character_sets.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="list_parsers.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><a name="confix_parser"></a><b>Confix Parsers</b></p>
|
||||
<p>Confix Parsers recognize a sequence out of three independent elements: an
|
||||
opening, an expression and a closing. A simple example is a C comment:
|
||||
</p>
|
||||
<pre><code class="comment"> /* This is a C comment */</code></pre>
|
||||
<p>which could be parsed through the following rule definition:<code><font color="#000000">
|
||||
</font></code> </p>
|
||||
<pre><span class=identifier> </span><span class=identifier>rule</span><span class=special><> </span><span class=identifier>c_comment_rule
|
||||
</span><span class=special>= </span><span class=identifier>confix_p</span><span class=special>(</span><span class=literal>"/*"</span><span class=special>, </span><span class=special>*</span><span class=identifier>anychar_p</span><span class=special>, </span><span class=literal>"*/"</span><span class=special>)
|
||||
</span><span class=special>;</span></pre>
|
||||
<p>The <tt>confix_p</tt> parser generator
|
||||
should be used for generating the required Confix Parser. The
|
||||
three parameters to <tt>confix_p</tt> can be single
|
||||
characters (as above), strings or, if more complex parsing logic is required,
|
||||
auxiliary parsers, each of which is automatically converted to the corresponding
|
||||
parser type needed for successful parsing.</p>
|
||||
<p>The generated parser is equivalent to the following rule: </p>
|
||||
<pre><code> <span class=identifier>open </span><span class=special>>> (</span><span class=identifier>expr </span><span class=special>- </span><span class=identifier>close</span><span class=special>) >> </span><span class=identifier>close</span></code></pre>
|
||||
<p>If the expr parser is an <tt>action_parser_category</tt> type parser (a parser
|
||||
with an attached semantic action) we have to do something special. This happens,
|
||||
if the user wrote something like:</p>
|
||||
<pre><code><span class=identifier> confix_p</span><span class=special>(</span><span class=identifier>open</span><span class=special>, </span><span class=identifier>expr</span><span class=special>[</span><span class=identifier>func</span><span class=special>], </span><span class=identifier>close</span><span class=special>)</span></code></pre>
|
||||
<p>where <code>expr</code> is the parser matching the expr of the confix sequence
|
||||
and <code>func</code> is a functor to be called after matching the <code>expr</code>.
|
||||
  If we did nothing, the resulting code would parse the sequence as follows:</p>
|
||||
<pre><code> <span class=identifier>open </span><span class=special>>> (</span><span class=identifier>expr</span><span class=special>[</span><span class=identifier>func</span><span class=special>] - </span><span class=identifier>close</span><span class=special>) >> </span><span class=identifier>close</span></code></pre>
|
||||
<p>which in most cases is not what the user expects. (If this <u>is</u> what you've
|
||||
expected, then please use the <tt>confix_p</tt> generator
|
||||
function <tt>direct()</tt>, which will inhibit the parser refactoring). To make
|
||||
the confix parser behave as expected:</p>
|
||||
<pre><code><span class=identifier> open </span><span class=special>>> (</span><span class=identifier>expr </span><span class=special>- </span><span class=identifier>close</span><span class=special>)[</span><span class=identifier>func</span><span class=special>] >> </span><span class=identifier>close</span></code></pre>
|
||||
<p>the actor attached to the <code>expr</code> parser has to be re-attached to
|
||||
the <code>(expr - close)</code> parser construct, which will make the resulting
|
||||
  confix parser 'do the right thing'. This refactoring is done with the help of
|
||||
the <a href="refactoring.html">Refactoring Parsers</a>. Additionally special
|
||||
care must be taken, if the expr parser is a <tt>unary_parser_category</tt> type
|
||||
parser as </p>
|
||||
<pre><code><span class=identifier> confix_p</span><span class=special>(</span><span class=identifier>open</span><span class=special>, *</span><span class=identifier>anychar_p</span><span class=special>, </span><span class=identifier>close</span><span class=special>)</span></code></pre>
|
||||
<p>which without any refactoring would result in </p>
|
||||
<pre><code> <span class=identifier>open</span> <span class=special>>> (*</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>close</span><span class=special>) >> </span><span class=identifier>close</span></code></pre>
|
||||
<p>and will not give the expected result (*anychar_p will eat up all the input up
|
||||
  to the end of the input stream). So we have to refactor this into:</p>
|
||||
<pre><code><span class=identifier> open </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>close</span><span class=special>) >> </span><span class=identifier>close</span></code></pre>
|
||||
<p>which will give the correct result. </p>
|
||||
<p>The case, where the expr parser is a combination of the two mentioned problems
|
||||
(i.e. the expr parser is a unary parser with an attached action), is handled
|
||||
accordingly too, so: </p>
|
||||
<pre><code><span class=identifier> confix_p</span><span class=special>(</span><span class=identifier>open</span><span class=special>, (*</span><span class=identifier>anychar_p</span><span class=special>)[</span><span class=identifier>func</span><span class=special>], </span>close<span class=special>)</span></code></pre>
|
||||
<p>will be parsed as expected: </p>
|
||||
<pre><code> <span class=identifier>open</span> <span class=special>>> (*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>close</span><span class=special>))[</span><span class=identifier>func</span><span class=special>] >> </span>close</code></pre>
|
||||
<p>The required refactoring is implemented here with the help of the <a href="refactoring.html">Refactoring
|
||||
Parsers</a> too.</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title"><b>Summary of Confix Parser refactorings</b></td>
|
||||
</tr>
|
||||
<tr class="table_title">
|
||||
<td width="40%"><b>You write it as:</b></td>
|
||||
<td width="60%"><code><font face="Verdana, Arial, Helvetica, sans-serif">It
|
||||
is refactored to:</font></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="40%" class="table_cells"><code>confix_p<span class="special">(</span><span class=identifier>open</span><span class="special">,</span>
|
||||
expr<span class="special">,</span> close<span class="special">)</span></code></td>
|
||||
<td width="60%" class="table_cells"> <p><code>open <span class=special>>>
|
||||
(</span>expr <span class=special>-</span> close<span class=special>)</span><font color="#0000FF">
|
||||
</font><span class=special>>></span> close</code></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="40%" class="table_cells"><code>confix_p<span class="special">(</span><span class=identifier>open</span><span class="special">,</span>
|
||||
expr<span class="special">[</span>func<span class="special">],</span> close<span class="special">)</span></code></td>
|
||||
<td width="60%" class="table_cells"> <p><code>open <span class=special>>>
|
||||
(</span>expr <span class=special>-</span> close<span class="special">)[</span>func<span class="special">]
|
||||
<font color="#0000FF" class="special">>></font></span> close</code></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="40%" class="table_cells" height="9"><code>confix_p<span class="special">(</span><span class=identifier>open</span><span class="special">,
|
||||
*</span>expr<span class="special">,</span> close<span class="special">)</span></code></td>
|
||||
<td width="60%" class="table_cells" height="9"> <p><code>open <font color="#0000FF"><span class="special">>></span></font>
|
||||
<span class="special"><font color="#0000FF" class="special">*</font>(</span>expr
|
||||
<font color="#0000FF" class="special">-</font> close<span class="special">)
|
||||
<font color="#0000FF" class="special">>></font></span> close</code></p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="40%" class="table_cells"><code>confix_p<span class="special">(</span><span class=identifier>open</span><span class="special">,
|
||||
(*</span>expr<span class="special">)[</span>func<span class="special">],
|
||||
close</span><span class="special">)</span></code></td>
|
||||
<td width="60%" class="table_cells"> <p><code>open <font color="#0000FF"><span class="special">>></span></font><span class="special">
|
||||
(<font color="#0000FF" class="special">*</font>(</span>expr <font color="#0000FF" class="special">-</font>
|
||||
close<span class="special">))[</span>func<span class="special">] <font color="#0000FF" class="special">>></font></span>
|
||||
close</code></p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><a name="comment_parsers"></a><b>Comment Parsers</b></p>
|
||||
<p>The Comment Parser generator template <tt>comment_p</tt>
|
||||
  is a helper for generating a correct <a href="#confix_parser">Confix Parser</a>
|
||||
from auxiliary parameters, which is able to parse comment constructs as follows:
|
||||
</p>
|
||||
<pre><code> StartCommentToken <span class="special">>></span> Comment text <span class="special">>></span> EndCommentToken</code></pre>
|
||||
<p>The following types are supported as parameters: parsers, single
|
||||
characters and strings (see as_parser). If it
|
||||
is used with one parameter, a comment starting with the given first parser
|
||||
parameter up to the end of the line is matched. So for instance the following
|
||||
parser matches C++ style comments:</p>
|
||||
|
||||
<pre><code><span class=identifier> comment_p</span><span class=special>(</span><span class=string>"//"</span><span class=special>)</span></code></pre>
|
||||
<p>If it is used with two parameters, a comment starting with the first parser
|
||||
parameter up to the second parser parameter is matched. For instance a C style
|
||||
  comment parser could be constructed as:</p>
|
||||
<pre><code> <span class=identifier>comment_p</span><span class=special>(</span><span class=string>"/*"</span><span class=special>, </span><span class=string>"*/"</span><span class=special>)</span></code></pre>
|
||||
<p>The <tt>comment_p</tt> parser generator allows to generate parsers for matching
|
||||
non-nested comments (as for C/C++ comments). Sometimes it is necessary to parse
|
||||
nested comments as for instance allowed in Pascal.</p>
|
||||
<pre><code class="comment"> { This is a { nested } PASCAL-comment }</code></pre>
|
||||
<p>Such nested comments are
|
||||
parseable through parsers generated by the <tt>comment_nest_p</tt> generator
|
||||
template functor. The following example shows a parser, which can be used for
|
||||
parsing the two different (nestable) Pascal comment styles:</p>
|
||||
<pre><code> <span class=identifier>rule</span><span class=special><> </span><span class=identifier>pascal_comment
|
||||
</span><span class=special>= </span><span class=identifier>comment_nest_p</span><span class=special>(</span><span class=string>"(*"</span><span class=special>, </span><span class=string>"*)"</span><span class=special>)
|
||||
| </span><span class=identifier>comment_nest_p</span><span class=special>(</span><span class=literal>'{'</span><span class=special>, </span><span class=literal>'}'</span><span class=special>)
|
||||
;</span></code></pre>
|
||||
<p>Please note, that a comment is parsed implicitly as if the whole <tt>comment_p(...)</tt>
|
||||
statement were embedded into a <tt>lexeme_d[]</tt> directive, i.e. during parsing
|
||||
of a comment no token skipping will occur, even if you've defined a skip parser
|
||||
for your whole parsing process.</p>
|
||||
<p> <img height="16" width="15" src="theme/lens.gif"> <a href="../example/fundamental/comments.cpp">comments.cpp</a> demonstrates various comment parsing schemes: </p>
|
||||
<ol>
|
||||
<li>Parsing of different comment styles </li>
|
||||
<ul>
|
||||
<li>parsing C/C++-style comment</li>
|
||||
<li>parsing C++-style comment</li>
|
||||
<li>parsing PASCAL-style comment</li>
|
||||
</ul>
|
||||
<li>Parsing tagged data with the help of the confix_parser</li>
|
||||
<li>Parsing tagged data with the help of the confix_parser but the semantic<br>
|
||||
action is directly attached to the body sequence parser</li>
|
||||
</ol>
|
||||
<p>This is part of the Spirit distribution.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="character_sets.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="list_parsers.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2001-2002 Hartmut Kaiser<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,270 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Debugging</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Debugging</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="position_iterator.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="error_handling.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The top-down nature of Spirit makes the generated parser easy to micro-debug
|
||||
using the standard debugger bundled with the C++ compiler we are using. With
|
||||
recursive-descent, the parse traversal utilizes the hardware stack through C++
|
||||
function call mechanisms. There are no difficult to debug tables or state machines
|
||||
that obscure the parsing logic flow. The stack trace we see in the debugger
|
||||
follows faithfully the hierarchical grammar structure.</p>
|
||||
<p> Since any production rule can initiate a parse traversal, it is a lot easier
|
||||
to pinpoint the bugs by focusing on one or a few rules. For relatively complex
|
||||
parsing tasks, the same way we write robust C++ programs, it is advisable to
|
||||
develop a grammar iteratively on a per-module basis where each module is a small
|
||||
subset of the complete grammar. That way, we can stress-test individual modules
|
||||
piecemeal until we reach the top-most module. For instance, when developing
|
||||
a scripting language, we can start with expressions, then move on to statements,
|
||||
then functions, upwards until we have a complete grammar. </p>
|
||||
<p> At some point when the grammar gets quite complicated, it is desirable to
|
||||
visualize the parse traversal and see what's happening. There are some facilities
|
||||
in the framework that aid in the visualisation of the parse traversal for the
|
||||
purpose of debugging. The following macros enable these features.</p>
|
||||
<a name="debugging_macros"></a>
|
||||
<h2>Debugging Macros</h2>
|
||||
<a name="spirit_assert_exception"></a>
|
||||
<h3>BOOST_SPIRIT_ASSERT_EXCEPTION</h3>
|
||||
<p> Spirit contains assertions that may activate when spirit is used incorrectly.
|
||||
By default these assertions use the assert macro from the standard library.
|
||||
If you want spirit to throw an exception instead, define <tt>BOOST_SPIRIT_ASSERT_EXCEPTION</tt>
|
||||
to the name of the class that you want to be thrown. This class's constructor
|
||||
will be passed a <tt>const char*</tt> stringified version of the file, line,
|
||||
and assertion condition, when it is thrown. If you want to totally disable the
|
||||
assertion, <tt>#define NDEBUG</tt>.</p>
|
||||
<a name="spirit_debug"></a>
|
||||
<h3>BOOST_SPIRIT_DEBUG</h3>
|
||||
<p>Define this to enable debugging.</p>
|
||||
<p>With debugging enabled, special output is generated at key points of the
|
||||
parse process, using the standard output operator (<tt><span class="keyword">operator</span><span class="special"><<</span></tt>)
|
||||
with <tt>BOOST_SPIRIT_DEBUG_OUT</tt> (default is <tt><span class="identifier">std</span><span class="special">::</span><span class="identifier">cout</span></tt>,
|
||||
see below) as its left operand.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/note.gif"> In order to use spirit's
|
||||
debugging support you must ensure that appropriate overloads of
|
||||
<tt><span class="identifier">operator</span><span class="special"><<</span></tt>
|
||||
taking <tt>BOOST_SPIRIT_DEBUG_OUT</tt> as its left operand are available.
|
||||
The expected semantics are those of the standard output operator.<br>
|
||||
<br>
|
||||
These overloads may be provided either within the namespace where the
|
||||
corresponding class is declared (will be found through Argument Dependent Lookup) or [within an
|
||||
anonymous namespace] within <tt><span class="keyword">namespace</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">spirit</span></tt>,
|
||||
so it is visible where it is called.<br>
|
||||
<br>
|
||||
<img src="theme/alert.gif"> Note in particular that when <tt>BOOST_SPIRIT_DEBUG_FLAGS_CLOSURES</tt>
|
||||
is set, overloads of <tt><span class="identifier">operator</span><span class="special"><<</span></tt>
|
||||
taking instances of the types used in closures as their right operands are required.<br>
|
||||
<br>
|
||||
You may find an example of overloading the output operator for
|
||||
<tt><span class="identifier">std</span><span class="special">::</span><span class="identifier">pair</span></tt>
|
||||
in a <a href="faq.html#output_operator">related FAQ entry</a>.</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<p>By default, if the <tt>BOOST_SPIRIT_DEBUG</tt> macro is defined, all available
|
||||
debug output is generated. To fine tune the amount of generated text you can
|
||||
  define the <tt>BOOST_SPIRIT_DEBUG_FLAGS</tt> constant to be equal to a combination
|
||||
of the following flags:</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title"><b>Available flags to fine tune debug
|
||||
output </b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="29%" height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_NODES</tt></td>
|
||||
<td width="71%" class="table_cells"><p>print information about nodes (general
|
||||
for all parsers)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_TREES</tt></td>
|
||||
<td class="table_cells"><p>print information about parse trees and AST's (general
|
||||
for all tree parsers)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_CLOSURES</tt></td>
|
||||
<td class="table_cells">print information about closures (general for all
|
||||
parsers with closures)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_ESCAPE_CHAR</tt></td>
|
||||
<td class="table_cells"><p>print information out of the <tt>esc_char_parser</tt></p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_SLEX</tt></td>
|
||||
<td class="table_cells">print information out of the <tt>SLEX</tt> parser</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><a name="spirit_debug_out"></a> </p>
|
||||
<h3>BOOST_SPIRIT_DEBUG_OUT</h3>
|
||||
<p> Define this to redirect the debugging diagnostics printout to somewhere else
|
||||
(e.g. a file or stream). Defaults to <tt>std::cout</tt>.</p>
|
||||
<a name="spirit_debug_token printer"></a>
|
||||
<h3>BOOST_SPIRIT_DEBUG_TOKEN_PRINTER</h3>
|
||||
<p> The <tt>BOOST_SPIRIT_DEBUG_TOKEN_PRINTER</tt> macro allows you to redefine the way characters are printed on the stream. </p>
|
||||
<p>If <tt>BOOST_SPIRIT_DEBUG_OUT</tt> is of type <tt>StreamT</tt>, the character type is <tt>CharT</tt> and <tt>BOOST_SPIRIT_DEBUG_TOKEN_PRINTER</tt> is
|
||||
defined to <tt>foo</tt>, it must be compatible with this usage:</p>
|
||||
<pre><code><span class=identifier> foo</span><span class=special>(</span><span class=identifier>StreamT</span><span class=special>, </span><span class=identifier>CharT</span><span class=special>)</span></code></pre>
|
||||
<p>The default printer requires <tt>operator&lt;&lt;(StreamT, CharT)</tt> to
|
||||
be defined. Additionally, if <tt>CharT</tt> is convertible to a normal character
|
||||
type (<tt>char</tt>, <tt>wchar_t</tt> or <tt>int</tt>), it prints control
|
||||
characters in a friendly manner (e.g., when it receives <span class=special>'\n'</span> it
|
||||
actually prints the <span class=special>\</span> and <span class=special>n</span> characters,
|
||||
instead of a newline).</p>
|
||||
<a name="spirit_debug_print_some"></a>
|
||||
<h3>BOOST_SPIRIT_DEBUG_PRINT_SOME</h3>
|
||||
<p> The <tt>BOOST_SPIRIT_DEBUG_PRINT_SOME</tt> constant defines the number of
|
||||
characters from the stream to be printed for diagnosis. This defaults to the
|
||||
first 20 characters.</p>
|
||||
<p><a name="spirit_debug_tracenode"></a> </p>
|
||||
<h3>BOOST_SPIRIT_DEBUG_TRACENODE</h3>
|
||||
<p> By default all parser nodes are traced. This constant may be used to redefine
|
||||
this default. If this is <tt>1</tt> (<tt>true</tt>), then tracing is enabled
|
||||
by default, if this constant is <tt>0</tt> (<tt>false</tt>), the tracing is
|
||||
disabled by default. This preprocessor constant is set to <tt>1 </tt>(<tt>true</tt>)
|
||||
by default.</p>
|
||||
<p>Please note, that the following <tt>BOOST_SPIRIT_DEBUG_...() </tt>macros are
|
||||
to be used at function scope only.</p>
|
||||
<a name="spirit_debug_node_p_"></a>
|
||||
<h3>BOOST_SPIRIT_DEBUG_NODE(p)</h3>
|
||||
<p> Define this to print some debugging diagnostics for parser p. This macro</p>
|
||||
<ul>
|
||||
<li>Registers the parser name for debugging</li>
|
||||
<li>Enables/disables the tracing for parser depending on <tt>BOOST_SPIRIT_DEBUG_TRACENODE</tt></li>
|
||||
</ul>
|
||||
<p> <b>Pre-parse</b>: Before entering the rule, the rule name followed by a peek
|
||||
into the data at the current iterator position is printed.</p>
|
||||
<p> <b>Post-parse</b>: After parsing the rule, the rule name followed by a peek
|
||||
into the data at the current iterator position is printed. Here, <tt>'/'</tt>
|
||||
before the rule name flags a successful match while <tt>'#'</tt> before the rule
|
||||
name flags an unsuccessful match.</p>
|
||||
<p> The following are synonyms for <tt>BOOST_SPIRIT_DEBUG_NODE</tt></p>
|
||||
<ol>
|
||||
<li>BOOST_SPIRIT_DEBUG_RULE</li>
|
||||
<li>BOOST_SPIRIT_DEBUG_GRAMMAR</li>
|
||||
</ol>
|
||||
<a name="spirit_trace_node_p__flag_"></a>
|
||||
<h3>BOOST_SPIRIT_DEBUG_TRACE_NODE(p, flag)</h3>
|
||||
<p> Similar to <tt>BOOST_SPIRIT_DEBUG_NODE</tt>. Additionally allows selective debugging.
|
||||
This is useful in situations where we want to debug just a hand picked set of
|
||||
nodes.</p>
|
||||
<p> The following are synonyms for <tt>BOOST_SPIRIT_DEBUG_TRACE_NODE</tt></p>
|
||||
<ol>
|
||||
<li>BOOST_SPIRIT_DEBUG_TRACE_RULE</li>
|
||||
<li>BOOST_SPIRIT_DEBUG_TRACE_GRAMMAR</li>
|
||||
</ol>
|
||||
<p><a name="spirit_trace_node_p__flag__name_"></a> </p>
|
||||
<h3>BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME(p, name, flag)</h3>
|
||||
<p> Similar to <tt>BOOST_SPIRIT_DEBUG_NODE</tt>. Additionally allows selective
|
||||
debugging and allows to specify the name used during debug printout. This is
|
||||
useful in situations where we want to debug just a hand picked set of nodes.
|
||||
The <tt>name</tt> may be redefined in situations, where the parser parameter does not reflect the name of the parser to debug.</p>
|
||||
<p> The following are synonyms for <tt>BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME</tt></p>
|
||||
<ol>
|
||||
<li>BOOST_SPIRIT_DEBUG_TRACE_RULE_NAME</li>
|
||||
<li>BOOST_SPIRIT_DEBUG_TRACE_GRAMMAR_NAME</li>
|
||||
</ol>
|
||||
<hr>
|
||||
<p>Here's the original calculator with debugging features enabled:</p>
|
||||
<pre>
|
||||
<code><span class=preprocessor>#define </span><span class=identifier>BOOST_SPIRIT_DEBUG </span><span class=comment>///$$$ DEFINE THIS BEFORE ANYTHING ELSE $$$///
|
||||
</span><span class=preprocessor>#include </span><span class=string>"boost/spirit/include/classic.hpp"
|
||||
|
||||
</span><span class=comment>/***/
|
||||
|
||||
/*** CALCULATOR GRAMMAR DEFINITIONS HERE ***/
|
||||
|
||||
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>integer</span><span class=special>);
|
||||
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>group</span><span class=special>);
|
||||
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>factor</span><span class=special>);
|
||||
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>term</span><span class=special>);
|
||||
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>expr</span><span class=special>);
|
||||
</span></code></pre>
|
||||
<p> <img src="theme/note.gif" width="16" height="16"> Be sure to add the macros <strong>inside</strong> the grammar definition's constructor. Now here's a sample session with the calculator.</p>
|
||||
|
||||
<pre><code> <span class="preprocessor">Type an expression...or [q or Q] to quit</span>
|
||||
|
||||
<span class="preprocessor">1 + 2</span>
|
||||
|
||||
grammar(calc): "1 + 2"
|
||||
rule(expression): "1 + 2"
|
||||
rule(term): "1 + 2"
|
||||
rule(factor): "1 + 2"
|
||||
rule(integer): "1 + 2"
|
||||
<span class="preprocessor">push 1</span>
|
||||
/rule(integer): " + 2"
|
||||
/rule(factor): " + 2"
|
||||
/rule(term): " + 2"
|
||||
rule(term): "2"
|
||||
rule(factor): "2"
|
||||
rule(integer): "2"
|
||||
<span class="preprocessor">push 2</span>
|
||||
/rule(integer): ""
|
||||
/rule(factor): ""
|
||||
/rule(term): ""
|
||||
<span class="preprocessor">popped 1 and 2 from the stack. pushing 3 onto the stack.</span>
|
||||
/rule(expression): ""
|
||||
/grammar(calc): ""
|
||||
<span class="preprocessor">-------------------------
|
||||
Parsing succeeded
|
||||
result = 3
|
||||
-------------------------</span></code></pre>
|
||||
|
||||
<p> We typed in "1 + 2". Notice that there are two successful branches
|
||||
from the top rule <tt>expr</tt>. The text in red is generated by the parser's
|
||||
semantic actions while the others are generated by the debug-diagnostics of
|
||||
our rules. Notice how the first <tt>integer</tt> rule took "1", the
|
||||
first <tt>term</tt> rule took "+" and finally the second <tt>integer</tt>
|
||||
rule took "2".</p>
|
||||
<p>Please note the special meaning of the first characters appearing on the printed
|
||||
lines:</p>
|
||||
<ul>
|
||||
<li>a single <span class="literal">'/'</span> starts a line containing the information
|
||||
about a successfully matched parser node (<tt>rule<></tt>, <tt>grammar<></tt>
|
||||
or <tt>subrule<></tt>)</li>
|
||||
<li>a single <span class="literal">'#'</span> starts a line containing the information
|
||||
about a failed parser node</li>
|
||||
<li>a single <span class="literal">'^'</span> starts a line containing the first member (return value/synthesised
|
||||
attribute) of the closure of a successfully matched parser node.</li>
|
||||
</ul>
|
||||
<p>Check out <a href="../example/fundamental/calc_debug.cpp">calc_debug.cpp</a> to see debugging in action. </p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="position_iterator.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="error_handling.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
Copyright © 2003 Hartmut Kaiser<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,202 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Directives</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Directives</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="epsilon.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="scanner.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Parser directives have the form: <b>directive[expression]</b></p>
|
||||
<p>A directive modifies the behavior of its enclosed expression, essentially <em>decorating</em>
|
||||
it. The framework pre-defines a few directives. Clients of the framework are
|
||||
free to define their own directives as needed. Information on how this is done
|
||||
will be provided later. For now, we shall deal only with predefined directives.</p>
|
||||
<h2>lexeme_d</h2>
|
||||
<p>Turns off white space skipping. At the phrase level, the parser ignores white
|
||||
spaces, possibly including comments. Use <tt>lexeme_d</tt> in situations where
|
||||
we want to work at the character level instead of the phrase level. Parsers
|
||||
can be made to work at the character level by enclosing the pertinent parts
|
||||
inside the lexeme_d directive. For example, let us complete the example presented
|
||||
in the <a href="introduction.html">Introduction</a>. There, we skipped the definition
|
||||
of the <tt>integer</tt> rule. Here's how it is actually defined:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=identifier>integer </span><span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[ </span><span class=special>!(</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'+'</span><span class=special>) </span><span class=special>| </span><span class=literal>'-'</span><span class=special>) </span><span class=special>>> </span><span class=special>+</span><span class=identifier>digit </span><span class=special>];</span></font></code></pre>
|
||||
<p>The <tt>lexeme_d</tt> directive instructs the parser to work on the character
|
||||
level. Without it, the <tt>integer</tt> rule would have allowed erroneous embedded
|
||||
white spaces in inputs such as <span class="quotes">"1 2 345"</span>
|
||||
which will be parsed as <span class="quotes">"12345"</span>.</p>
|
||||
<h2>as_lower_d</h2>
|
||||
<p>There are times when we want to inhibit case sensitivity. The <tt>as_lower_d</tt>
|
||||
directive converts all characters from the input to lower-case.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"><b>
|
||||
as_lower_d behavior</b> <br>
|
||||
<br>
|
||||
It is important to note that only the input is converted to lower case.
|
||||
Parsers enclosed inside the <tt>as_lower_d</tt> expecting upper case characters
|
||||
will fail to parse. Example: <tt>as_lower_d[<span class="quotes">'X'</span>]</tt>
|
||||
will never succeed because it expects an upper case <tt class="quotes">'X'</tt>
|
||||
that the <tt>as_lower_d</tt> directive will never supply.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>For example, in Pascal, keywords and identifiers are case insensitive. Pascal
|
||||
ignores the case of letters in identifiers and keywords. Identifiers Id, ID
|
||||
and id are indistinguishable in Pascal. Without the as_lower_d directive, it
|
||||
would be awkward to define a rule that recognizes this. Here's a possibility:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>str_p</span><span class=special>(</span><span class=string>"id"</span><span class=special>) </span><span class=special>| </span><span class=string>"Id" </span><span class=special>| </span><span class=string>"iD" </span><span class=special>| </span><span class=string>"ID"</span><span class=special>;</span></font></code></pre>
|
||||
<p>Now, try doing that with the case insensitive Pascal keyword <span class="quotes">"BEGIN"</span>.
|
||||
The <tt>as_lower_d</tt> directive makes this simple:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=string>"begin"</span><span class=special>];</span></font></code></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><div align="justify"><img src="theme/note.gif" width="16" height="16">
|
||||
<b>Primitive arguments</b> <br>
|
||||
<br>
|
||||
The astute reader will notice that we did not explicitly wrap <span class="quotes">"begin"</span>
|
||||
inside an <tt>str_p</tt>. Whenever appropriate, directives should be able
|
||||
to allow primitive types such as <tt>char</tt>, <tt>int</tt>, <tt>wchar_t</tt>,
|
||||
<tt>char const<span class="operators">*</span></tt>, <tt>wchar_t const<span class="operators">*</span></tt>
|
||||
and so on. Examples: <tt><br>
|
||||
<br>
|
||||
</tt><code><span class=identifier>as_lower_d</span><tt><span class=special>[</span><span class=string>"hello"</span><span class=special>]
|
||||
</span><span class=comment>// same as as_lower_d[str_p("hello")]</span></tt><code></code><span class=identifier><br>
|
||||
as_lower_d</span><span class=special>[</span><span class=literal>'x'</span><span class=special>]
|
||||
</span><span class=comment>// same as as_lower_d[ch_p('x')]</span></code></div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3>no_actions_d</h3>
|
||||
<p>There are cases where you want <a href="semantic_actions.html">semantic actions</a>
|
||||
not to be triggered. By enclosing a parser in the <tt>no_actions_d</tt> directive,
|
||||
all semantic actions directly or indirectly attached to the parser will not
|
||||
fire. </p>
|
||||
<pre><code><font color="#000000"><span class=special> </span>no_actions_d<span class=special>[</span><span class=identifier>expression</span><span class=special>]</span></font></code><code><font color="#000000"><span class=special></span></font></code></pre>
|
||||
<h3>Tweaking the Scanner Type</h3>
|
||||
<p><img src="theme/note.gif" width="16" height="16"> How does <tt>lexeme_d, as_lower_d</tt>
|
||||
and <font color="#000000"><tt>no_actions_d</tt></font> work? These directives
|
||||
do their magic by tweaking the scanner policies. Well, you don't need to know
|
||||
what that means for now. Scanner policies are discussed <a href="indepth_the_scanner.html">later</a>.
|
||||
However, it is important to note that when the scanner policy is tweaked, the
|
||||
result is a different scanner. Why is this important to note? The <a href="rule.html">rule</a>
|
||||
is tied to a particular scanner (one or more scanners, to be precise). If you
|
||||
wrap a rule inside a <tt>lexeme_d, as_lower_d</tt> or <font color="#000000"><tt>no_actions_d</tt>, the
|
||||
compiler will complain about <a href="faq.html#scanner_business">scanner mismatch</a>
|
||||
unless you associate the required scanner with the rule. </font></p>
|
||||
<p><tt>lexeme_scanner</tt>, <tt>as_lower_scanner</tt> and <tt>no_actions_scanner</tt>
|
||||
are your friends if the need to wrap a rule inside these directives arises. Learn
|
||||
about these beasts in the next chapter on <a href="scanner.html#lexeme_scanner">The
|
||||
Scanner and Parsing</a>.</p>
|
||||
<h2>longest_d</h2>
|
||||
<p>Alternatives in the Spirit parser compiler are short-circuited (see <a href="operators.html">Operators</a>).
|
||||
Sometimes, this is not what is desired. The <tt>longest_d</tt> directive instructs
|
||||
the parser not to short-circuit alternatives enclosed inside this directive,
|
||||
but instead makes the parser try all possible alternatives and choose the one
|
||||
matching the longest portion of the input stream.</p>
|
||||
<p>Consider the parsing of integers and real numbers:</p>
|
||||
<pre><code><font color="#000000"><span class=comment> </span><span class=identifier>number </span><span class=special>= </span><span class=identifier>real </span><span class=special>| </span><span class=identifier>integer</span><span class=special>;</span></font></code></pre>
|
||||
<p>A number can be a real or an integer. This grammar is ambiguous. An input <span class="quotes">"1234"</span>
|
||||
should potentially match both real and integer. Recall though that alternatives
|
||||
are short-circuited. Thus, for inputs such as above, the real alternative always
|
||||
wins. However, if we swap the alternatives:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>number </span><span class=special>= </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>real</span><span class=special>;</span></font></code></pre>
|
||||
<p>we still have a problem. Now, an input <span class="quotes">"123.456"</span>
|
||||
will be partially matched by integer until the decimal point. This is not what
|
||||
we want. The solution here is either to fix the ambiguity by factoring out the
|
||||
common prefixes of real and integer or, if that is not possible nor desired,
|
||||
use the <tt>longest_d</tt> directive:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>number </span><span class=special>= </span><span class=identifier>longest_d</span><span class=special>[ </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>real </span><span class=special>];</span></font></code></pre>
|
||||
<h2>shortest_d</h2>
|
||||
<p>Opposite of the <tt>longest_d</tt> directive.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> <b>Multiple
|
||||
alternatives</b> <br>
|
||||
<br>
|
||||
The <tt>longest_d</tt> and <tt>shortest_d</tt> directives can accept two
|
||||
or more alternatives. Examples:<br>
|
||||
<br>
|
||||
<font color="#000000"><span class=identifier><code>longest_d</code></span><code><span class=special>[
|
||||
</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b
|
||||
</span><span class=special>| </span><span class=identifier>c </span><span class=special>];
|
||||
</span><span class=identifier><br>
|
||||
shortest_d</span><span class=special>[ </span><span class=identifier>a </span><span class=special>|
|
||||
</span><span class=identifier>b </span><span class=special>| </span><span class=identifier>c
|
||||
</span><span class=special>| </span><span class=identifier>d </span><span class=special>];</span></code></font></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>limit_d</h2>
|
||||
<p>Ensures that the result of a parser is constrained to a given min..max range
|
||||
(inclusive). If not, then the parser fails and returns a no-match.</p>
|
||||
<p><b>Usage:</b></p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>limit_d</span><span class=special>(</span><span class=identifier>min</span><span class=special>, </span><span class=identifier>max</span><span class=special>)[</span><span class=identifier>expression</span><span class=special>]</span></font></code></pre>
|
||||
<p>This directive is particularly useful in conjunction with parsers that parse
|
||||
specific scalar ranges (for example, <a href="numerics.html">numeric parsers</a>).
|
||||
Here's a practical example. Although the numeric parsers can be configured to
|
||||
accept only a limited number of digits (say, 0..2), there is no way to limit
|
||||
the result to a range (say -1.0..1.0). This design is deliberate. Doing so would
|
||||
have undermined Spirit's design rule that <i><span class="quotes">"the
|
||||
client should not pay for features that she does not use"</span></i>. We
|
||||
would have stored the min, max values in the numeric parser itself, used or
|
||||
unused. Well, we could get by by using static constants configured by a non-type
|
||||
template parameter, but that is not acceptable because that way, we can only
|
||||
accommodate integers. What about real numbers or user defined numbers such as
|
||||
big-ints?</p>
|
||||
<p><b>Example</b>, parse time of the form <b>HH:MM:SS</b>:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>int</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>2</span><span class=special>, </span><span class=number>2</span><span class=special>> </span><span class=identifier>uint2_p</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>lexeme_d
|
||||
</span><span class=special>[
|
||||
</span><span class=identifier>limit_d</span><span class=special>(</span><span class=number>0u</span><span class=special>, </span><span class=number>23u</span><span class=special>)[</span><span class=identifier>uint2_p</span><span class=special>] </span><span class=special>>> </span><span class=literal>':' </span><span class=comment>// Hours 00..23
|
||||
</span><span class=special>>> </span><span class=identifier>limit_d</span><span class=special>(</span><span class=number>0u</span><span class=special>, </span><span class=number>59u</span><span class=special>)[</span><span class=identifier>uint2_p</span><span class=special>] </span><span class=special>>> </span><span class=literal>':' </span><span class=comment>// Minutes 00..59
|
||||
</span><span class=special>>> </span><span class=identifier>limit_d</span><span class=special>(</span><span class=number>0u</span><span class=special>, </span><span class=number>59u</span><span class=special>)[</span><span class=identifier>uint2_p</span><span class=special>] </span><span class=comment>// Seconds 00..59
|
||||
</span><span class=special>];</span></font></code>
|
||||
</pre>
|
||||
<h2>min_limit_d</h2>
|
||||
<p>Sometimes, it is useful to unconstrain just the maximum limit. This will allow
|
||||
for an interval that's unbounded in one direction. The directive min_limit_d
|
||||
ensures that the result of a parser is not less than minimum. If not, then the
|
||||
parser fails and returns a no-match.</p>
|
||||
<p><b>Usage:</b></p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>min_limit_d</span><span class=special>(</span><span class=identifier>min</span><span class=special>)[</span><span class=identifier>expression</span><span class=special>]</span></font></code></pre>
|
||||
<p><b>Example</b>, ensure that a date is not less than 1900</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>min_limit_d</span><span class=special>(</span><span class=number>1900u</span><span class=special>)[</span><span class=identifier>uint_p</span><span class=special>]</span></font></code></pre>
|
||||
<h2>max_limit_d</h2>
|
||||
<p>Opposite of <tt>min_limit_d</tt>. Take note that <tt>limit_d(min, max)[p]</tt> is equivalent
|
||||
to:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>min_limit_d</span><span class=special>(</span><span class=identifier>min</span><span class=special>)[</span><span class=identifier>max_limit_d</span><span class=special>(</span><span class=identifier>max</span><span class=special>)[</span><span class=identifier>p</span><span class=special>]]</span></font></code><code><font color="#000000"><span class=special></span></font></code></pre>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="epsilon.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="scanner.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,122 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<!-- Generated by the Spirit (http://spirit.sf.net) QuickDoc -->
|
||||
<title>Distinct Parser</title>
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
<body>
|
||||
<table width="100%" height="48" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Distinct Parser </b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="scoped_lock.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="symbols.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3>Distinct Parsers</h3><p>
|
||||
The distinct parsers are utility parsers which ensure that matched input is
|
||||
not immediately followed by a forbidden pattern. Their typical usage is to
|
||||
distinguish keywords from identifiers.</p>
|
||||
<h3>distinct_parser</h3>
|
||||
<p>
|
||||
The basic usage of the <tt>distinct_parser</tt> is to replace the <tt>str_p</tt> parser. For
|
||||
example the <tt>declaration_rule</tt> in the following example:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>rule</span><span class=special><</span><span class="identifier">ScannerT</span><span class=special>> </span><span class=identifier>declaration_rule </span><span class=special>= </span><span class=identifier>str_p</span><span class=special>(</span><span class=string>"declare"</span><span class=special>) >> </span><span class=identifier>lexeme_d</span><span class=special>[+</span><span class=identifier>alpha_p</span><span class=special>];
|
||||
</span></code></pre>
|
||||
<p>
|
||||
would correctly match an input "declare abc", but also an input "declareabc", which is usually not intended. In order to avoid this, we can
|
||||
use <tt>distinct_parser</tt>:</p>
|
||||
<code>
|
||||
<pre>
|
||||
<span class=comment>// keyword_p may be defined in the global scope
|
||||
</span><span class=identifier>distinct_parser</span><span class=special><> </span><span class=identifier>keyword_p</span><span class=special>(</span><span class=string>"a-zA-Z0-9_"</span><span class=special>);
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class="identifier">ScannerT</span><span class=special>> </span><span class=identifier>declaration_rule </span><span class=special>= </span><span class=identifier>keyword_p</span><span class=special>(</span><span class=string>"declare"</span><span class=special>) >> </span><span class=identifier>lexeme_d</span><span class=special>[+</span><span class=identifier>alpha_p</span><span class=special>];
|
||||
</span></pre>
|
||||
</code>
|
||||
<p>
|
||||
The <tt>keyword_p</tt> works in the same way as the <tt>str_p</tt> parser but matches only
|
||||
when the matched input is not immediately followed by one of the characters
|
||||
from the set passed to the constructor of <tt>keyword_p</tt>. In the example the
|
||||
"declare" can't be immediately followed by any alphabetic character, any
|
||||
number or an underscore.</p>
|
||||
<p>
|
||||
See the full <a href="../example/fundamental/distinct/distinct_parser.cpp">example here </a>.</p>
|
||||
<h3>distinct_directive</h3><p>
|
||||
For more sophisticated cases, for example when keywords are stored in a
|
||||
symbol table, we can use <tt>distinct_directive</tt>.</p>
|
||||
<pre>
|
||||
<code><span class=identifier>distinct_directive</span><span class=special><> </span><span class=identifier>keyword_d</span><span class=special>(</span><span class=string>"a-zA-Z0-9_"</span><span class=special>);
|
||||
|
||||
</span><span class=identifier>symbol</span><span class=special><> </span><span class=identifier>keywords </span><span class=special>= </span><span class=string>"declare"</span><span class=special>, </span><span class=string>"begin"</span><span class=special>, </span><span class=string>"end"</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class="identifier">ScannerT</span><span class=special>> </span><span class=identifier>keyword </span><span class=special>= </span><span class=identifier>keyword_d</span><span class=special>[</span><span class=identifier>keywords</span><span class=special>];
|
||||
</span></code></pre>
|
||||
<h3>dynamic_distinct_parser and dynamic_distinct_directive</h3><p>
|
||||
In some cases a set of forbidden follow-up characters is not sufficient.
|
||||
For example, ASN.1 naming conventions allow identifiers to contain dashes,
|
||||
but not double dashes (which mark the beginning of a comment).
|
||||
Furthermore, identifiers can't end with a dash. So, a matched keyword can't
|
||||
be followed by any alphanumeric character or exactly one dash, but can be
|
||||
followed by two dashes.</p>
|
||||
<p>
|
||||
This is when <tt>dynamic_distinct_parser</tt> and the <tt>dynamic_distinct_directive </tt>come into play. The constructor of the <tt>dynamic_distinct_parser</tt> accepts a
|
||||
parser which matches any input that <strong>must NOT</strong> follow the keyword.</p>
|
||||
<pre>
|
||||
<code><span class=comment>// Alphanumeric characters and a dash followed by a non-dash
|
||||
// may not follow an ASN.1 identifier.
|
||||
</span><span class=identifier>dynamic_distinct_parser</span><span class=special><> </span><span class=identifier>keyword_p</span><span class=special>(</span><span class=identifier>alnum_p </span><span class=special>| (</span><span class=literal>'-' </span><span class=special>>> ~</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'-'</span><span class=special>)));
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class="identifier">ScannerT</span><span class=special>> </span><span class=identifier>declaration_rule </span><span class=special>= </span><span class=identifier>keyword_p</span><span class=special>(</span><span class=string>"declare"</span><span class=special>) >> </span><span class=identifier>lexeme_d</span><span class=special>[+</span><span class=identifier>alpha_p</span><span class=special>];
|
||||
</span></code></pre>
|
||||
<p>
|
||||
Since the <tt>dynamic_distinct_parser</tt> internally uses a rule, its type is
|
||||
dependent on the scanner type. So, the <tt>keyword_p</tt> shouldn't be defined
|
||||
globally, but rather within the grammar.</p>
|
||||
<p>
|
||||
See the full <a href="../example/fundamental/distinct/distinct_parser_dynamic.cpp">example here</a>.</p>
|
||||
<h3>How it works</h3><p>
|
||||
When the <tt>keyword_p_1</tt> and the <tt>keyword_p_2</tt> are defined as</p>
|
||||
<code><pre>
|
||||
<span class=identifier>distinct_parser</span><span class=special><> </span><span class=identifier>keyword_p_1</span><span class=special>(</span><span class=identifier>forbidden_chars</span><span class=special>);
|
||||
</span><span class=identifier>dynamic_distinct_parser</span><span class=special><> </span><span class=identifier>keyword_p_2</span><span class=special>(</span><span class=identifier>forbidden_tail_parser</span><span class=special>);
|
||||
</span></pre></code>
|
||||
<p>
|
||||
the parsers</p>
|
||||
<code><pre>
|
||||
<span class=identifier>keyword_p_1</span><span class=special>(</span><span class=identifier>str</span><span class=special>)
|
||||
</span><span class=identifier>keyword_p_2</span><span class=special>(</span><span class=identifier>str</span><span class=special>)
|
||||
</span></pre></code>
|
||||
<p>
|
||||
are equivalent to the rules</p>
|
||||
<code><pre>
|
||||
<span class=identifier>lexeme_d</span><span class=special>[</span><span class=identifier>chseq_p</span><span class=special>(</span><span class=identifier>str</span><span class=special>) >> ~</span><span class=identifier>epsilon_p</span><span class=special>(</span><span class=identifier>chset_p</span><span class=special>(</span><span class=identifier>forbidden_chars</span><span class=special>))]
|
||||
</span><span class=identifier>lexeme_d</span><span class=special>[</span><span class=identifier>chseq_p</span><span class=special>(</span><span class=identifier>str</span><span class=special>) >> ~</span><span class=identifier>epsilon_p</span><span class=special>(</span><span class=identifier>forbidden_tail_parser</span><span class=special>)]
|
||||
</span></pre></code>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="scoped_lock.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="symbols.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2003-2004
|
||||
|
||||
|
||||
Vaclav Vesely<br><br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,99 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html><head>
|
||||
|
||||
<title>Dynamic Parsers</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css"></head>
|
||||
<body>
|
||||
<table background="theme/bkd2.gif" border="0" cellspacing="2" width="100%">
|
||||
<tbody><tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font face="Verdana, Arial, Helvetica, sans-serif" size="6"><b>Dynamic
|
||||
Parsers </b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" align="right" border="0" height="48" width="112"></a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tbody><tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="closures.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="stored_rule.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<p>We see dynamic parsing everywhere in Spirit. A special group of
|
||||
parsers, aptly named dynamic parsers, form the most basic building
|
||||
blocks to dynamic parsing. This chapter focuses on these critters.
|
||||
You'll notice the similarity of these parsers with C++'s control
|
||||
structures. The similarity is not a coincidence. These parsers give an
|
||||
imperative flavor to parsing, and, since imperative constructs are not
|
||||
native to declarative EBNF, mimicking the host language, C++, should
|
||||
make their use immediately familiar. </p>
|
||||
<p>Dynamic parsers modify the parsing behavior according to conditions. Constructing
|
||||
dynamic parsers requires a condition argument and a body parser argument. Additional
|
||||
arguments are required by some parsers.</p>
|
||||
<h2>Conditions</h2>
|
||||
<p>Functions or functors returning values convertible to bool can be used as conditions.
|
||||
When the evaluation of the function/functor yields true it will be considered
|
||||
as meeting the condition.</p>
|
||||
<p>Parsers can be used as conditions, as well. When the parser matches, the condition
|
||||
is met. Parsers used as conditions work in an all-or-nothing manner: the scanner
|
||||
will not be advanced when they don't match.</p>
|
||||
<p>A failure to meet the condition will not result in a parse error.</p>
|
||||
<h2>if_p</h2>
|
||||
<p><tt>if_p</tt> can be used with or without an else-part. The syntax is:</p>
|
||||
<pre> <span class="identifier">if_p</span><span class="special">(</span><span class="identifier">condition</span><span class="special">)[</span><span class="identifier">then</span><span class="special">-</span><span class="identifier">parser</span><span class="special">]</span></pre>
|
||||
<p><span class="special"></span>or</p>
|
||||
<pre><span class="identifier"> if_p</span><span class="special">(</span><span class="identifier">condition</span><span class="special">)[</span><span class="identifier">then</span><span class="special">-</span><span class="identifier">parser</span><span class="special">].</span><span class="identifier">else_p</span><span class="special">[</span><span class="identifier">else</span><span class="special">-</span><span class="identifier">parser</span><span class="special">]</span></pre>
|
||||
<p>When the condition is met the then-parser is used next in the parsing process.
|
||||
When the condition is not met and an else-parser is available the else-parser
|
||||
is used next. When the condition isn't met and no else-parser is available then
|
||||
the whole parser matches the empty sequence. (<img src="theme/alert.gif" height="16" width="16">
|
||||
Note: older versions of <tt>if_p</tt> report a failure when the condition isn't
|
||||
met and no else-parser is available.)</p>
|
||||
<p>Example:</p>
|
||||
<pre> <span class="special"></span><span class="identifier">if_p</span><span class="special">(</span><span class="string">"0x"</span><span class="special">)[</span><span class="identifier">hex_p</span><span class="special">].</span><span class="identifier">else_p</span><span class="special">[</span><span class="identifier">uint_p</span><span class="special">]</span></pre>
|
||||
<h2>while_p, do_p</h2>
|
||||
<p><tt>while_p</tt>/<tt>do_p</tt> syntax is:</p>
|
||||
<pre> <span class="identifier">while_p</span><span class="special">(</span><span class="identifier">condition</span><span class="special">)[</span><span class="identifier">body</span><span class="special">-</span><span class="identifier">parser</span><span class="special">]<br> </span><span class="identifier">do_p</span><span class="special">[</span><span class="identifier">body</span><span class="special">-</span><span class="identifier">parser</span><span class="special">].</span><span class="identifier">while_p</span><span class="special">(</span><span class="identifier">condition</span><span class="special">)</span></pre>
|
||||
<p>As long as the condition is met the dynamic parser constructed by <tt>while_p</tt>
|
||||
will try to match the body-parser. <tt>do_p</tt> returns a parser that tries
|
||||
to match the body-parser and then behaves just like the parser returned by <tt>while_p</tt>.
|
||||
A failure to match the body-parser will cause a failure to be reported by the
|
||||
while/do-parser.</p>
|
||||
<p>Example:</p>
|
||||
<pre><span class="special"> </span><span class="identifier">uint_p</span><span class="special">[</span><span class="identifier">assign_a</span><span class="special">(</span><span class="identifier">sum</span><span class="special">)] >> </span><span class="identifier">while_p</span><span class="special">(</span><span class="literal">'+'</span><span class="special">)[</span><span class="identifier">uint_p</span><span class="special">[</span><span class="identifier">add</span><span class="special">(</span><span class="identifier">sum</span><span class="special">)]]<br> </span><span class="literal">'"' </span><span class="special">>> </span><span class="identifier">while_p</span><span class="special">(~</span><span class="identifier">eps_p</span><span class="special">(</span><span class="literal">'"'</span><span class="special">))[</span><span class="identifier">c_escape_ch_p</span><span class="special">[</span><span class="identifier">push_back_a</span><span class="special">(</span><span class="identifier">result</span><span class="special">)]] >> </span><span class="literal">'"'</span>
|
||||
</pre>
|
||||
<p>Assuming <span style="font-family: monospace;">add</span> is a user defined function object.<br></p><h2>for_p</h2>
|
||||
<p><tt>for_p</tt> requires four arguments. The syntax is:</p>
|
||||
<pre> <span class="literal"></span><span class="identifier">for_p</span><span class="special">(</span><span class="identifier">init</span><span class="special">, </span><span class="identifier">condition</span><span class="special">, </span><span class="identifier">step</span><span class="special">)[</span><span class="identifier">body</span><span class="special">-</span><span class="identifier">parser</span><span class="special">]</span></pre>
|
||||
<p>init and step have to be 0-ary functions/functors. for_p returns a parser that
|
||||
will:</p>
|
||||
<ol>
|
||||
<li> call init</li>
|
||||
<li>check the condition, if the
|
||||
condition isn't met then a match is returned. The match will cover
|
||||
everything that has been matched successfully up to this point.</li>
|
||||
<li> try to match the body-parser. A failure to match the body-parser will cause a failure to be reported by the for-parser</li>
|
||||
<li> call step</li>
|
||||
<li> go to 2.</li>
|
||||
</ol>
|
||||
<table border="0">
|
||||
<tbody><tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="closures.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="stored_rule.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2002-2003 Joel de Guzman<br>
|
||||
Copyright © 2002-2003 Martin Wille<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body></html>
|
||||
@@ -1,121 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Epsilon</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Epsilon</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="rule.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="directives.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The <strong>Epsilon</strong> (<tt>epsilon_p</tt> and <tt>eps_p</tt>) is a multi-purpose
|
||||
parser that returns a zero length match. </p>
|
||||
<h3>Simple Form</h3>
|
||||
<p>In its simplest form, epsilon_p matches the null string and always returns
|
||||
a match of zero length:</p>
|
||||
<pre><code><span class=special> </span><span class="identifier">epsilon_p </span><span class="comment">// always returns a zero-length match</span></code></pre>
|
||||
<p>This form is usually used to trigger a <a href="semantic_actions.html">semantic
|
||||
action</a> unconditionally. For example, it is useful in triggering error messages
|
||||
when a set of alternatives fail:</p>
|
||||
<pre><code><span class=special> </span><span class="identifier">r</span><span class="special"> = </span><span class="identifier">A</span><span class="special"> | </span><span class="identifier">B</span><span class="special"> | </span><span class="identifier">C</span><span class="special"> | </span><span class="identifier">eps_p</span><span class="special">[</span><span class="identifier">error</span><span class="special">];</span><span class="identifier"></span><span class="comment"> // error if A, B, or C fails to match</span></code></pre>
|
||||
<h3>Semantic Predicate</h3>
|
||||
<p>Semantic predicates allow you to attach a function anywhere in the grammar.
|
||||
In this role, the epsilon takes a 0-ary (nullary) function/functor. The run-time
|
||||
function/functor is typically a test that is called upon to resolve ambiguity
|
||||
in the grammar. A parse failure will be reported when the function/functor result
|
||||
evaluates to false. Otherwise an empty match will be reported. The general form
|
||||
is:</p>
|
||||
<pre> eps_p<span class="special">(</span>f<span class="special">) >></span> rest<span class="special">;</span>
|
||||
</pre>
|
||||
<p>The nullary function <tt>f</tt> is called to do a semantic test (say, checking
|
||||
if a symbol is in the <a href="symbols.html">symbol table</a>). If the test returns
|
||||
<tt>true</tt>, <tt>rest</tt> will be evaluated. Otherwise, the production will
|
||||
return early with a no-match without ever touching <tt>rest</tt>.</p>
|
||||
<h3>Syntactic Predicate</h3>
|
||||
<p>Similar to Semantic predicates, Syntactic predicates assert a certain conditional
|
||||
syntax to be satisfied before evaluating another production. This time, epsilon_p
|
||||
accepts a (conditional) parser. The general form is:</p>
|
||||
<pre> eps_p<span class="special">(</span>p<span class="special">) >></span> rest<span class="special">;</span>
|
||||
</pre>
|
||||
<p>If <tt>p</tt> is matched on the input stream then attempt to recognize <tt>rest</tt>.
|
||||
The parser <tt>p</tt> is called to do a syntax check. Regardless of <tt>p</tt>'s
|
||||
success, <tt>eps_p(p)</tt> will always return a zero length match (i.e. the
|
||||
input is not consumed). If <tt>p</tt> matches, <tt>rest</tt> will be
|
||||
evaluated. Otherwise, the production will return early with a no-match without
|
||||
ever touching <tt>rest</tt>.</p>
|
||||
<p>Example:</p>
|
||||
<pre><code><span class=special> </span><span class="identifier">eps_p</span><span class="special">(</span><span class="literal">'0'</span><span class="special">) >> </span><span class="identifier">oct_p </span><span class="comment">// note that '0' is actually a ch_p('0')</span><span class="identifier"> </span></code></pre>
|
||||
<p>Epsilon here is used as a syntactic predicate. <tt>oct_p</tt> (see <a href="numerics.html">numerics</a>)
|
||||
is parsed only if we see a leading <tt>'0'</tt>. Wrapping the leading <tt>'0'</tt>
|
||||
inside an epsilon makes the parser not consume anything from the input. If a
|
||||
<tt>'0'</tt> is seen, <tt>epsilon_p</tt> reports a successful match with zero
|
||||
length. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><div align="justify"><img src="theme/note.gif" width="16" height="16">
|
||||
<b>Primitive arguments</b> <br>
|
||||
<br>
|
||||
Epsilon allows primitive type arguments such as <tt>char</tt>, <tt>int</tt>,
|
||||
<tt>wchar_t</tt>, <tt>char const<span class="operators">*</span></tt>,
|
||||
<tt>wchar_t const<span class="operators">*</span></tt> and so on. Examples:
|
||||
<tt><br>
|
||||
<br>
|
||||
</tt><code><span class="identifier">eps_p</span><tt><span class=special>(</span><span class=string>"hello"</span><span class=special>)</span><span class=comment>
|
||||
// same as eps_p(str_p("hello"))</span></tt><span class=identifier><br>
|
||||
eps_p</span><span class=special>(</span><span class=literal>'x'</span><span class="special">)
|
||||
</span><span class=comment>// same as eps_p(ch_p('x'))</span></code></div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3><img src="theme/alert.gif" width="16" height="16"> Inhibiting Semantic Actions</h3>
|
||||
<p>In a syntactic predicate <tt>eps_p(p)</tt>, any semantic action directly or
|
||||
indirectly attached to the conditional parser <tt>p</tt> will not be called.
|
||||
However, semantic actions attached to epsilon itself will always be called.
|
||||
The following code snippets illustrate the behavior:</p>
|
||||
<pre> eps_p<span class="special">(</span>c<span class="special">[</span>f<span class="special">])</span> <span class="comment">// f not called</span><br> eps_p<span class="special">(</span>c<span class="special">)[</span>f<span class="special">]</span> <span class="comment">// f is called</span><br> eps_p<span class="special">[</span>f<span class="special">]</span> <span class="comment">// f is called</span></pre>
|
||||
<p>Actually, the conditional parser <tt>p</tt> is implicitly wrapped in a <tt><a href="scanner.html#no_actions_scanner">no_actions_d</a></tt>
|
||||
directive:</p>
|
||||
<pre><code><span class=special> </span>no_actions_d<span class="special">[</span>p<span class="special">]</span></code></pre>
|
||||
<p>The conditional parser is required to be free from side-effects (semantic actions).
|
||||
<code></code>The conditional parser's purpose is to resolve ambiguity by looking
|
||||
ahead in the input stream for a certain pattern. Ambiguity and semantic actions
|
||||
do not mix well. On an ambiguous grammar, backtracking happens. And when it
|
||||
happens, we cannot undo the effects of triggered semantic actions. </p>
|
||||
<h3>Negation</h3>
|
||||
<p>Operator <tt>~</tt> is defined for parsers constructed by <tt>epsilon_p</tt>/<tt>eps_p</tt>.
|
||||
It performs negation by complementing the results reported. <tt>~~eps_p(x)</tt>
|
||||
is identical to <tt>eps_p(x)</tt>.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="rule.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="directives.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
Copyright © 2003 Martin Wille<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,212 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Error Handling</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Error
|
||||
Handling </b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="debugging.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="quickref.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>C++'s exception handling mechanism is a perfect match for error handling in
|
||||
the framework. Imagine a complete parser as a maze. At each branch, the input
|
||||
dictates where we will turn. Given an erroneous input, we may reach a dead end.
|
||||
If we ever reach one, it would be a waste of time to backtrack from where we
|
||||
came from. Instead, we supply guards in strategic points. Beyond a certain point,
|
||||
we put parser assertions in places where one is not allowed to go. </p>
|
||||
<p>The assertions are like springs that catapult us back to the guard. If we ever
|
||||
reach a brick wall given a specific input pattern, everything unwinds quickly
|
||||
and we are thrown right back to the guard. This can be a very effective optimization
|
||||
when used wisely. Right back at the guard, we have a chance to correct the situation,
|
||||
if possible. The following illustration depicts the scenario.</p>
|
||||
<table border="0" align="center">
|
||||
<tr>
|
||||
<td><img src="theme/error_handling.png" width="313" height="238"></td>
|
||||
</tr>
|
||||
</table>
|
||||
<a name="the_parser_exception"></a>
|
||||
<h2>Parser Errors</h2>
|
||||
<p> The <tt>parser_error</tt> class is the generic parser exception class used
|
||||
by Spirit. This is the base class for all parser exceptions.</p>
|
||||
<pre> <code><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ErrorDescrT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT </span><span class=special>= </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*>
|
||||
</span><span class=keyword>class </span><span class=identifier>parser_error </span><span class=special>
|
||||
{
|
||||
</span><span class=keyword>public</span><span class=special>:
|
||||
</span><span class=identifier>parser_error</span><span class=special>(</span><span class=identifier>IteratorT </span><span class=identifier>where</span><span class=special>, </span><span class=identifier>ErrorDescrT </span><span class=identifier>descriptor</span><span class=special>);
|
||||
</span><span class=identifier>IteratorT </span><span class=identifier>where</span><span class=special>;
|
||||
</span><span class=identifier>ErrorDescrT</span><span class=identifier> descriptor</span><span class=special>;
|
||||
</span><span class=special>};
|
||||
</span></code></pre>
|
||||
<p> The exception holds the iterator position where the error was encountered
|
||||
in its <tt>where</tt> member variable. In addition to the iterator, <tt>parser_error</tt>
|
||||
also holds information regarding the error (error descriptor) in its <tt>descriptor
|
||||
</tt> member variable.</p>
|
||||
<p> Semantic actions are free to throw parser exceptions when necessary. A utility
|
||||
function <tt>throw_</tt> may be called. This function creates and throws a <tt>parser_error</tt>
|
||||
given an iterator and an error descriptor:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ErrorDescrT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>>
|
||||
</span><span class=keyword>void </span><span class=identifier>throw_</span><span class=special>(</span><span class=identifier>IteratorT where</span><span class=special>, </span><span class=identifier>ErrorDescrT descriptor</span><span class=special>);
|
||||
</span></code></pre>
|
||||
<a name="the_parser_assertion"></a>
|
||||
<h2>Parser Assertions</h2>
|
||||
<p> Assertions may be put in places where we don't have any option other
|
||||
than expect parsing to succeed. If parsing fails, a specific type of exception
|
||||
is thrown.</p>
|
||||
<p> Before declaring the grammar, we declare some assertion objects. <tt>assertion</tt>
|
||||
is a template class parameterized by the type of error that will be thrown once
|
||||
the assertion fails. The following assertions are parameterized by a user defined
|
||||
Error enumeration.</p>
|
||||
<a name="examples"></a>
|
||||
<h3>Examples</h3>
|
||||
<pre>
|
||||
<code><span class=keyword>enum </span><span class=identifier>Errors
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>program_expected</span><span class=special>,
|
||||
</span><span class=identifier>begin_expected</span><span class=special>,
|
||||
</span><span class=identifier>end_expected
|
||||
</span><span class=special>};
|
||||
|
||||
</span><span class=identifier>assertion</span><span class=special><</span><span class=identifier>Errors</span><span class=special>> </span><span class=identifier>expect_program</span><span class=special>(</span><span class=identifier>program_expected</span><span class=special>);
|
||||
</span><span class=identifier>assertion</span><span class=special><</span><span class=identifier>Errors</span><span class=special>> </span><span class=identifier>expect_begin</span><span class=special>(</span><span class=identifier>begin_expected</span><span class=special>);
|
||||
</span><span class=identifier>assertion</span><span class=special><</span><span class=identifier>Errors</span><span class=special>> </span><span class=identifier>expect_end</span><span class=special>(</span><span class=identifier>end_expected</span><span class=special>);
|
||||
</span></code></pre>
|
||||
<p> The example above uses enums to hold the information regarding the error,
|
||||
we are free to use other types such as integers and strings. For example, <tt>assertion&lt;string&gt;</tt>
|
||||
accepts a string as its info. It is advisable to use light-weight objects though,
|
||||
after all, error descriptors are usually static. Enums are convenient for error
|
||||
handlers to detect and easily catch since C++ treats enums as unique types.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16">
|
||||
The assertive_parser</b><br>
|
||||
<br>
|
||||
Actually, the expression <tt>expect_end(str_p("end"))</tt> creates
|
||||
an assertive_parser object. An assertive_parser is a parser that throws
|
||||
an exception in response to a parsing failure. The assertive_parser throws
|
||||
a parser_error exception rather than returning an unsuccessful match to
|
||||
signal that the parser failed to match the input. During parsing, parsers
|
||||
are given an iterator of type <tt>IteratorT</tt>. This is combined with
|
||||
the error descriptor type <tt>ErrorDescrT</tt> of the assertion (in this
|
||||
case enum <tt>Errors</tt>). Both are used to create a <tt>parser_error&lt;Errors,
|
||||
IteratorT></tt> which is then thrown to signal the exception. </td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> The predeclared <tt>expect_end</tt> assertion object may now be used in the
|
||||
grammar as wrappers around parsers. For example:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>expect_end</span><span class=special>(</span><span class=identifier>str_p</span><span class=special>(</span><span class=string>"end"</span><span class=special>))
|
||||
</span></code></pre>
|
||||
<p> This will throw an exception if it fails to see "end" from the input.</p>
|
||||
<a name="the_guard"></a>
|
||||
<h2>The Guard</h2>
|
||||
<p> The <tt>guard</tt> is used to catch a specific type of <tt>parser_error</tt>.
|
||||
guards are typically predeclared just like assertions. Extending our previous
|
||||
example:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>guard</span><span class=special><</span><span class=identifier>Errors</span><span class=special>> </span><span class=identifier>my_guard</span><span class=special>;
|
||||
</span></code></pre>
|
||||
<p> <tt>Errors</tt>, in this example, is the error descriptor type we want to detect.
|
||||
This is the same enum as above. <tt>my_guard</tt> may now be used in a grammar
|
||||
declaration:</p>
|
||||
<pre> <code><span class=identifier>my_guard</span><span class=special>(</span><span class=identifier>p</span><span class=special>)[</span><span class=identifier>error_handler</span><span class=special>]</span></code></pre>
|
||||
<p> where <tt>p</tt> is an expression that evaluates to a parser. Somewhere inside
|
||||
<tt>p</tt>, a parser may throw a parser exception. <tt>error_handler</tt> is
|
||||
the error handler which may be a function or functor compatible with the interface:</p>
|
||||
<pre> <code>error_status<span class=special><</span>T<span class=special>></span><span class=identifier>
|
||||
f</span><span class=special>(</span>ScannerT const& scan, ErrorT error<span class=special>);
|
||||
</span></code></pre>
|
||||
<p> Where scan points to the scanner state prior to parsing and error is the error
|
||||
that arose. The handler is allowed to move the scanner position as it sees fit,
|
||||
possibly in an attempt to perform error correction. The handler must then return
|
||||
an <tt>error_status&lt;T&gt;</tt> object. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16">
|
||||
The fallback_parser </b><br>
|
||||
<br>
|
||||
The expression <tt>my_guard(p)[error_handler]</tt> creates a fallback_parser
|
||||
object. The fallback_parser handles parser_error exceptions of a specific
|
||||
type. Since <tt>my_guard</tt> is declared as <tt>guard&lt;Errors&gt;</tt>,
|
||||
the fallback_parser catches <tt>Errors</tt> specific parser errors: <tt>parser_error&lt;Errors,
|
||||
IteratorT></tt>. The class sets up a try block. When an exception is
|
||||
caught, the catch block then calls the error_handler. </td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>error_status&lt;T&gt;</h2>
|
||||
<pre>
|
||||
<code><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T </span><span class=special>= </span><span class=identifier>nil_t</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>error_status
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>enum </span><span class=identifier>result_t </span><span class=special>{ </span><span class=identifier>fail</span><span class=special>, </span><span class=identifier>retry</span><span class=special>, </span><span class=identifier>accept</span><span class=special>, </span><span class=identifier>rethrow </span><span class=special>};
|
||||
|
||||
</span><span class=identifier>error_status</span><span class=special>(</span><span class=identifier>
|
||||
result_t result </span><span class=special>= </span><span class=identifier>fail</span><span class=special>,
|
||||
</span><span class=keyword>int </span><span class=identifier>length </span><span class=special>= -</span><span class=number>1</span><span class=special>,
|
||||
</span><span class=identifier>T </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>value </span><span class=special>= </span><span class=identifier>T</span><span class=special>());
|
||||
</span>
|
||||
<span class=identifier>result_t result</span><span class=special>;
|
||||
</span><span class=keyword>int </span><span class=identifier>length</span><span class=special>;
|
||||
</span><span class=identifier>T value</span><span class=special>;
|
||||
};</span></code></pre>
|
||||
<p>Where <tt>T</tt> is an attribute type compatible with the match attribute of
|
||||
the <tt>fallback_parser</tt>'s subject (defaults to <tt>nil_t</tt>). The class
|
||||
<tt>error_status</tt> reports the result of an error handler. This result can
|
||||
be one of: </p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="8"> error_status result </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<tr>
|
||||
<td class="table_cells"><b>fail</b></td>
|
||||
<td class="table_cells">quit and fail. Return a <tt>no_match</tt></td>
|
||||
</tr>
|
||||
<tr><td class="table_cells"><b>retry</b></td>
|
||||
<td class="table_cells">attempt error recovery, possibly moving the scanner</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells"><b>accept</b></td>
|
||||
<td class="table_cells">force success returning a matching length, moving the
|
||||
scanner appropriately and returning an attribute value</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells"><b>rethrow</b></td>
|
||||
<td class="table_cells">rethrows the error</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/error_handling.cpp">error_handling.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
</table>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="debugging.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="quickref.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,88 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Escape Character Parser</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link href="theme/style.css" rel="stylesheet" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10" height="49"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b> </b></font></td>
|
||||
<td width="85%" height="49"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Escape Character Parser</b></font></td>
|
||||
<td width="112" height="49"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="switch_parser.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
|
||||
<td width="30"><a href="loops.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><a name="escape_char_parser"></a>The Escape Character Parser is a utility
|
||||
parser, which parses escaped character sequences used in C/C++,
|
||||
LEX or Perl regular expressions. Combined with the confix_p utility parser, it is useful for parsing C/C++ strings containing double quotes and other escaped
|
||||
characters:</p>
|
||||
<pre> confix_p<span class="special">(</span><em class="literal">'"'</em><span class="special">,</span> <span class="special">*</span>c_escape_ch_p<span class="special">,</span> <em><span class="literal">'"</span></em><span class="literal"><em>'</em></span><span class="special">)</span></pre>
|
||||
<p>There are two different types of the Escape Character Parser:
|
||||
<tt>c_escape_ch_p</tt>, which parses C/C++ escaped character sequences and
|
||||
<tt>lex_escape_ch_p</tt>, which parses LEX style escaped character sequences.
|
||||
The following table shows the valid character sequences understood by these
|
||||
utility parsers.</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title"><b>Summary of valid escaped character
|
||||
sequences</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="29%" height="27" class="table_cells"><b>c_escape_ch_p</b></td>
|
||||
<td width="71%" class="table_cells"><p><code>\b, \t, \n, \f, \r, \\, \",
|
||||
\', \xHH, \OOO</code><br>
|
||||
where: H is some hexadecimal digit (0..9, a..f, A..F) and O is some octal
|
||||
digit (0..7)</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="27" class="table_cells"><strong>lex_escape_ch_p</strong></td>
|
||||
<td class="table_cells">
|
||||
<p>all C/C++ escaped character sequences as described above and additionally
|
||||
any other character, which follows a backslash</p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>If there is a semantic action attached directly to the Escape Character Parser,
|
||||
all valid escaped characters are converted to their character equivalent
|
||||
(e.g. a backslash followed by an 'r' is converted to '\r'), which is
|
||||
fed to the attached actor. The number of hexadecimal
|
||||
or octal digits parsed depends on the size of one input character. An
|
||||
overflow will be detected and will generate a non-match. lex_escape_ch_p
|
||||
will strip the leading backslash for all character
|
||||
sequences which are not listed as valid C/C++ escape sequences when passing
|
||||
the unescaped character to an attached action.</p>
|
||||
<p>Please note though, that if there is a semantic action attached to an
|
||||
outermost parser (for instance as in <tt>(*c_escape_ch_p)[some_actor]</tt>,
|
||||
where the action is attached to the kleene star generated parser) no conversion
|
||||
takes place at the moment, but nevertheless the escaped characters are parsed
|
||||
correctly. This limitation will be removed in a future version of the library.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="switch_parser.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
|
||||
<td width="30"><a href="loops.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2001-2002 Daniel C. Nuffer<br>
|
||||
Copyright © 2003 Hartmut Kaiser <br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,506 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>FAQ</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>FAQ</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="techniques.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="rationale.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<ul>
|
||||
<li><a href="#scanner_business">The Scanner Business</a></li>
|
||||
<li><a href="#left_recursion">Eliminating Left Recursion</a> </li>
|
||||
<li><a href="#right_associativity">Implementing Right Associativity</a></li>
|
||||
<li><a href="#lexeme_and_rules">The lexeme_d directive and rules</a></li>
|
||||
<li><a href="#kleene_star">Kleene Star infinite loop</a></li>
|
||||
<li><a href="#CVS">Boost CVS and Spirit CVS</a></li>
|
||||
<li><a href="#compilation_times">How to reduce compilation times with complex
|
||||
Spirit grammars</a></li>
|
||||
<li><strong><a href="#frame_assertion">Closure frame assertion</a></strong></li>
|
||||
<li><strong><a href="#greedy_rd">Greedy RD</a></strong></li>
|
||||
<li><strong><a href="#referencing_a_rule_at_construction">Referencing a rule
|
||||
at construction time</a></strong></li>
|
||||
<li><strong><a href="#storing_rules">Storing Rules</a></strong></li>
|
||||
<li><strong><a href="#parsing_ints_and_reals">Parsing ints and reals</a> </strong></li>
|
||||
<li><strong><a href="#output_operator">BOOST_SPIRIT_DEBUG and missing <tt>operator<<</tt></a></strong></li>
|
||||
<li><strong><a href="#repository">Applications that used to be part of spirit</a></strong></li>
|
||||
</ul>
|
||||
<p><b> <a name="scanner_business" id="scanner_business"></a> The Scanner Business</b></p>
|
||||
<p><font color="#FF0000">Question:</font> Why doesn't this compile?</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>rule</span><span class=special><> </span><span class=identifier>r </span><span class=special>= /*...*/;
|
||||
</span> <span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, </span><span class=identifier>r</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>); </span><span class=comment>// BAD [attempts phrase level parsing]</span></font></code></pre>
|
||||
<p>But if I <font color="#000000">remove the skip-parser, everything goes back
|
||||
to normal again:<code></code></font></p>
|
||||
<pre><code><font color="#000000"> <span class=identifier>rule</span><span class=special><> </span><span class=identifier>r </span><span class=special>= *</span><span class=identifier>anychar_p</span><span class=special>;
|
||||
</span><span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, </span><span class=identifier>r</span><span class=special>); </span><span class=comment>// OK [character level parsing]</span></font></code></pre>
|
||||
<p>Sometimes you'll want to pass in a rule to one of the parse functions
|
||||
that Spirit provides. The problem is that the rule is a template class that
|
||||
is parameterized by the scanner type. This is rather awkward but unavoidable:
|
||||
<strong>the rule is tied to a scanner</strong>. What's not obvious is that this
|
||||
scanner must be compatible with the scanner that is ultimately passed to the
|
||||
rule's parse member function. Otherwise, the compiler will complain. </p>
|
||||
<p>Why does the first call to parse not compile? Because of scanner incompatibility.
|
||||
Behind the scenes, the free parse function creates a scanner from the iterators
|
||||
passed in. In the second call to parse, the scanner created is a plain vanilla
|
||||
<tt>scanner<></tt>. This is compatible with the default scanner type of
|
||||
<tt>rule<></tt> [see default template parameters of <a href="rule.html">the
|
||||
rule</a>]. The first call creates a scanner of type <tt><a href="scanner.html#phrase_scanner_t">phrase_scanner_t</a></tt>.
|
||||
Thus, in order for the first call to succeed, the rule must be parameterized
|
||||
as <tt>rule<phrase_scanner_t></tt>:</p>
|
||||
<pre><code><font color="#000000"><span class=comment> </span><span class=identifier>rule</span><span class=special><</span><span class=identifier>phrase_scanner_t</span><span class=special>> </span><span class=identifier>r </span><span class=special>= </span><span class=special>*</span><span class=identifier>anychar_p</span><span class=special>;
|
||||
</span><span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, </span><span class=identifier>r</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>); </span><span class=comment>// OK [phrase level parsing]</span></font></code></pre>
|
||||
<p>Take note however that <tt>phrase_scanner_t</tt> is compatible only when you
|
||||
are using <tt>char const*</tt> iterators and <tt>space_p</tt> as the skip parser.
|
||||
Other than that, you'll have to find the right type of scanner. This is tedious
|
||||
to do correctly. In light of this issue, <strong>it is best to avoid rules as
|
||||
arguments to the parse functions</strong>. Keep in mind that this happens only
|
||||
with rules. The rule is the only parser that has to be tied to a particular
|
||||
scanner type. For instance:</p>
|
||||
<pre><span class=comment> </span><span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, *</span><span class=identifier>anychar_p</span><span class=special>); </span><span class=comment><code><font color="#000000"><span class=comment>// OK [character level parsing]</span></font></code>
|
||||
</span><span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, *</span><span class=identifier>anychar_p</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>); </span><span class="comment">// OK [phrase level parsing]</span></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <strong><img src="theme/note.gif" width="16" height="16">
|
||||
Multiple Scanner Support</strong><br>
|
||||
<br>
|
||||
As of v1.8.0, rules can use one or more scanner types. There are cases,
|
||||
for instance, where we need a rule that can work on the phrase and character
|
||||
levels. Rule/scanner mismatch has been a source of confusion and is the
|
||||
no. 1 <a href="faq.html#scanner_business">FAQ</a>. To address this issue,
|
||||
we now have <a href="rule.html#multiple_scanner_support">multiple scanner
|
||||
support</a>. <br>
|
||||
<br>
|
||||
<img src="theme/bulb.gif" width="13" height="18"> See the techniques section
|
||||
for an <a href="techniques.html#multiple_scanner_support">example</a> of
|
||||
a <a href="grammar.html">grammar</a> using a multiple scanner enabled rule,
|
||||
<a href="scanner.html#lexeme_scanner">lexeme_scanner</a> and <a href="scanner.html#as_lower_scanner">as_lower_scanner.</a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><b> <a name="left_recursion"></a> Eliminating Left Recursion </b></p>
|
||||
<p><font color="#FF0000">Question:</font> I ported a grammar from YACC. It's "kinda"
|
||||
working - the parser itself compiles with no errors. But when I try to parse,
|
||||
it gives me an "invalid page fault". I tracked down the problem to
|
||||
this grammar snippet:</p>
|
||||
<pre> <span class=identifier>or_expr </span><span class=special>= </span><span class=identifier>xor_expr </span><span class=special>| (</span><span class=identifier>or_expr </span><span class=special>>> </span><span class=identifier>VBAR </span><span class=special>>> </span><span class=identifier>xor_expr</span><span class=special>);</span></pre>
|
||||
<p>What you should do is to eliminate direct and indirect left-recursion. This
|
||||
causes the invalid page fault because the program enters an infinite loop. The
|
||||
code above is good for bottom up parsers such as YACC but not for LL parsers
|
||||
such as Spirit.</p>
|
||||
<p>This is similar to a rule in Hartmut Kaiser's C
|
||||
parser (this should be available for download from <a href="http://spirit.sf.net">Spirit's site</a> as soon as you read this).</p>
|
||||
<pre>
|
||||
<span class=identifier>inclusive_or_expression
|
||||
</span><span class=special>= </span><span class=identifier>exclusive_or_expression
|
||||
</span><span class=special>| </span><span class=identifier>inclusive_or_expression </span><span class=special>>> </span><span class=identifier>OR </span><span class=special>>> </span><span class=identifier>exclusive_or_expression
|
||||
</span><span class=special>;</span></pre>
|
||||
<p><span class=special></span>Transforming left recursion to right recursion,
|
||||
we have:</p>
|
||||
<pre> <span class=identifier>inclusive_or_expression
|
||||
</span><span class=special>= </span><span class=identifier>exclusive_or_expression </span><span class=special>>> </span><span class=identifier>inclusive_or_expression_helper
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>inclusive_or_expression_helper
|
||||
</span><span class=special>= </span><span class=identifier>OR </span><span class=special>>> </span><span class=identifier>exclusive_or_expression </span><span class=special>>> </span><span class=identifier>inclusive_or_expression_helper
|
||||
</span><span class=special>| </span><span class=identifier>epsilon_p
|
||||
</span><span class=special>;</span></pre>
|
||||
<p><span class=special></span>I'd go further. Since:</p>
|
||||
<pre> <span class=identifier>r </span><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>epsilon_p</span><span class=special>;</span></pre>
|
||||
<p><span class=special></span>is equivalent to:<span class=special><br>
|
||||
</span></p>
|
||||
<pre> <span class=identifier>r </span><span class=special>= !</span><span class=identifier>a</span><span class=special>;</span></pre>
|
||||
<p>we can simplify <tt>inclusive_or_expression_helper</tt> thus:</p>
|
||||
<pre> <span class=identifier>inclusive_or_expression_helper
|
||||
</span><span class=special>= !(</span><span class=identifier>OR </span><span class=special>>> </span><span class=identifier>exclusive_or_expression </span><span class=special>>> </span><span class=identifier>inclusive_or_expression_helper</span><span class=special>)
|
||||
;</span></pre>
|
||||
<p><span class=special></span>Now, since:</p>
|
||||
<pre> <span class=identifier>r </span><span class=special>= !(</span><span class=identifier>a </span><span class=special>>> </span><span class=identifier>r</span><span class=special>);</span></pre>
|
||||
<p><span class=special></span>is equivalent to:</p>
|
||||
<pre> <span class=identifier>r </span><span class=special>= *</span><span class=identifier>a</span><span class=special>;</span></pre>
|
||||
<p><span class=special></span>we have:</p>
|
||||
<pre> <span class=identifier>inclusive_or_expression_helper
|
||||
</span><span class=special>= *(</span><span class=identifier>OR </span><span class=special>>> </span><span class=identifier>exclusive_or_expression</span><span class=special>)
|
||||
;</span></pre>
|
||||
<p><span class=special></span>Now simplifying <tt>inclusive_or_expression</tt>
|
||||
fully, we have:</p>
|
||||
<pre> <span class=identifier>inclusive_or_expression
|
||||
</span><span class=special>= </span><span class=identifier>exclusive_or_expression </span><span class=special>>> *(</span><span class=identifier>OR </span><span class=special>>> </span><span class=identifier>exclusive_or_expression</span><span class=special>)
|
||||
;</span></pre>
|
||||
<p><span class=special></span>Reminds me of the calculators. So in short:</p>
|
||||
<pre> <span class=identifier>a </span><span class=special>= </span><span class=identifier>b </span><span class=special>| </span><span class=identifier>a </span><span class=special>>> </span><span class=identifier>op </span><span class=special>>> </span><span class=identifier>b</span><span class=special>;</span></pre>
|
||||
<p><span class=special></span><span class=identifier>in </span><span class=identifier>pseudo-YACC
|
||||
</span><span class=identifier>is</span><span class=special>:</span></p>
|
||||
<pre> <span class=identifier>a </span><span class=special>= </span><span class=identifier>b </span><span class=special>>> *(</span><span class=identifier>op </span><span class=special>>> </span><span class=identifier>b</span><span class=special>);</span></pre>
|
||||
<p><span class=special></span>in Spirit. What could be simpler? Look Ma, no recursion,
|
||||
just iteration.</p>
|
||||
<p><b> <a name="right_associativity" id="right_associativity"></a> Implementing Right Associativity </b></p>
|
||||
<p> <font color="#FF0000">Question:</font> I tried adding <tt>'^'</tt> as an operator to compute the power to a calculator grammar. The following code
|
||||
</p>
|
||||
<pre> <span class=identifier>pow_expression
|
||||
</span><span class=special>= </span><span class=identifier>pow_operand </span><span class=special>>> </span><span class=special>*( </span><span class=literal>'^' </span><span class=special>>> </span><span class=identifier>pow_operand </span><span class=special>[ </span><span class=special>& </span><span class=identifier>do_pow </span><span class=special>]
|
||||
</span><span class=special>)
|
||||
</span><span class=special>;</span>
|
||||
</pre>
|
||||
<p>parses the input correctly, but I want the operator to be evaluated from right to left. In other words, the expression <tt>2^3^4</tt> is supposed to have the same semantics as <tt>2^(3^4)</tt> instead of <tt>(2^3)^4</tt>. How do I do it?
|
||||
</p>
|
||||
<p> The "textbook recipe" for Right Associativity is Right Recursion. In BNF that means:
|
||||
<pre> <pow_expression> ::= <pow_operand> '^' <pow_expression> | <pow_operand>
|
||||
</pre>
|
||||
<p>But we had better not take the theory too literally here, because if the first alternative fails, the semantic actions within <tt>pow_operand</tt> might have been executed already and will then be executed again when trying the second alternative. So let's apply Left Factorization to factor out <tt>pow_operand</tt>:
|
||||
<pre> <pow_expression> ::= <pow_operand> <pow_expression_helper>
|
||||
<pow_expression_helper> ::= '^' <pow_expression> | <i>ε</i>
|
||||
</pre>
|
||||
<p>The production <tt>pow_expression_helper</tt> matches the empty string <i>ε</i>, so we can replace the alternative with the optional operator in Spirit code.
|
||||
</p>
|
||||
<pre> <span class=identifier>pow_expression
|
||||
</span><span class=special>= </span><span class=identifier>pow_operand </span><span class=special>>> </span><span class=special>!( </span><span class=literal>'^' </span><span class=special>>> </span><span class=identifier>pow_expression </span><span class=special>[ </span><span class=special>& </span><span class=identifier>do_pow </span><span class=special>]
|
||||
</span><span class=special>)
|
||||
</span><span class=special>;</span>
|
||||
</pre>
|
||||
<p>Now any semantic actions within <tt>pow_operand</tt> can safely be executed. For stack-based evaluation that means that each match of <tt>pow_operand</tt> will leave one value on the stack and the recursion makes sure there are (at least) two values on the stack when <tt>do_pow</tt> is fired to reduce these two values to their power.
|
||||
</p>
|
||||
<p>In cases where this technique isn't applicable, such as C-style assignment
|
||||
<pre> <span class=identifier>assignment
|
||||
</span><span class=special>= </span><span class=identifier>lvalue </span><span class=special>>> </span><span class=literal>'=' </span><span class=special>>> </span><span class=identifier>assignment
|
||||
</span><span class=special>| </span><span class=identifier>ternary_conditional
|
||||
</span><span class=special>;</span>
|
||||
</pre>
|
||||
<p>you can append <tt>| epsilon_p [ <i>action</i> ] >> nothing_p</tt> to a parser to correct the semantic context when backtracking occurs (in the example case that would be dropping the address pushed by <tt>lvalue</tt> off the evaluation stack):
|
||||
</p>
|
||||
<pre> <span class=identifier>assignment
|
||||
</span><span class=special>= </span><span class=identifier>lvalue </span><span class=special>>> </span><span class=special>( </span><span class=literal>'=' </span><span class=special>>> </span><span class=identifier>assignment </span><span class=special>[ </span><span class=special>& </span><span class=identifier>do_store </span><span class=special>]
|
||||
</span><span class=special>| </span><span class=identifier>epsilon_p </span><span class=special>[ </span><span class=special>& </span><span class=identifier>do_drop </span><span class=special>]
|
||||
</span><span class=special>>> </span><span class=identifier>nothing_p
|
||||
</span><span class=special>)
|
||||
</span><span class=special>| </span><span class=identifier>ternary_conditional
|
||||
</span><span class=special>;</span>
|
||||
</pre>
|
||||
<p>However, this trick compromises the clear separation of syntax and semantics, so you also might want to consider using an <a href="trees.html">AST</a> instead of semantic actions so you can just go with the first definition of <tt>assignment</tt>.
|
||||
</p>
|
||||
<p><b> <a name="lexeme_and_rules" id="lexeme_and_rules"></a> The lexeme_d directive
|
||||
and rules</b></p>
|
||||
<p> <font color="#FF0000">Question:</font> Does lexeme_d not support expressions
|
||||
which include rules? In the example below, the definition of atomicRule compiles,
|
||||
</p>
|
||||
<pre> <span class=identifier></span><span class=identifier>rule</span><span class=special><</span><span class=identifier>phrase_scanner_t</span><span class=special>> </span><span class=identifier>atomicRule</span>
|
||||
<span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[(</span><span class=identifier>alpha_p </span><span class=special>| </span><span class=literal>'_'</span><span class=special>) >> *(</span><span class=identifier>alnum_p </span><span class=special>| </span><span class=literal>'.' </span><span class=special>| </span><span class=literal>'-' </span><span class=special>| </span><span class=literal>'_'</span><span class=special>)];</span></pre>
|
||||
<p>but if I move <tt>alnum_p | '.' | '-' | '_'</tt> into its own rule, the compiler
|
||||
complains about conversion from <tt>const scanner<...></tt> to <tt>const
|
||||
phrase_scanner_t&</tt>. </p>
|
||||
<pre> <span class=identifier>rule</span><span class=special><</span><span class=identifier>phrase_scanner_t</span><span class=special>> </span><span class=identifier>ch </span><span class=special>
|
||||
= </span><span class=identifier>alnum_p </span><span class=special>| </span><span class=literal>'.' </span><span class=special>| </span><span class=literal>'-' </span><span class=special>| </span><span class=literal>'_'</span><span class=special>;</span>
|
||||
|
||||
<span class=identifier> rule</span><span class=special><</span><span class=identifier>phrase_scanner_t</span><span class=special>> </span><span class=identifier>compositeRule</span>
|
||||
<span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[(</span><span class=identifier>alpha_p </span><span class=special>| </span><span class=literal>'_'</span><span class=special>) >> *(</span><span class=identifier>ch</span><span class=special>)]; </span><span class="comment">// <- error source</span></pre>
|
||||
<p>You might get the impression that the <tt>lexeme_d</tt> directive and rules
|
||||
do not mix. Actually, this problem is related to the first FAQ entry: The Scanner
|
||||
Business. More precisely, the <tt>lexeme_d</tt> directive and rules with incompatible
|
||||
scanner types do not mix. This problem is more subtle. What's causing the scanner
|
||||
incompatibility is the directive itself. The <tt>lexeme_d</tt> directive transforms
|
||||
the scanner it receives into something that disables the skip parser. This non-skipping
|
||||
scanner, unfortunately, is incompatible with the original scanner before transformation
|
||||
took place.</p>
|
||||
<p>The simplest solution is not to use rules in the <tt>lexeme_d</tt>. Instead,
|
||||
you can definitely apply <tt>lexeme_d</tt> to subrules and grammars if you really
|
||||
need more complex parsers inside the <tt>lexeme_d</tt>. If you really must use
|
||||
a rule, you need to know the exact scanner used by the directive. The <tt>lexeme_scanner</tt>
|
||||
metafunction is your friend here. The example above will work as expected once
|
||||
we give the <tt>ch</tt> rule a correct scanner type:</p>
|
||||
<pre> <span class=identifier>rule</span><span class=special><</span><span class=identifier>lexeme_scanner</span><span class="special"><</span><span class=identifier>phrase_scanner_t</span><span class=special>>::</span><span class="identifier">type</span><span class=special>> </span><span class=identifier>ch </span><span class=special>
|
||||
= </span><span class=identifier>alnum_p </span><span class=special>| </span><span class=literal>'.' </span><span class=special>| </span><span class=literal>'-' </span><span class=special>| </span><span class=literal>'_'</span><span class=special>;</span></pre>
|
||||
<p>Note: make sure to add "<tt>typename</tt>" before <tt>lexeme_scanner</tt>
|
||||
when this is used inside a template class or function.</p>
|
||||
<p>The same thing happens when rules are used inside the <tt>as_lower_d</tt> directive.
|
||||
In such cases, you can use the <tt>as_lower_scanner</tt>. See the <span class=identifier><tt><a href="scanner.html#lexeme_scanner">lexeme_scanner</a></tt></span>
|
||||
and <tt><a href="scanner.html#as_lower_scanner">as_lower_scanner</a></tt>.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"> See
|
||||
the techniques section for an <a href="techniques.html#multiple_scanner_support">example</a>
|
||||
of a <a href="grammar.html">grammar</a> using a <a href="rule.html#multiple_scanner_support">multiple
|
||||
scanner enabled rule,</a> <a href="scanner.html#lexeme_scanner">lexeme_scanner</a>
|
||||
and <a href="scanner.html#as_lower_scanner">as_lower_scanner.</a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><strong><a name="kleene_star"></a>Kleene Star infinite loop</strong></p>
|
||||
<p><font color="#FF0000">Question</font>: Why Does This Loop Forever?</p>
|
||||
<pre> <span class=identifier>rule</span><span class=special><> </span><span class=identifier>optional </span><span class=special>= !(</span>str_p<span class="special">(</span><span class="string">"optional"</span><span class="special">));
|
||||
</span><span class=identifier>rule</span><span class=special><> </span><span class="identifier">list_of_optional </span><span class=special>= *</span><span class=identifier>optional</span><span class="special">;</span></pre>
|
||||
<p>The problem with this is that the kleene star will continue looping until it
|
||||
gets a no-match from its enclosed parser. Because the <tt>optional</tt> rule
|
||||
is optional, it will always return a match. Even if the input doesn't match
|
||||
"optional" it will return a zero length match. <tt>list_of_optional</tt>
|
||||
will keep calling optional forever since optional will never return a no-match.
|
||||
So in general, any rule that can be "nullable" (meaning it can return
|
||||
a zero length match) must not be put inside a kleene star.</p>
|
||||
<p><strong><a name="CVS"></a>Boost CVS and Spirit CVS</strong></p>
|
||||
<p><font color="#FF0000">Question:</font> There is Boost CVS and Spirit CVS. Which
|
||||
is used for further development of Spirit?</p>
|
||||
<p> Generally, development takes place in Spirit's CVS. However, from time to
|
||||
time a new version of Spirit will be integrated in Boost. When this happens
|
||||
development takes place in the Boost CVS. There will be announcements on the
|
||||
Spirit mailing lists whenever the status of the Spirit CVS changes.<br>
|
||||
</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/alert.gif" width="16" height="16">
|
||||
During development of Spirit v1.8.1 (released as part of boost-1.32.0) and
|
||||
v1.6.2, Spirit's developers decided to stop maintaining Spirit CVS for
|
||||
BRANCH_1_8 and BRANCH_1_6. This was necessary to reduce the added work of
|
||||
maintaining and synch'ing two repositories. The maintenance of these branches
|
||||
will take place on Boost CVS. At this time, new developments towards Spirit
|
||||
v2 and other experimental developments are expected to happen in Spirit
|
||||
CVS.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><strong><a name="compilation_times"></a>How to reduce compilation times with
|
||||
complex Spirit grammars </strong></p>
|
||||
<p><font color="#FF0000">Question:</font> Are there any techniques to minimize
|
||||
compile times using spirit? For simple parsers compile time doesn't seem to
|
||||
be a big issue, but recently I created a parser with about 78 rules
|
||||
and it took about 2 hours to compile. I would like to break the grammar up into
|
||||
smaller chunks, but it is not as easy as I thought it would be because rules
|
||||
in two grammar capsules are defined in terms of each other. Any thoughts?</p>
|
||||
<p> The only way to reduce compile times is </p>
|
||||
<ul>
|
||||
<li> to split up your grammars into smaller chunks</li>
|
||||
<li> prevent the compiler from seeing all grammar definitions at the same time
|
||||
(in the same compilation unit)</li>
|
||||
</ul>
|
||||
<p>The first task is merely logistical, the second is rather a technical one. </p>
|
||||
<p>A good example of solving the first task is given in the Spirit cpp_lexer example
|
||||
written by JCAB (you may find it on the <a href="http://spirit.sourceforge.net/repository/applications/show_contents.php">applications' repository</a>).
|
||||
</p>
|
||||
<p>The cross referencing problems may be solved by some kind of forward declaration,
|
||||
or, if this does not work, by introducing some dummy template argument to the
|
||||
non-templated grammars. This allows the instantiation time to be deferred until the
|
||||
compiler has seen all the definitions:</p>
|
||||
<pre> <span class="keyword">template</span> <<span class="keyword">typename</span> T = <span class="keyword">int</span>><br> <span class="keyword">struct</span> grammar2;
|
||||
|
||||
<span class="keyword">template</span> <<span class="keyword">typename</span> T = <span class="keyword">int</span>><br> <span class="keyword">struct</span> grammar1 : <span class="keyword">public</span> grammar<grammar1><br> {
|
||||
<span class="comment">// refers to grammar2<></span>
|
||||
};
|
||||
|
||||
<span class="keyword">template</span> <typename T>
|
||||
<span class="keyword">struct</span> grammar2 : <span class="keyword">public</span> grammar<grammar2>
|
||||
{
|
||||
<span class="comment">// refers to grammar1<></span>
|
||||
};
|
||||
|
||||
//...
|
||||
grammar1<> g; <span class="comment">// both grammars instantiated here</span>
|
||||
</pre>
|
||||
<p>The second task is slightly more complex. You must ensure that in the first
|
||||
compilation unit the compiler sees only some function/template <strong>declaration</strong>
|
||||
and in the second compilation unit the function/template <strong>definition</strong>.
|
||||
Still no problem, if no templates are involved. If templates are involved,
|
||||
you need to manually (explicitly) instantiate these templates with the correct
|
||||
template parameters inside a separate compilation unit. This way the compilation
|
||||
time is split between several compilation units, reducing the overall
|
||||
required time drastically too. </p>
|
||||
<p>For a sample, showing how to achieve this, you may want to look at the <tt>Wave</tt>
|
||||
preprocessor library, where this technique is used extensively. (this should be available for download from <a href="http://spirit.sf.net">Spirit's site</a> as soon as you read this).</p>
|
||||
<p><strong><a name="frame_assertion" id="frame_assertion"></a>Closure frame assertion</strong></p>
|
||||
<p><font color="#FF0000">Question:</font> When I run the parser I get an assertion
|
||||
<span class="string">"frame.get() != 0 in file closures.hpp"</span>.
|
||||
What am I doing wrong?</p>
|
||||
<p>Basically, the assertion fires when you are accessing a closure variable that
|
||||
is not constructed yet. Here's an example. We have three rules <tt>a</tt>, <tt>b</tt>
|
||||
and <tt>c</tt>. Consider that the rule <tt>a</tt> has a closure member <tt>m</tt>.
|
||||
Now:</p>
|
||||
<pre> <span class="identifier">a</span> <span class="special">=</span> <span class="identifier">b</span><span class="special">;</span>
|
||||
<span class="identifier">b</span> <span class="special">=</span> <span class="identifier">int_p</span><span class="special">[</span><span class="identifier">a</span><span class="special">.</span><span class="identifier">m</span> <span class="special">=</span> 123<span class="special">];</span>
|
||||
<span class="identifier">c</span> <span class="special">=</span> <span class="identifier">b</span><span class="special">;</span></pre>
|
||||
<p>When the rule <tt>a</tt> is invoked, its frame is set, along with its member
|
||||
<tt>m</tt>. So, when <tt>b</tt> is called from <tt>a</tt>, the semantic action
|
||||
<tt>[a.m = 123]</tt> will store <tt>123</tt> into <tt>a</tt>'s closure member
|
||||
<tt>m</tt>. On the other hand, when <tt>c</tt> is invoked, and <tt>c</tt> attempts
|
||||
to call <tt>b</tt>, no frame for <tt>a</tt> is set. Thus, when <tt>b</tt> is
|
||||
called from <tt>c</tt>, the semantic action <tt>[a.m = 123]</tt> will fire the
|
||||
<span class="string">"frame.get() != 0 in file closures.hpp"</span>
|
||||
assertion.</p>
|
||||
<p><strong><a name="greedy_rd" id="greedy_rd"></a>Greedy RD</strong></p>
|
||||
<p><font color="#FF0000">Question:</font> I'm wondering why this won't work
|
||||
when parsed:</p>
|
||||
<pre>
|
||||
<span class="identifier"> a</span> <span class="special">= +</span><span class="identifier">anychar_p</span><span class="special">;</span>
|
||||
<span class="identifier">b</span> = <span class="string">'('</span> <span class="special">>></span> <span class="identifier">a</span> <span class="special">>></span> <span class="string">')'</span><span class="special">;</span></pre>
|
||||
<p>Try this:</p>
|
||||
<pre>
|
||||
<span class="identifier"> a</span> <span class="special">= +(</span><span class="identifier">anychar_p - </span><span class="string">')'</span><span class="special">);</span>
|
||||
<span class="identifier">b</span> <span class="special">=</span> <span class="string">'('</span> <span class="special">>></span> <span class="identifier">a</span> <span class="special">>></span> <span class="string">')'</span><span class="special">;</span></pre>
|
||||
<p>David Held writes: That's because it's like the langoliers--it eats everything
|
||||
up. You usually want to say what it shouldn't eat up by subtracting the terminating
|
||||
character from the parser. The moral being: Using <tt>*anychar_p</tt> or <tt>+anychar_p</tt>
|
||||
all by itself is usually a <em>Bad Thing</em>™.</p>
|
||||
<p>In other words: Recursive Descent is inherently greedy (however, see <a href="rationale.html#exhaustive_rd">Exhaustive
|
||||
backtracking and greedy RD</a>).</p>
|
||||
<p><span class="special"></span><strong><a name="referencing_a_rule_at_construction" id="referencing_a_rule_at_construction"></a>Referencing
|
||||
a rule at construction time</strong></p>
|
||||
<p><font color="#FF0000">Question:</font> The code below terminates with a segmentation
|
||||
fault, but I'm (obviously) confused about what I'm doing wrong.</p>
|
||||
<pre> rule<span class="special"><</span>ScannerT<span class="special">,</span> clos<span class="special">::</span>context_t<span class="special">></span> id <span class="special">=</span> int_p<span class="special">[</span>id<span class="special">.</span>i <span class="special">=</span> arg1<span class="special">];</span></pre>
|
||||
<p>You have a rule <tt>id</tt> being constructed. Before it is constructed, you
|
||||
reference <tt>id.i</tt> in the RHS of the constructor. It's a chicken and egg
|
||||
thing. The closure member <tt>id.i</tt> is not yet constructed at that point.
|
||||
Using assignment will solve the problem. Try this instead:</p>
|
||||
<pre> rule<span class="special"><</span>ScannerT<span class="special">,</span> clos<span class="special">::</span>context_t<span class="special">></span> id<span class="special">;</span>
|
||||
id <span class="special">=</span> int_p<span class="special">[</span>id<span class="special">.</span>i <span class="special">=</span> arg1<span class="special">];</span></pre>
|
||||
<p><span class="special"></span><strong><a name="storing_rules" id="storing_rules"></a>Storing
|
||||
Rules </strong></p>
|
||||
<p><font color="#FF0000">Question:</font> Why can't I store rules in STL containers
|
||||
for later use and why can't I pass and return rules to and from functions by
|
||||
value? </p>
|
||||
<p>EBNF is primarily declarative. Like in functional programming, it's a static
|
||||
recipe and there's no notion of do this then that. However, in Spirit, we managed
|
||||
to coax imperative C++ to take in declarative EBNF. Hah! Fun!... We did that
|
||||
by masquerading the C++ assignment operator to mimic EBNF's <tt>::=</tt>, among
|
||||
other things (e.g. <tt>>></tt>, <tt>|</tt>, <tt>&</tt> etc.). We used
|
||||
the rule class to let us do that by giving its assignment operator (and copy
|
||||
constructor) a different meaning and semantics. Doing so made the rule unlike
|
||||
any other C++ object. You can't copy it. You can't assign it. You can't place
|
||||
it in a container (vector, stack, etc.). Heck, you can't even return it from a
|
||||
function *by value*.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> The
|
||||
rule is a weird object, unlike any other C++ object. It does not have the
|
||||
proper copy and assignment semantics and cannot be stored and passed around
|
||||
by value.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>However nice declarative EBNF is, the dynamic nature of C++ can be an advantage.
|
||||
We've seen this in action here and there. There are indeed some interesting
|
||||
applications of dynamic parsers using Spirit. Yet, we haven't fully utilized
|
||||
the power of dynamic parsing, unless(!), we have a rule that's not so alien
|
||||
to C++ (i.e. behaves as a good C++ object). With such a beast, we can write
|
||||
parsers that are defined at run time, as opposed to at compile time.</p>
|
||||
<p>Now that I started focusing on rules (hey, check out the hunky new rule features),
|
||||
it might be a good time to implement the rule-holder. It is basically just a
|
||||
rule, but with C++ object semantics. Yet it's not as simple. Without true garbage
|
||||
collection, the implementation will be a bit tricky. We can't simply use reference
|
||||
counting because a rule-holder (hey, anyone here has a better name?) *is-a*
|
||||
rule, and rules are typically recursive and thus cyclic. The problem is which
|
||||
will own which.</p>
|
||||
<p>Ok... this will do for now. You'll definitely see more of the rule-holder in
|
||||
the coming days.</p>
|
||||
<p><strong><a name="parsing_ints_and_reals"></a>Parsing Ints and Reals</strong></p>
|
||||
<p> <font color="#FF0000">Question:</font> I was trying to parse an int or float value with the <tt>longest_d</tt> directive and put some actors on the alternatives to visualize the results. When I parse "123.456", the output reports:</p>
|
||||
<ol>
|
||||
<li>(int) has been matched: full match = false</li>
|
||||
<li> (double) has been matched: full match = true</li>
|
||||
</ol>
|
||||
<p>That is not what I expected. What am I missing? </p>
|
||||
<p> Actually, the problem is that both semantic actions of the int and real branch will be triggered because both branches will be tried. This doesn't buy us much. What actually wins in the end is what you expected. But there's no easy way to know which one wins. The problem stems from the ambiguity. </p>
|
||||
<blockquote>
|
||||
<p>Case1: Consider this input: "2". Is it an int or a real? They are both (strictly following the grammar of a real). </p>
|
||||
<p>Case2 : Now how about "1.0"? Is it an int or a real? They are both, albeit the int part gets a partial match: "1". That is why you are getting a (partial) match for your <em>int</em> rule (full match = false). </p>
|
||||
</blockquote>
|
||||
<p> Instead of using the <tt>longest_d</tt> to parse ints and reals, what I suggest is to remove the ambiguity and use the plain short-circuiting alternatives. The first step is to use <tt><a href="numerics.html#strict_reals">strict_real_p</a></tt> to make the first case unambiguous. Unlike
|
||||
|
||||
|
||||
<tt>real_p</tt>, <tt>strict_real_p</tt> requires a dot to be present for a number to be considered a successful match.
|
||||
|
||||
Your grammar can be written unambiguously as:</p>
|
||||
<pre> strict_real_p<span class="special"> | </span>int_p</pre>
|
||||
<p> Note that because ambiguity is resolved, attaching actions to both branches is safe. Only one will be triggered:</p>
|
||||
<pre> strict_real_p<span class="special">[</span>R<span class="special">] | </span>int_p<span class="special">[</span>I<span class="special">]</span></pre>
|
||||
<blockquote>
|
||||
<p> "1.0" ---> triggers R<br>
|
||||
"2" ---> triggers I</p>
|
||||
</blockquote>
|
||||
<p> Again, as a rule of thumb, it is always best to resolve as much ambiguity as possible. The best grammars are those which involve no backtracking at all: an LL(1) grammar. Backtracking and semantic actions do not mix well.</p>
|
||||
<p><b><a name="output_operator" id="output_operator"></a>BOOST_SPIRIT_DEBUG and missing <tt>operator<<</tt></b></p>
|
||||
<p><font color="#FF0000">Question:</font> My code compiles fine in release mode but when I try to define <tt>BOOST_SPIRIT_DEBUG</tt> the compiler complains about a missing <tt><span class="keyword">operator</span><span class="special"><<</span></tt>.</p>
|
||||
<p>When <tt>BOOST_SPIRIT_DEBUG</tt> is defined, debug output is generated for
|
||||
spirit parsers. To this end it is expected that each closure member has the
|
||||
default output operator defined.</p>
|
||||
<p>You may provide the operator overload either in the namespace where the
|
||||
class is declared (will be found through Argument Dependent Lookup) or make it visible where it is
|
||||
used, that is <tt><span class="keyword">namespace</span> <span
|
||||
class="identifier">boost</span><span class="special">::</span><span
|
||||
class="identifier">spirit</span></tt>. Here's an example for <tt><span
|
||||
class="identifier">std</span><span class="special">::</span><span
|
||||
class="identifier">pair</span></tt>:</p>
|
||||
<pre><code>
|
||||
<span class="preprocessor">#include</span> <span class="string">&lt;iosfwd&gt;</span>
|
||||
<span class="preprocessor">#include</span> <span class="string">&lt;utility&gt;</span>
|
||||
|
||||
<span class="keyword">namespace</span> <span class="identifier">std</span> <span class="special">{</span>
|
||||
|
||||
<span class="keyword">template</span> <span class="special"><</span>
|
||||
<span class="keyword">typename</span> <span class="identifier">C</span><span class="special">,</span>
|
||||
<span class="keyword">typename</span> <span class="identifier">E</span><span class="special">,</span>
|
||||
<span class="keyword">typename</span> <span class="identifier">T1</span><span class="special">,</span>
|
||||
<span class="keyword">typename</span> <span class="identifier">T2</span>
|
||||
<span class="special">></span>
|
||||
<span class="identifier">basic_ostream</span><span class="special"><</span><span class="identifier">C</span><span class="special">,</span> <span class="identifier">E</span><span class="special">></span> <span class="special">&</span> <span class="keyword">operator</span><span class="special"><<(</span>
|
||||
<span class="identifier">basic_ostream</span><span class="special"><</span><span class="identifier">C</span><span class="special">,</span> <span class="identifier">E</span><span class="special">></span> <span class="special">&</span> <span class="identifier">out</span><span class="special">,</span>
|
||||
<span class="identifier">pair</span><span class="special"><</span><span class="identifier">T1</span><span class="special">,</span> <span class="identifier">T2</span><span class="special">></span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">what</span><span class="special">)</span>
|
||||
<span class="special">{</span>
|
||||
<span class="keyword">return</span> <span class="identifier">out</span> <span class="special"><<</span> <span class="string">'('</span> <span class="special"><<</span> <span class="identifier">what</span><span class="special">.</span><span class="identifier">first</span> <span class="special"><<</span> <span class="string">", "</span>
|
||||
<span class="special"><<</span> <span class="identifier">what</span><span class="special">.</span><span class="identifier">second</span> <span class="special"><<</span> <span class="string">')'</span><span class="special">;</span>
|
||||
<span class="special">}</span>
|
||||
|
||||
<span class="special">}</span>
|
||||
|
||||
</code></pre>
|
||||
<p><b><a name="repository" id="repository"></a>Applications that used to be part of spirit</b></p>
|
||||
<p><font color="#FF0000">Question:</font> Where can I find <i>&lt;insert great application&gt;</i>, that used to be part of the Spirit distribution?</p>
|
||||
<p>Old versions of Spirit used to include applications built with it.
|
||||
In order to streamline the distribution they were moved to a separate
|
||||
<a href="http://spirit.sourceforge.net/repository/applications/show_contents.php">applications repository</a>.
|
||||
On that page you'll find links to full applications that use the Spirit
|
||||
parser framework. We encourage you to send in your own applications for
|
||||
inclusion (see the page for instructions).</p>
|
||||
<p>You may also check out the <a href="http://spirit.sourceforge.net/repository/grammars/show_contents.php">grammars' repository</a>.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box">
|
||||
<img src="theme/note.gif" width="16" height="16"> You'll still find the
|
||||
example applications that complement (actually are part of) the
|
||||
documentation in the usual place: <code>libs/spirit/example</code>.<br>
|
||||
<br>
|
||||
<img src="theme/alert.gif" width="16" height="16"> The applications and
|
||||
grammars listed in the repositories are works of the respective authors.
|
||||
It is the author's responsibility to provide support and maintenance.
|
||||
Should you have any questions, please send the author an email.
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="techniques.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="rationale.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<span class="copyright">Copyright © 2002-2003 Hartmut Kaiser </span><br>
|
||||
<span class="copyright">Copyright © 2006-2007 Tobias Schwinger </span><br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,97 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>File Iterator</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>File Iterator</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="multi_pass.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="position_iterator.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Since Spirit is a back-tracking parser, it requires at least a forward iterator.
|
||||
In particular, an input iterator is not sufficient. Many times it is convenient
|
||||
to read the input to a parser from a file, but the STL file iterators are input
|
||||
iterators. To get around this limitation, Spirit has a utility class <tt>file_iterator</tt>,
|
||||
which is a read-only random-access iterator for files.</p>
|
||||
<p>To use the Spirit file iterator, simply create a file iterator with the path
|
||||
to the file you wish to parse, and then create an EOF iterator for the file:</p>
|
||||
<pre><span class=identifier> </span><span class=preprocessor>#include </span><span class=special><</span><span class=identifier>boost</span><span class=special>/</span><span class=identifier>spirit</span><span class=special>/</span><span class=identifier>iterator</span><span class=special>/</span><span class=identifier>file_iterator</span><span class=special>.</span><span class=identifier>hpp</span><span class=special>> </span><span class=comment>// the header file</span></pre>
|
||||
<pre> <span class=identifier>file_iterator</span><span class=special><> </span><span class=identifier>first</span><span class=special>(</span><span class=string>"input.dat"</span><span class=special>);
|
||||
|
||||
</span><span class=keyword>if </span><span class=special>(!</span><span class=identifier>first</span><span class=special>)
|
||||
{
|
||||
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>cout </span><span class=special><< </span><span class=string>"Unable to open file!\n"</span><span class=special>;
|
||||
|
||||
</span><span class=comment>// Clean up, throw an exception, whatever
|
||||
</span><span class=keyword>return </span><span class=special>-</span><span class=number>1</span><span class=special>;
|
||||
}
|
||||
|
||||
</span><span class=identifier>file_iterator</span><span class=special><> </span><span class=identifier>last </span><span class=special>= </span><span class=identifier>first</span><span class=special>.</span><span class=identifier>make_end</span><span class=special>();</span></pre>
|
||||
<p>You now have a pair of iterators to use with Spirit. If your parser is fully
|
||||
parametrized (no hard-coded <tt>&lt;char const *&gt;</tt>), it is a simple matter
|
||||
of redefining the iterator type to <tt>file_iterator</tt>:<br>
|
||||
</p>
|
||||
<pre> <span class=keyword>typedef char </span><span class="identifier">char_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>file_iterator </span><span class=special><</span><span class=identifier>char_t</span><span class=special>> </span><span class=identifier>iterator_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>scanner</span><span class=special><</span><span class=identifier>iterator_t</span><span class=special>> </span><span class=identifier>scanner_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>rule </span><span class=special><</span><span class=identifier>scanner_t</span><span class=special>> </span><span class=identifier>rule_t</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule_t my_rule</span><span class=special>;
|
||||
|
||||
</span><span class=comment>// Define your rule
|
||||
|
||||
</span><span class=identifier>parse_info</span><span class=special><</span><span class=identifier>iterator_t</span><span class=special>> </span><span class=identifier>info </span><span class=special>= </span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>my_rule</span><span class=special>);</span></pre>
|
||||
<p>Of course, you don't have to deal with the <a href="faq.html#scanner_business">scanner-business</a>
|
||||
at all if you use grammars rather than rules as arguments to the parse functions.
|
||||
You simply pass the iterator pairs and the grammar as is:<span class=special><br>
|
||||
</span></p>
|
||||
<pre> <span class=identifier>my_grammar </span><span class=identifier>g</span><span class=special>;
|
||||
</span><span class=identifier>parse_info</span><span class=special><</span><span class=identifier>iterator_t</span><span class=special>> </span><span class=identifier>info </span><span class=special>= </span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>g</span><span class=special>);</span></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"><b>
|
||||
Generic iterator</b><br>
|
||||
<br>
|
||||
The Spirit file iterator can be parameterized with any type that is default
|
||||
constructible and assignable. It transparently supports large files (greater
|
||||
than 2GB) on systems that provide an appropriate interface. The file iterator
|
||||
can be useful outside of Spirit as well. For instance, the Boost.Tokenizer
|
||||
package requires a bidirectional iterator, which is provided by file_iterator.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/file_parser.cpp">file_parser.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="multi_pass.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="position_iterator.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2002 Jeff Westfahl</p>
|
||||
<p class="copyright"><font size="2"> Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
</font> </p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,265 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Functional</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Functional</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="parametric_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="phoenix.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>If you look more closely, you'll notice that Spirit is all about composition
|
||||
of <i>parser functions</i>. A parser is just a function that accepts a scanner
|
||||
and returns a match. Parser <i>functions</i> are composed to form increasingly
|
||||
complex <i>higher order forms</i>. Notice too that the parser, albeit an object,
|
||||
is immutable and constant. All primitive and composite parser objects are <tt>const</tt>.
|
||||
The parse member function is even declared as <tt>const</tt>:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>self_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const</span><span class=special>;</span></code></pre>
|
||||
<p> By all accounts, this looks and feels a lot like <b>Functional Programming</b>.
|
||||
And indeed it is. Spirit is by all means an application of Functional programming
|
||||
in the imperative C++ domain. In Haskell, for example, there are what are called
|
||||
<a href="references.html#combinators">parser combinators</a> which are strikingly
|
||||
similar to the approach taken by Spirit: parser functions which are composed
|
||||
using various operators to create higher order parser functions that model a
|
||||
top-down recursive descent parser. Those smart Haskell folks have been doing
|
||||
this way before Spirit.</p>
|
||||
<p> Functional style programming (or FP) libraries are gaining momentum in the
|
||||
C++ community. Certainly, we'll see more of FP in Spirit now and in the future.
|
||||
Actually, if one looks more closely, even the C++ standard library has an FP
|
||||
flavor. Stealthily beneath the core of the standard C++ library, a closer look
|
||||
into STL gives us a glimpse of a truly FP paradigm already in place. It is obvious
|
||||
that the authors of STL know and practice FP.</p>
|
||||
|
||||
<h2>Semantic Actions in the FP Perspective</h2>
|
||||
|
||||
<h3>STL style FP</h3>
|
||||
<p> A more obvious application of STL-style FP in Spirit is the semantic action.
|
||||
What is STL-style FP? It is primarily the use of functors that can be composed
|
||||
to form higher order functors.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <img src="theme/note.gif" width="16" height="16"> <strong>Functors</strong><br>
|
||||
<br>
|
||||
A Function Object, or Functor is simply any object that can be called as
|
||||
if it is a function. An ordinary function is a function object, and so is
|
||||
a function pointer; more generally, so is an object of a class that defines
|
||||
operator(). </td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> This STL-style FP can be seen everywhere these days. The following example
|
||||
is taken from <a href="https://www.boost.org/sgi/stl/">SGI's Standard Template
|
||||
Library Programmer's Guide</a>:</p>
|
||||
<pre>
|
||||
<code><span class=comment>// Computes sin(x)/(x + DBL_MIN) for each element of a range.
|
||||
|
||||
</span><span class=identifier>transform</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>first</span><span class=special>,
|
||||
</span><span class=identifier>compose2</span><span class=special>(</span><span class=identifier>divides</span><span class=special><</span><span class=keyword>double</span><span class=special>>(),
|
||||
</span><span class=identifier>ptr_fun</span><span class=special>(</span><span class=identifier>sin</span><span class=special>),
|
||||
</span><span class=identifier>bind2nd</span><span class=special>(</span><span class=identifier>plus</span><span class=special><</span><span class=keyword>double</span><span class=special>>(), </span><span class=identifier>DBL_MIN</span><span class=special>)));</span></code></pre>
|
||||
<p align="left"> Really, this is just <i>currying</i> in FP terminology.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <img src="theme/lens.gif" width="15" height="16"> <strong>Currying</strong><br>
|
||||
<br>
|
||||
What is "currying", and where does it come from?<br>
|
||||
<br>
|
||||
Currying has its origins in the mathematical study of functions. It was
|
||||
observed by Frege in 1893 that it suffices to restrict attention to functions
|
||||
of a single argument. For example, for any two parameter function <tt>f(x,y)</tt>,
|
||||
there is a one parameter function <tt>f'</tt> such that <tt>f'(x)</tt> is
|
||||
a function that can be applied to y to give <tt>(f'(x))(y) = f (x,y)</tt>.
|
||||
This corresponds to the well known fact that the sets <tt>(AxB -> C)</tt>
|
||||
and <tt>(A -> (B -> C))</tt> are isomorphic, where <tt>"x"</tt>
|
||||
is cartesian product and <tt>"->"</tt> is function space. In
|
||||
functional programming, function application is denoted by juxtaposition,
|
||||
and assumed to associate to the left, so that the equation above becomes
|
||||
<tt>f' x y = f(x,y)</tt>. </td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> In the context of Spirit, the same FP style functor composition may be applied
|
||||
to semantic actions. <a href="../example/fundamental/full_calc.cpp">full_calc.cpp</a> is a good example. Here's a snippet from that sample:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>expression </span><span class=special>=
|
||||
</span><span class=identifier>term
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=special>(</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>)[</span><span class=identifier>make_op</span><span class=special>(</span><span class=identifier>plus</span><span class=special><</span><span class=keyword>long</span><span class=special>>(), </span><span class=identifier>self</span><span class=special>.</span><span class=identifier>eval</span><span class=special>)]
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>)[</span><span class=identifier>make_op</span><span class=special>(</span><span class=identifier>minus</span><span class=special><</span><span class=keyword>long</span><span class=special>>(), </span><span class=identifier>self</span><span class=special>.</span><span class=identifier>eval</span><span class=special>)]
|
||||
</span><span class=special>)
|
||||
</span><span class=special>;</span></code></pre>
|
||||
|
||||
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/full_calc.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
|
||||
<h3>Boost style FP</h3>
|
||||
<p> Boost takes the FP paradigm further. There are libraries in boost that focus
|
||||
specifically on Function objects and higher-order programming.</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="14"> Boost FP libraries </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><a href="http://www.boost.org/libs/bind/bind.html">bind</a>
|
||||
and <a href="http://www.boost.org/libs/bind/mem_fn.html">mem_fn</a></td>
|
||||
<td class="table_cells">Generalized binders for function/object/pointers and
|
||||
member functions, from Peter Dimov</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells"><a href="http://www.boost.org/libs/function/index.html">function</a></td>
|
||||
<td class="table_cells">Function object wrappers for deferred calls or callbacks,
|
||||
from Doug Gregor</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells"><a href="http://www.boost.org/libs/functional/index.html">functional</a></td>
|
||||
<td class="table_cells">Enhanced function object adaptors, from Mark Rodgers</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells"><a href="http://www.boost.org/libs/lambda/index.html">lambda</a></td>
|
||||
<td class="table_cells">Define small unnamed function objects at the actual
|
||||
call site, and more, from Jaakko Järvi and Gary Powell</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells"><a href="http://www.boost.org/libs/bind/ref.html">ref</a></td>
|
||||
<td class="table_cells">A utility library for passing references to generic
|
||||
functions, from Jaakko Järvi, Peter Dimov, Doug Gregor, and Dave Abrahams</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> The following is an example that uses boost <strong>Bind</strong> to use a
|
||||
member function as a Spirit semantic action. You can see this example in full
|
||||
in the file <a href="../example/fundamental/bind.cpp">bind.cpp</a>.</p>
|
||||
<pre>
|
||||
<code><span class=keyword>class </span><span class=identifier>list_parser
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>public</span><span class=special>:
|
||||
|
||||
</span><span class=keyword>typedef </span><span class=identifier>list_parser </span><span class=identifier>self_t</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>bool
|
||||
</span><span class=identifier>parse</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>str</span><span class=special>)
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>return </span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>str</span><span class=special>,
|
||||
|
||||
</span><span class=comment>// Begin grammar
|
||||
</span><span class=special>(
|
||||
</span><span class=identifier>real_p
|
||||
</span><span class=special>[
|
||||
</span><span class=identifier>bind</span><span class=special>(&</span><span class=identifier>self_t</span><span class=special>::</span><span class=identifier>add</span><span class=special>, </span><span class=keyword>this</span><span class=special>, </span><span class=identifier>_1</span><span class=special>)
|
||||
</span><span class=special>]
|
||||
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=literal>','
|
||||
</span><span class=special>>> </span><span class=identifier>real_p
|
||||
</span><span class=special>[
|
||||
</span><span class=identifier>bind</span><span class=special>(&</span><span class=identifier>self_t</span><span class=special>::</span><span class=identifier>add</span><span class=special>, </span><span class=keyword>this</span><span class=special>, </span><span class=identifier>_1</span><span class=special>)
|
||||
</span><span class=special>]
|
||||
</span><span class=special>)
|
||||
</span><span class=special>)
|
||||
</span><span class=special>,
|
||||
</span><span class=comment>// End grammar
|
||||
|
||||
</span><span class=identifier>space_p</span><span class=special>).</span><span class=identifier>full</span><span class=special>;
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=keyword>void
|
||||
</span><span class=identifier>add</span><span class=special>(</span><span class=keyword>double </span><span class=identifier>n</span><span class=special>)
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>v</span><span class=special>.</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>n</span><span class=special>);
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=identifier>vector</span><span class=special><</span><span class=keyword>double</span><span class=special>> </span><span class=identifier>v</span><span class=special>;
|
||||
</span><span class=special>};
|
||||
</span></code></pre>
|
||||
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/bind.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
|
||||
<p>This parser parses a comma separated list of real numbers and stores them
|
||||
in a <tt>vector&lt;double&gt;</tt>. Boost.bind creates a Spirit-conforming semantic action
|
||||
from the <tt>list_parser</tt>'s member function <tt>add</tt>.</p>
|
||||
<h3>Lambda and Phoenix</h3>
|
||||
<p> There's a library, authored by yours truly, named <a href="../phoenix/index.html">Phoenix</a>.
|
||||
While this is not officially part of the Spirit distribution, this library has
|
||||
been used extensively to experiment on advanced FP techniques in C++. This library
|
||||
is highly influenced by <a href="https://people.cs.umass.edu/~yannis/fc++/">FC++</a>
|
||||
and boost Lambda (<a href="http://www.boost.org/libs/lambda/index.html">BLL</a>).</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16">
|
||||
BLL</b><br>
|
||||
<br>
|
||||
Inasmuch as Phoenix is influenced by boost Lambda (<a href="http://www.boost.org/libs/lambda/index.html">BLL</a>),
|
||||
Phoenix innovations such as local variables, local functions and adaptable
|
||||
closures, in turn influenced BLL. Currently, BLL is very similar to Phoenix.
|
||||
Most importantly, BLL incorporated Phoenix's adaptable closures. In the
|
||||
future, Spirit will fully support BLL. </td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> Phoenix allows one to write semantic actions inline in C++ through lambda
|
||||
(an unnamed function) expressions. Here's a snippet from the <a href="../example/fundamental/phoenix_calc.cpp">phoenix_calc.cpp</a> example:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>expression
|
||||
</span><span class=special>= </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=special>(</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>+= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>-= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>)
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>term
|
||||
</span><span class=special>= </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=special>(</span><span class=literal>'*' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>*= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>/= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>)
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>factor
|
||||
</span><span class=special>= </span><span class=identifier>ureal_p</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
|
||||
</span><span class=special>| </span><span class=literal>'(' </span><span class=special>>> </span><span class=identifier>expression</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>] </span><span class=special>>> </span><span class=literal>')'
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=special>-</span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>])
|
||||
</span><span class=special>;</span></code></pre>
|
||||
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/phoenix_calc.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
|
||||
<p>You do not have to worry about the details for now. There is a lot going on here that needs to be explained. The succeeding chapters will be enlightening.</p>
|
||||
<p>Notice the use of lambda expressions such as:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>+= </span><span class=identifier>arg1</span></code></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16">
|
||||
<a name="lambda"></a>Lambda Expressions?</b><br>
|
||||
<br>
|
||||
Lambda expressions are actually unnamed partially applied functions where
|
||||
placeholders (e.g. arg1, arg2) are provided in place of some of the arguments.
|
||||
The reason this is called a lambda expression is that traditionally, such
|
||||
placeholders are written using the Greek letter lambda <img src="theme/lambda.png" width="15" height="22">.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>where <tt>expression.val</tt> is a closure variable of the expression rule
|
||||
(see <a href="closures.html">Closures</a>). <code><span class=identifier><tt>arg1</tt></span></code>
|
||||
is a placeholder for the first argument that the semantic action will receive
|
||||
(see <a href="../phoenix/doc/place_holders.html">Phoenix Place-holders</a>).
|
||||
In Boost.Lambda (BLL), this corresponds to <tt>_1</tt>. </p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="parametric_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="phoenix.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,108 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Functor Parser</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Functor
|
||||
Parser</b></font> </td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="list_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="refactoring.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The simplest way to write your hand-coded parser that works well with the rest
|
||||
of the Spirit library is to simply write a functor parser.</p>
|
||||
<p> A functor parser is expected to have the interface:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>struct </span><span class=identifier>functor
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef </span><span class=identifier>T </span><span class=identifier>result_t</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>std::ptrdiff_t
|
||||
</span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>, </span><span class=identifier>result_t</span><span class=special>& </span><span class=identifier>result</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};
|
||||
</span></code></pre>
|
||||
<p> where typedef T result_t; is the attribute type of the parser that will be
|
||||
passed back to the match result (see <a href="indepth_the_parser.html">In-depth:
|
||||
The Parser</a>). If the parser does not need to return an attribute, this can
|
||||
simply be nil_t. The <span class=keyword><tt>std::ptrdiff_t</tt></span> result
|
||||
is the number of characters matched by your parser. A negative value
|
||||
flags an unsuccessful match.</p>
|
||||
<p> A conforming functor parser can be transformed into a well-formed Spirit parser
|
||||
by wrapping it in the functor_parser template:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>functor_parser</span><span class=special><</span><span class=identifier>functor</span><span class=special>> </span><span class=identifier>functor_p</span><span class=special>;
|
||||
</span></code></pre>
|
||||
|
||||
<h2>Example</h2>
|
||||
<p> The following example puts the functor_parser into action:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>struct </span><span class=identifier>number_parser
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef </span><span class=keyword>int </span><span class=identifier>result_t</span><span class=special>;
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>std::ptrdiff_t</span>
|
||||
<span class=keyword>operator</span><span class=special>()(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>, </span><span class=identifier>result_t</span><span class=special>& </span><span class=identifier>result</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>at_end</span><span class=special>())
|
||||
</span><span class=keyword>return </span><span class=special>-</span><span class=number>1</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>char </span><span class=identifier>ch </span><span class=special>= </span><span class=special>*</span><span class=identifier>scan</span><span class=special>;
|
||||
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>ch </span><span class=special>< </span><span class=literal>'0' </span><span class=special>|| </span><span class=identifier>ch </span><span class=special>> </span><span class=literal>'9'</span><span class=special>)
|
||||
</span><span class=keyword>return </span><span class=special>-</span><span class=number>1</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>result </span><span class=special>= </span><span class=number>0</span><span class=special>;
|
||||
</span><span class=keyword>std::ptrdiff_t</span> <span class=identifier>len </span><span class=special>= </span><span class=number>0</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>do
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>result </span><span class=special>= </span><span class=identifier>result</span><span class=special>*</span><span class=number>10 </span><span class=special>+ </span><span class=keyword>int</span><span class=special>(</span><span class=identifier>ch </span><span class=special>- </span><span class=literal>'0'</span><span class=special>);
|
||||
</span><span class=special>++</span><span class=identifier>len</span><span class=special>;
|
||||
</span><span class=special>++</span><span class=identifier>scan</span><span class=special>;
|
||||
</span><span class=special>} </span><span class=keyword>while </span><span class=special>(!</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>at_end</span><span class=special>() </span><span class=special>&& </span><span class=special>(</span><span class=identifier>ch </span><span class=special>= </span><span class=special>*</span><span class=identifier>scan</span><span class=special>, </span><span class=identifier>ch </span><span class=special>>= </span><span class=literal>'0' </span><span class=special>&& </span><span class=identifier>ch </span><span class=special><= </span><span class=literal>'9'</span><span class=special>));
|
||||
|
||||
</span><span class=keyword>return </span><span class=identifier>len</span><span class=special>;
|
||||
</span><span class=special>}
|
||||
</span><span class=special>};
|
||||
|
||||
</span><span class=identifier>functor_parser</span><span class=special><</span><span class=identifier>number_parser</span><span class=special>> </span><span class=identifier>number_parser_p</span><span class=special>;
|
||||
</span></code></pre>
|
||||
<p> <img src="theme/lens.gif" width="15" height="16"> The full source code can be <a href="../example/fundamental/functor_parser.cpp">viewed here</a>. This is part of the Spirit distribution. </p>
|
||||
<p>To further understand the implementation, see <a href="indepth_the_scanner.html">In-depth:
|
||||
The Scanner</a> for the scanner API details. We now have a parser <tt>number_parser_p</tt> that we can use just like any other Spirit parser. Example:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>r </span><span class=special>= </span><span class=identifier>number_parser_p </span><span class=special>>> </span><span class=special>*(</span><span class=literal>',' </span><span class=special>>> </span><span class=identifier>number_parser_p</span><span class=special>);
|
||||
</span></code></pre>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="list_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="refactoring.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,271 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>The Grammar</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Grammar</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="scanner.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="subrules.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The <b>grammar</b> encapsulates a set of rules. The <tt>grammar</tt> class
|
||||
is a protocol base class. It is essentially an interface contract. The <tt>grammar</tt>
|
||||
is a template class that is parameterized by its derived class, <tt>DerivedT</tt>,
|
||||
and its context, <tt>ContextT</tt>. The template parameter ContextT defaults
|
||||
to <tt>parser_context</tt>, a predefined context. </p>
|
||||
<p>You need not be concerned at all with the ContextT template parameter unless
|
||||
you wish to tweak the low level behavior of the grammar. Detailed information
|
||||
on the ContextT template parameter is provided <a href="indepth_the_parser_context.html">elsewhere</a>.
|
||||
The <tt>grammar</tt> relies on the template parameter DerivedT, a grammar subclass
|
||||
to define the actual rules.</p>
|
||||
<p>Presented below is the public API. There may actually be more template parameters
|
||||
after <tt>ContextT</tt>. Everything after the <tt>ContextT</tt> parameter should
|
||||
not be of concern to the client and is strictly for internal use only.</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>ContextT </span><span class=special>= </span><span class=identifier>parser_context</span><span class=special><</span><span class=special>> >
|
||||
</span><span class=keyword>struct </span><span class=identifier>grammar</span><span class=special>;</span></font></code></pre>
|
||||
<h2>Grammar definition</h2>
|
||||
<p>A concrete sub-class inheriting from <tt>grammar</tt> is expected to have a
|
||||
nested template class (or struct) named <tt>definition</tt>:</p>
|
||||
<blockquote>
|
||||
<p><img src="theme/bullet.gif" width="13" height="13"> It is a nested template
|
||||
class with a typename <tt>ScannerT</tt> parameter.<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> Its constructor defines
|
||||
the grammar rules.<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> Its constructor is passed
|
||||
in a reference to the actual grammar <tt>self</tt>.<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> It has a member function
|
||||
named <tt>start</tt> that returns a reference to the start <tt>rule</tt>.</p>
|
||||
</blockquote>
|
||||
<h2>Grammar skeleton</h2>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_grammar </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>my_grammar</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=identifier>r</span><span class=special>;
|
||||
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>my_grammar </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>) </span><span class=special>{ </span><span class=identifier>r </span><span class=special>= </span><span class=comment>/*..define here..*/</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>r</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=special>};
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<p>Decoupling the scanner type from the rules that form a grammar allows the grammar
|
||||
to be used in different contexts possibly using different scanners. We do not
|
||||
care what scanner we are dealing with. The user-defined <tt>my_grammar</tt>
|
||||
can be used with <b>any</b> type of scanner. Unlike the rule, the grammar is
|
||||
not tied to a specific scanner type. See <a href="faq.html#scanner_business">"Scanner
|
||||
Business"</a> to see why this is important and to gain further understanding
|
||||
on this scanner-rule coupling problem.</p>
|
||||
<h2>Instantiating and using my_grammar</h2>
|
||||
<p>Our grammar above may be instantiated and put into action:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>my_grammar </span><span class=identifier>g</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>g</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>).</span><span class=identifier>full</span><span class=special>)
|
||||
</span><span class=identifier>cout </span><span class=special><< </span><span class=string>"parsing succeeded\n"</span><span class=special>;
|
||||
</span><span class=keyword>else
|
||||
</span><span class=identifier>cout </span><span class=special><< </span><span class=string>"parsing failed\n"</span><span class=special>;</span></font></code></pre>
|
||||
<p><tt>my_grammar</tt> <b>IS-A </b>parser and can be used anywhere a parser is
|
||||
expected, even referenced by another rule:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>rule</span><span class=special><> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>g </span><span class=special>>> </span><span class=identifier>str_p</span><span class=special>(</span><span class=string>"cool huh?"</span><span class=special>);</span></font></code></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> <b>Referencing
|
||||
grammars<br>
|
||||
</b><br>
|
||||
Like the rule, the grammar is also held by reference when it is placed in
|
||||
the right hand side of an EBNF expression. It is the responsibility of the
|
||||
client to ensure that the referenced grammar stays in scope and does not
|
||||
get destructed while it is being referenced. </td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2><a name="full_grammar"></a>Full Grammar Example</h2>
|
||||
<p>Recalling our original calculator example, here it is now rewritten using a
|
||||
grammar:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>calculator</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>group </span><span class=special>= </span><span class=literal>'(' </span><span class=special>>> </span><span class=identifier>expression </span><span class=special>>> </span><span class=literal>')'</span><span class=special>;
|
||||
</span><span class=identifier>factor </span><span class=special>= </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>group</span><span class=special>;
|
||||
</span><span class=identifier>term </span><span class=special>= </span><span class=identifier>factor </span><span class=special>>> </span><span class=special>*((</span><span class=literal>'*' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>));
|
||||
</span><span class=identifier>expression </span><span class=special>= </span><span class=identifier>term </span><span class=special>>> </span><span class=special>*((</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>));
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor</span><span class=special>, </span><span class=identifier>group</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=keyword>const</span><span class=special>&
|
||||
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>expression</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=special>};
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> A fully working example with
|
||||
<a href="semantic_actions.html">semantic actions</a> can be <a href="../example/fundamental/calc_plain.cpp">viewed
|
||||
here</a>. This is part of the Spirit distribution. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>self</b><br>
|
||||
<br>
|
||||
You might notice that the definition of the grammar has a constructor that
|
||||
accepts a const reference to the outer grammar. In the example above, notice
|
||||
that <tt>calculator::definition</tt> takes in a <tt>calculator const&
|
||||
self</tt>. While this is unused in the example above, in many cases, this
|
||||
is very useful. The self argument is the definition's window to the outside
|
||||
world. For example, the calculator class might have a reference to some
|
||||
state information that the definition can update while parsing proceeds
|
||||
through <a href="semantic_actions.html">semantic actions</a>. </td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>Grammar Capsules</h2>
|
||||
<p>As a grammar becomes complicated, it is a good idea to group parts into logical
|
||||
modules. For instance, when writing a language, it might be wise to put expressions
|
||||
and statements into separate grammar capsules. The grammar takes advantage of
|
||||
the encapsulation properties of C++ classes. The declarative nature of classes
|
||||
makes it a perfect fit for the definition of grammars. Since the grammar is
|
||||
nothing more than a class declaration, we can conveniently publish it in header
|
||||
files. The idea is that once written and fully tested, a grammar can be reused
|
||||
in many contexts. We now have the notion of grammar libraries.</p>
|
||||
<h2><a name="multithreading"></a>Reentrancy and multithreading</h2>
|
||||
<p>An instance of a grammar may be used in different places multiple times without
|
||||
any problem. The implementation is tuned to allow this at the expense of some
|
||||
overhead. However, we can save considerable cycles and bytes if we are certain
|
||||
that a grammar will only have a single instance. If this is desired, simply
|
||||
define <tt>BOOST_SPIRIT_SINGLE_GRAMMAR_INSTANCE</tt> before including any spirit
|
||||
header files.</p>
|
||||
<pre><font face="Courier New, Courier, mono"><code><span class="preprocessor"> #define</span></code></font><span class="preprocessor"><code><font face="Courier New, Courier, mono"> </font><tt>BOOST_SPIRIT_SINGLE_GRAMMAR_INSTANCE</tt></code></span></pre>
|
||||
<p> On the other hand, if a grammar is intended to be used in multithreaded code,
|
||||
we should then define <tt>BOOST_SPIRIT_THREADSAFE</tt> before including any
|
||||
spirit header files. In this case, you will also need to link against <a href="http://www.boost.org/libs/thread/doc/index.html">Boost.Threads</a>.</p>
|
||||
<pre><font face="Courier New, Courier, mono"><span class="preprocessor"> #define</span></font> <span class="preprocessor"><tt>BOOST_SPIRIT_THREADSAFE</tt></span></pre>
|
||||
<h2>Using more than one grammar start rule </h2>
|
||||
<p>Sometimes it is desirable to have more than one visible entry point to a grammar
|
||||
(apart from the start rule). To allow additional start points, Spirit provides
|
||||
a helper template <tt>grammar_def</tt>, which may be used as a base class for
|
||||
the <tt>definition</tt> subclass of your <tt>grammar</tt>. Here's an example:</p>
|
||||
<pre><code> <span class="comment">// this header has to be explicitly included</span>
|
||||
<span class="preprocessor">#include</span> <span class="string"><boost/spirit/utility/grammar_def.hpp></span>
|
||||
|
||||
<span class=keyword>struct </span><span class=identifier>calculator2 </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>calculator2</span><span class=special>>
|
||||
{
|
||||
</span> <span class="keyword">enum</span>
|
||||
{
|
||||
expression = 0,
|
||||
term = 1,
|
||||
factor = 2,
|
||||
};
|
||||
|
||||
<span class=special> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition
|
||||
</span><span class="special">:</span> <span class="keyword">public</span><span class=identifier> grammar_def</span><span class="special"><</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>>,</span> same<span class="special">,</span> same<span class="special">></span>
|
||||
<span class=special>{</span>
|
||||
<span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator2 </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
|
||||
{
|
||||
</span><span class=identifier>group </span><span class=special>= </span><span class=literal>'(' </span><span class=special>>> </span><span class=identifier>expression </span><span class=special>>> </span><span class=literal>')'</span><span class=special>;
|
||||
</span><span class=identifier>factor </span><span class=special>= </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>group</span><span class=special>;
|
||||
</span><span class=identifier>term </span><span class=special>= </span><span class=identifier>factor </span><span class=special>>> *((</span><span class=literal>'*' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>) | (</span><span class=literal>'/' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>));
|
||||
</span><span class=identifier>expression </span><span class=special>= </span><span class=identifier>term </span><span class=special>>> *((</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>) | (</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>));</span>
|
||||
|
||||
<span class="keyword">this</span><span class="special">-></span>start_parsers<span class="special">(</span>expression<span class="special">,</span> term<span class="special">,</span> factor<span class="special">);</span>
|
||||
<span class="special">}</span>
|
||||
|
||||
<span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor, group</span><span class=special>;
|
||||
</span><span class=special> };
|
||||
};</span></code></pre>
|
||||
<p>The <tt>grammar_def</tt> template has to be instantiated with the types of
|
||||
all the rules you wish to make visible from outside the <tt>grammar</tt>:</p>
|
||||
<pre><code><span class=identifier> </span><span class=identifier>grammar_def</span><span class="special"><</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>>,</span> same<span class="special">,</span> same<span class="special">></span></code> </pre>
|
||||
<p>The shorthand notation <tt>same</tt> is used to indicate that the same type
|
||||
be used as specified by the previous template parameter (e.g. <code><tt>rule<ScannerT></tt></code>).
|
||||
Obviously, <tt>same</tt> may not be used as the first template parameter. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <img src="theme/bulb.gif" width="13" height="18"> <strong>grammar_def
|
||||
start types</strong><br>
|
||||
<br>
|
||||
It may not be obvious, but it is interesting to note that aside from rule<>s,
|
||||
any parser type may be specified (e.g. chlit<>, strlit<>, int_parser<>,
|
||||
etc.).</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Using the grammar_def class, there is no need to provide a <tt>start()</tt> member
|
||||
function anymore. Instead, you'll have to insert a call to <tt>this->start_parsers()</tt>
|
||||
(which is a member function of the <tt>grammar_def</tt> template) to define
|
||||
the start symbols for your <tt>grammar</tt>. <img src="theme/note.gif" width="16" height="16">
|
||||
Note that the number and the sequence of the rules used as the parameters to
|
||||
the <tt>start_parsers()</tt> function should match the types specified in the
|
||||
<tt>grammar_def</tt> template:</p>
|
||||
<pre><code> <span class="keyword">this</span><span class="special">-></span>start_parsers<span class="special">(</span>expression<span class="special">,</span> term<span class="special">,</span> factor<span class="special">);</span></code></pre>
|
||||
<p> The grammar entry point may be specified using the following syntax:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> g</span><span class="special">.</span><span class=identifier>use_parser</span><span class="special"><</span><span class=identifier>N</span><span class=special>>() </span><span class="comment">// Where g is your grammar and N is the Nth entry.</span></font></code></pre>
|
||||
<p>This sample shows how to use the <tt>term</tt> rule from the <tt>calculator2</tt>
|
||||
grammar above:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> calculator2 g</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>
|
||||
first</span><span class=special>, </span><span class=identifier>last</span><span class=special>,
|
||||
</span><span class=identifier>g</span><span class="special">.</span><span class=identifier>use_parser</span><span class="special"><</span><span class=identifier>calculator2::term</span><span class=special>>(),</span><span class=identifier>
|
||||
space_p</span><span class=special>
|
||||
).</span><span class=identifier>full</span><span class=special>)
|
||||
{
|
||||
</span><span class=identifier>cout </span><span class=special><< </span><span class=string>"parsing succeeded\n"</span><span class=special>;
|
||||
}
|
||||
</span><span class=keyword>else</span> <span class="special">{</span>
|
||||
<span class=identifier>cout </span><span class=special><< </span><span class=string>"parsing failed\n"</span><span class=special>;
|
||||
}</span></font></code></pre>
|
||||
<p>The template parameter for the <tt>use_parser<></tt> template type should
|
||||
be the zero-based index into the list of rules specified in the <tt>start_parsers()</tt>
|
||||
function call. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> <tt><strong>use_parser<0></strong></tt><br>
|
||||
<br>
|
||||
Note that using <span class="literal">0</span> (zero) as the template parameter
|
||||
to <tt>use_parser</tt> is equivalent to using the start rule, exported by
|
||||
conventional means through the <tt>start()</tt> function, as shown in the
|
||||
first <tt><a href="grammar.html#full_grammar">calculator</a></tt> sample
|
||||
above. So this notation may be used even for grammars exporting one rule
|
||||
through their <tt>start()</tt> function only. On the other hand, calling a
|
||||
<tt>grammar</tt> without the <tt>use_parser</tt> notation will execute the
|
||||
rule specified as the first parameter to the <tt>start_parsers()</tt> function.
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The maximum number of usable start rules is limited by the preprocessor constant:</p>
|
||||
<pre> <span class="identifier">BOOST_SPIRIT_GRAMMAR_STARTRULE_TYPE_LIMIT</span> <span class="comment">// defaults to 3</span></pre>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="scanner.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="subrules.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
Copyright © 2003-2004 Hartmut Kaiser <br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,114 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Includes</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Includes</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="quickref.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="portability.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>Modules</h2>
|
||||
<p>Spirit is designed to be header only. Generally, there are no libraries to build
|
||||
and link against. Certain features, however, require additional libraries; in particular
|
||||
the <a href="regular_expression_parser.html">regular expression parser</a> requires
|
||||
<a href="http://www.boost.org/libs/regex/index.html">Boost.Regex</a> and <a href="grammar.html#multithreading">multithreading support</a>
|
||||
requires <a href="http://www.boost.org/libs/thread/doc/index.html">Boost.Threads</a>.</p>
|
||||
<p>Using Spirit is as easy as including the main header file:</p>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">.</span>hpp<span class="special">></span></pre>
|
||||
<p>Doing so will include all the header files. This might not be desirable. A
|
||||
low cholesterol alternative is to include only the module that you need. Each
|
||||
of the modules has its own header file. The master spirit header file actually
|
||||
includes all the module files. To avoid unnecessary inclusion of features that
|
||||
you do not need, it is better to include only the modules that you need.</p>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">.</span>hpp<span class="special">><br> </span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">.</span>hpp<span class="special">>
|
||||
<span class="preprocessor">#include</span> <</span>boost<span class="special">/</span>spirit<span class="special">/</span>core<span class="special">.</span>hpp<span class="special">></span><span class="special">
|
||||
</span><span class="special"></span><span class="special"> <span class="preprocessor">#include</span> <</span>boost<span class="special">/</span>spirit<span class="special">/</span>debug<span class="special">.</span>hpp<span class="special">></span><span class="special">
|
||||
</span><span class="special"> <span class="preprocessor">#include</span> <</span>boost<span class="special">/</span>spirit<span class="special">/</span>dynamic<span class="special">.</span>hpp<span class="special">></span><span class="special">
|
||||
<span class="preprocessor">#include</span> <</span>boost<span class="special">/</span>spirit<span class="special">/</span>error_handling<span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special">
|
||||
<span class="preprocessor">#include</span> <</span>boost<span class="special">/</span>spirit<span class="special">/</span>iterator<span class="special">.</span>hpp<span class="special">>
|
||||
<span class="preprocessor">#include</span> <</span>boost<span class="special">/</span>spirit<span class="special">/</span>meta<span class="special">.</span>hpp<span class="special">></span><span class="special">
|
||||
</span><span class="special"></span><span class="special"> <span class="preprocessor">#include</span> <</span>boost<span class="special">/</span>spirit<span class="special">/</span>symbols<span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special">
|
||||
<span class="preprocessor">#include</span> <</span>boost<span class="special">/</span>spirit<span class="special">/</span>tree<span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special">
|
||||
<span class="preprocessor">#include</span> <</span>boost<span class="special">/</span>spirit<span class="special">/</span>utility<span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span></pre>
|
||||
<h2>Sub-Modules</h2>
|
||||
<p> For even finer control over header file inclusion, you can include only the
|
||||
specific files that you need. Each module is in its own sub-directory:</p>
|
||||
<h3>actor</h3>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">assign_actor</span><span class="special">.</span>hpp<span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">assign_key_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">clear_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">decrement_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">erase_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span> <br> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">increment_actor</span><span class="special">.</span>hpp<span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">insert_key_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">push_back_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">push_front_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">swap_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span></pre>
|
||||
<h3>attribute</h3>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">/</span><span class="identifier">closure</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">/</span>closure_context.hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">/</span>parametric.hpp<span class="special">></span></pre>
|
||||
<h3>debug</h3>
|
||||
<p><img src="theme/alert.gif" width="16" height="16"> The debug module should
|
||||
not be directly included. See <a href="debugging.html">Debugging</a> for more
|
||||
info on how to use Spirit's debugger. </p>
|
||||
<h3>dynamic</h3>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>dynamic<span class="special">/</span><span class="identifier">for</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>dynamic<span class="special">/</span>if.hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>lazy.hpp<span class="special">> <br> </span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>rule_alias.hpp<span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>select.hpp<span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>stored_rule.hpp<span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>switch.hpp<span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>while.hpp<span class="special">> </span></pre>
|
||||
<h3>error_handling</h3>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>error_handling<span class="special">/</span><span class="identifier">exceptions</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">></span></pre>
|
||||
<h3>iterator</h3>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>iterator<span class="special">/</span><span class="identifier">file_iterator</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">iterator</span><span class="special">/</span>fixed_size_queue.hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">iterator</span><span class="special">/</span>multi_pass.hpp<span class="special">> <br> </span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">iterator</span><span class="special">/</span>position_iterator.hpp<span class="special">></span></pre>
|
||||
<h3>meta</h3>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>meta<span class="special">/</span><span class="identifier">as_parser</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">meta</span><span class="special">/</span>fundamental.hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">meta</span><span class="special">/</span>parser_traits.hpp<span class="special">> <br> </span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">meta</span><span class="special">/</span>refactoring.hpp<span class="special">><br> </span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">meta</span><span class="special">/</span>traverse.hpp<span class="special">></span></pre>
|
||||
<h3>tree</h3>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">tree</span><span class="special">/</span><span class="identifier">ast</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">tree</span><span class="special">/</span>parse_tree.hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">tree</span><span class="special">/</span>parse_tree_utils.hpp<span class="special">></span><span class="special"><br> </span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">tree</span><span class="special">/</span>tree_to_xml.hpp<span class="special">></span></pre>
|
||||
<h3>utility</h3>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">chset</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">> <br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">chset_operators</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">><br> </span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">confix</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">distinct</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">escape_char</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">flush_multi_pass</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">functor_parser</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">lists</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">loops</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">regex</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">>
|
||||
</span><span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">scoped_lock</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">>
|
||||
</span></pre>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="quickref.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="portability.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,287 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>In-depth: The Parser</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>In-depth: The Parser</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="semantic_actions.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_scanner.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>What makes Spirit tick? Now on to some details... The parser class is the most
|
||||
fundamental entity in the framework. A parser accepts a scanner comprised of
|
||||
a first-last iterator pair and returns a match object as its result. The iterators
|
||||
delimit the data currently being parsed. The match object evaluates to true
|
||||
if the parse succeeds, in which case the input is advanced accordingly. Each
|
||||
parser can represent a specific pattern or algorithm, or it can be a more complex
|
||||
parser formed as a composition of other parsers.</p>
|
||||
<p>All parsers inherit from the base template class, parser:</p>
|
||||
<pre>
|
||||
<span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>parser
|
||||
</span><span class=special>{
|
||||
</span><span class=comment>/*...*/
|
||||
|
||||
</span><span class=identifier>DerivedT</span><span class=special>& </span><span class=identifier>derived</span><span class=special>();
|
||||
</span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>derived</span><span class=special>() </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></pre>
|
||||
<p>This class is a protocol base class for all parsers. The parser class does
|
||||
not really know how to parse anything but instead relies on the template parameter
|
||||
<tt>DerivedT</tt> to do the actual parsing. This technique is known as the <a href="references.html#curious_recurring">"Curiously
|
||||
Recurring Template Pattern"</a> in template meta-programming circles. This
|
||||
inheritance strategy gives us the power of polymorphism without the virtual
|
||||
function overhead. In essence this is a way to implement <a href="references.html#generic_patterns">compile
|
||||
time polymorphism</a>.</p>
|
||||
<h2> parser_category_t</h2>
|
||||
<p> Each derived parser has a typedef <tt>parser_category_t</tt> that defines
|
||||
its category. By default, if one is not specified, it will inherit from the
|
||||
base parser class which typedefs its parser_category_t as <tt>plain_parser_category</tt>.
|
||||
Some template classes are provided to distinguish different types of parsers.
|
||||
The following categories are the most generic. More specific types may inherit
|
||||
from these.</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title">Parser categories</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="33%"><tt>plain_parser_category</tt></td>
|
||||
<td class="table_cells" width="67%">Your plain vanilla parser</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="33%"><tt>binary_parser_category</tt></td>
|
||||
<td class="table_cells" width="67%">A parser that has subjects a and b (e.g.
|
||||
alternative)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="33%"><tt>unary_parser_category</tt></td>
|
||||
<td class="table_cells" width="67%">A parser that has a single subject (e.g.
|
||||
kleene star)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="33%"><tt>action_parser_category</tt></td>
|
||||
<td class="table_cells" width="67%">A parser with an attached semantic action</td>
|
||||
</tr>
|
||||
</table>
|
||||
<pre><span class=identifier> </span><span class=keyword>struct </span><span class=identifier>plain_parser_category </span><span class=special>{};
|
||||
</span><span class=keyword>struct </span><span class=identifier>binary_parser_category </span><span class=special>: </span><span class=identifier>plain_parser_category </span><span class=special>{};
|
||||
</span><span class=keyword>struct </span><span class=identifier>unary_parser_category </span><span class=special>: </span><span class=identifier>plain_parser_category </span><span class=special>{};
|
||||
</span><span class=keyword>struct </span><span class=identifier>action_parser_category </span><span class=special>: </span><span class=identifier>unary_parser_category </span><span class=special>{};</span></pre>
|
||||
<h2>embed_t</h2>
|
||||
<p>Each parser has a typedef <tt>embed_t</tt>. This typedef specifies how a parser
|
||||
is embedded in a composite. By default, if one is not specified, the parser
|
||||
will be embedded by value. That is, a copy of the parser is placed as a member
|
||||
variable of the composite. Most parsers are embedded by value. In certain situations
|
||||
however, this is not desirable or possible. One particular example is the <a href="rule.html">rule</a>.
|
||||
The rule, unlike other parsers, is embedded by reference.</p>
|
||||
<h2><a name="match"></a>The match</h2>
|
||||
<p>The match holds the result of a parser. A match object evaluates to true when
|
||||
a successful match is found, otherwise false. The length of the match is the
|
||||
number of characters (or tokens) that are successfully matched. This can be queried
|
||||
through its <tt>length()</tt> member function. A negative value means that the
|
||||
match is unsuccessful. </p>
|
||||
<p> Each parser may have an associated attribute. This attribute is also returned
|
||||
back to the client on a successful parse through the match object. We can get
|
||||
this attribute via the match's <tt>value()</tt> member function. Be warned though
|
||||
that the match's attribute may be invalid, in which case, getting the attribute
|
||||
will result in an exception. The member function <tt>has_valid_attribute()</tt>
|
||||
can be queried to know if it is safe to get the match's attribute. The attribute
|
||||
may be set anytime through the member function <tt>value(v)</tt> where <tt>v</tt>
|
||||
is the new attribute value.<br>
|
||||
<br>
|
||||
A match attribute is valid:</p>
|
||||
<ul>
|
||||
<li> on a successful match</li>
|
||||
<li>when its value is set through the <tt>value(val)</tt> member function</li>
|
||||
<li> if it is assigned or copied from a compatible match object (e.g. <tt>match<double></tt>
|
||||
from <tt>match<int></tt>) with a valid attribute. A match object <tt>A</tt>
|
||||
is compatible with another match object <tt>B</tt> if the attribute type of
|
||||
<tt>A</tt> can be assigned from the attribute type of <tt>B</tt>
|
||||
(i.e. <tt>a = b;</tt> must compile).</li>
|
||||
</ul>
|
||||
<p>The match attribute is undefined:</p>
|
||||
<ul>
|
||||
<li>on an unsuccessful match </li>
|
||||
<li>when an attempt is made to copy or assign from another match object with an incompatible
|
||||
attribute type (e.g. <tt>match<std::string></tt> from <tt>match<int></tt>).</li>
|
||||
</ul>
|
||||
<h3>The match class:</h3>
|
||||
<pre><span class=keyword> template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=keyword> class </span><span class=identifier>match
|
||||
</span><span class=keyword> </span><span class=special>{
|
||||
</span><span class=keyword> public</span><span class=special>:
|
||||
|
||||
</span><span class=keyword> </span><span class=comment>/*...*/
|
||||
|
||||
</span><span class=special> </span><span class=keyword> typedef</span><span class="identifier"> T attr_t</span><span class="special">;<br>
|
||||
</span><span class=keyword> </span><span class="special"> </span><span class=keyword>operator safe_bool</span><span class=special>() </span><span class=keyword>const</span>; <span class="comment">// convertible to a bool</span>
|
||||
<span class=keyword> int </span><span class=identifier>length</span><span class=special>() </span><span class=keyword>const</span>;
|
||||
<span class="keyword">bool</span> has_valid_attribute<span class="special">()</span> <span class="keyword">const</span><span class="special">;</span>
|
||||
<span class=keyword> </span> <span class=identifier>void</span><span class=special> </span><span class=identifier>value</span><span class=special>(</span><span class="identifier">T </span><span class="keyword">const</span><span class=special>&);</span><span class=keyword>
|
||||
</span><span class=identifier>T </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>value</span><span class=special>() </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=keyword> </span><span class=special>};</span></pre>
|
||||
<h2>match_result</h2>
|
||||
<p>It has been mentioned repeatedly that the parser returns a match object as
|
||||
its result. This is a simplification. Actually, for the sake of genericity,
|
||||
parsers are really not hard-coded to return a match object. More accurately,
|
||||
a parser returns an object that adheres to a conceptual interface, of which
|
||||
the match is an example. Nevertheless, we shall call the result type of a parser
|
||||
a match object regardless of whether it is actually a match class, a derivative or a
|
||||
totally unrelated type.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>Meta-functions</b><br>
|
||||
<br>
|
||||
What are meta-functions? We all know what functions look like. In simplest
|
||||
terms, a function accepts some arguments and returns a result. Here is the
|
||||
function we all love so much:<br>
|
||||
<br>
|
||||
<code><span class="keyword">int</span> identity_func<span class="special">(</span><span class="keyword">int</span>
|
||||
arg<span class="special">)</span><br>
|
||||
<span class="special">{</span> <span class="keyword">return</span> arg<span class="special">;
|
||||
}</span> <span class="comment">// return the argument arg</span><br>
|
||||
</code><br>
|
||||
Meta-functions are essentially the same. These beasts also accept arguments
|
||||
and return a result. However, while functions work at runtime on values,
|
||||
meta-functions work at compile time on types (or constants, but we shall
|
||||
deal only with types). The meta-function is a template class (or struct).
|
||||
The template parameters are the arguments to the meta-function and a typedef
|
||||
within the class is the meta-function's return type. Here is the corresponding
|
||||
meta-function:<code><br>
|
||||
<br>
|
||||
<span class="keyword">template</span> <span class="special"><</span><span class="keyword">typename</span>
|
||||
ArgT<span class="special">></span><br>
|
||||
<span class="keyword">struct</span> identity_meta_func<br>
|
||||
<span class="special">{</span> <span class="keyword">typedef</span> ArgT
|
||||
type<span class="special">; }; </span><span class="comment">// return the
|
||||
argument ArgT</span><br>
|
||||
<br>
|
||||
</code>The meta-function above is invoked as:<br>
|
||||
<br>
|
||||
<code><span class="keyword">typename</span> identity_meta_func<span class="special"><</span>ArgT<span class="special">>::</span>type</code><br>
|
||||
<br>
|
||||
By convention, meta-functions return the result through the typedef <tt>type</tt>.
|
||||
Take note that <tt>typename</tt> is only required within templates.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The actual match type used by the parser depends on two types: the parser's
|
||||
attribute type and the scanner type. <tt>match_result</tt> is the meta-function
|
||||
that returns the desired match type given an attribute type and a scanner type.
|
||||
</p>
|
||||
<p>Usage:</p>
|
||||
<pre> <span class=keyword>typename </span><span class=identifier>match_result</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>T</span><span class=special>>::</span><span class=identifier>type</span></pre>
|
||||
<p>The meta-function basically answers the question "given a scanner type
|
||||
<tt>ScannerT</tt> and an attribute type <tt>T</tt>, what is the desired match
|
||||
type?" [<img src="theme/note.gif" width="16" height="16"> <tt>typename</tt>
|
||||
is only required within templates ].</p>
|
||||
<h2>The parse member function</h2>
|
||||
<p>Concrete sub-classes inheriting from parser must have a corresponding member
|
||||
function <tt>parse(...)</tt> compatible with the conceptual interface:<br>
|
||||
</p>
|
||||
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=identifier>RT
|
||||
</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special></span> const<span class=special>& </span>scan<span class=identifier></span><span class=special>) </span><span class=keyword>const</span><span class=special>;</span></pre>
|
||||
<p>where <tt>RT</tt> is the desired return type of the parser. </p>
|
||||
<h2>The parser result</h2>
|
||||
<p>Concrete sub-classes inheriting from parser in most cases need to have a nested
|
||||
meta-function <tt>result</tt> that returns the result <tt>type</tt> of the parser's
|
||||
parse member function, given a scanner type. The meta-function has the form:</p>
|
||||
<pre><span class=keyword> template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>result
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef </span>RT <span class=identifier></span><span class=identifier>type</span><span class=special>;
|
||||
</span><span class=special>};</span></pre>
|
||||
<p>where <tt>RT</tt> is the desired return type of the parser. This is usually,
|
||||
but not always, dependent on the template parameter <tt>ScannerT</tt>. For example,
|
||||
given an attribute type <tt>int</tt>, we can use the match_result metafunction:</p>
|
||||
<pre><span class=keyword> template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>result
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef typename </span><span class=identifier>match_result</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class="keyword">int</span><span class=special>>::</span><span class=identifier>type type</span><span class=special>;
|
||||
};</span></pre>
|
||||
<p>If a parser does not supply a result metafunction, a default is provided by
|
||||
the base parser class.<span class=special> </span>The default is declared as:</p>
|
||||
<pre><span class=keyword> template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>result
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef typename </span><span class=identifier>match_result</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class="identifier">nil_t</span><span class=special>>::</span><span class=identifier>type type</span><span class=special>;
|
||||
};</span></pre>
|
||||
<p>Without a result metafunction, notice that the parser's default attribute is
|
||||
<tt>nil_t</tt> (i.e. the parser has no attribute).</p>
|
||||
<h2><span class=special></span>parser_result</h2>
|
||||
<p>Given a scanner type <tt>ScannerT</tt> and a parser type <tt>ParserT</tt>,
|
||||
what will be the actual result of the parser? The answer to this question is
|
||||
provided by the <tt>parser_result</tt> meta-function.</p>
|
||||
<p>Usage:</p>
|
||||
<pre> <span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>ParserT, ScannerT</span><span class=special>>::</span><span class=identifier>type</span></pre>
|
||||
<p>In general, the meta-function just forwards the invocation to the parser's
|
||||
result meta-function:</p>
|
||||
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>parser_result
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef </span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>::</span><span class=keyword>template </span><span class=identifier>result</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type </span><span class=identifier>type</span><span class=special>;
|
||||
</span><span class=special>};</span></pre>
|
||||
<p>This is similar to a global function calling a member function. Most of the
|
||||
time, the usage above is equivalent to:</p>
|
||||
<pre><span class=keyword> typename </span><span class=identifier>ParserT</span><span class=special>::</span><span class=keyword>template </span><span class=identifier>result</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type</span></pre>
|
||||
<p>Yet, this should not be relied upon to be true all the time because the parser_result
|
||||
metafunction might be specialized for specific parser and/or scanner types.</p>
|
||||
<p>The parser_result metafunction makes the signature of the required parse member
|
||||
function almost canonical:</p>
|
||||
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>self_t, ScannerT</span><span class=special>>::</span><span class=identifier>type</span><br> <span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special></span> const<span class=special>& </span>scan<span class=identifier></span><span class=special>) </span><span class=keyword>const</span><span class=special>;</span></pre>
|
||||
<p>where<span class=special></span> <tt>self_t</tt> is a typedef to the parser.</p>
|
||||
<h2>parser class declaration</h2>
|
||||
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>parser
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef </span><span class=identifier>DerivedT embed_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>DerivedT derived_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>plain_parser_category parser_category_t</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class="keyword">typename</span> ScannerT<span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>result
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef typename </span><span class=identifier>match_result</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>nil_t</span><span class=special>>::</span><span class=identifier>type type</span><span class=special>;
|
||||
};
|
||||
|
||||
</span><span class=identifier>DerivedT</span><span class=special>& </span><span class=identifier>derived</span><span class=special>();
|
||||
</span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>derived</span><span class=special>() </span><span class=keyword>const</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ActionT</span><span class=special>>
|
||||
</span><span class=identifier>action</span><span class=special><</span><span class=identifier>DerivedT</span><span class=special>, </span><span class=identifier>ActionT</span><span class=special>>
|
||||
</span><span class=keyword>operator</span><span class=special>[](</span><span class=identifier>ActionT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>actor</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
|
||||
};</span></pre>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="semantic_actions.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_scanner.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,226 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>In-depth: The Parser Context</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>In-depth:
|
||||
The Parser Context</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html">
|
||||
<img src="theme/u_arr.gif" border="0" width="20" height="19"></a></td>
|
||||
<td width="30"><a href="indepth_the_scanner.html">
|
||||
<img src="theme/l_arr.gif" border="0" width="20" height="19"></a></td>
|
||||
<td width="30"><a href="predefined_actors.html">
|
||||
<img src="theme/r_arr.gif" border="0" width="20" height="19"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>Overview</h2>
|
||||
<p>The parser's <b>context</b> is yet another concept. An instance (object) of
|
||||
the <tt>context</tt> class is created before a non-terminal starts parsing and
|
||||
is destructed after parsing has concluded. A non-terminal is either a <tt>rule</tt>,
|
||||
a <tt>subrule</tt>, or a <tt>grammar</tt>. Non-terminals have a <tt>ContextT</tt> template parameter. The following pseudo code depicts what's happening when
|
||||
a non-terminal is invoked:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>return_type
|
||||
</span><span class=identifier>a_non_terminal</span><span class=special>::</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{
|
||||
</span><span class=identifier>context_t ctx</span><span class=special>(/**/);
|
||||
</span><span class=identifier>ctx</span><span class=special>.</span><span class=identifier>pre_parse</span><span class=special>(/**/);
|
||||
|
||||
</span><span class=comment>// main parse code of the non-terminal here...
|
||||
|
||||
</span><span class=keyword>return </span><span class=identifier>ctx</span><span class=special>.</span><span class=identifier>post_parse</span><span class=special>(/**/);
|
||||
}</span></font></code></pre>
|
||||
<p>The context is provided for extensibility. Its main purpose is to expose the
|
||||
start and end of the non-terminal's parse member function to accommodate external
|
||||
hooks. We can extend the non-terminal in a multitude of ways by writing specialized
|
||||
context classes, without modifying the class itself. For example, we can make
|
||||
the non-terminal emit debug diagnostics information by writing a context class
|
||||
that prints out the current state of the scanner at each point in the parse
|
||||
traversal where the non-terminal is invoked.</p>
|
||||
<p>Example of a parser context that prints out debug information:</p>
|
||||
<pre><code><font color="#000000"> pre_parse</font>:<font color="#000000"> non-terminal XXX is entered<font color="#0000ff">.</font> The current state of the input
|
||||
is <font color="#616161"><i>"hello world, this is a test"</i></font>
|
||||
|
||||
post_parse</font>:<font color="#000000"> non-terminal XXX has concluded<font color="#0000ff">,</font> the non-terminal matched <font color="#616161"><i>"hello world"</i></font><font color="#0000ff">.</font>
|
||||
The current state of the input is <font color="#616161"><i>", this is a test"</i></font></font></code></pre>
|
||||
<p>Most of the time, the context will be invisible from the user's view. In general,
|
||||
clients of the framework need not deal directly with, nor even know about, contexts.
|
||||
Power users, however, might find some use for contexts. Thus, this is part of
|
||||
the public API. Other parts of the framework in other layers above the core
|
||||
take advantage of the context to extend non-terminals. </p>
|
||||
<h2>Class declaration</h2>
|
||||
<p>The <tt>parser_context</tt> class is the default context class that the non-terminal
|
||||
uses. </p>
|
||||
<pre><span class=keyword> </span><span class="identifier">template</span> <span class="special"><</span><span class="keyword">typename</span> <span class="identifier">AttrT</span> <span class="special">=</span> <span class="identifier">nil_t</span><span class="special">></span><span class=keyword><br> struct </span><span class=identifier>parser_context
|
||||
</span><span class=special> {
|
||||
</span><span class=keyword>typedef </span>AttrT <span class=identifier>attr_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>implementation_defined base_t</span><span class=special>;
|
||||
</span><span class="keyword">typedef</span><span class=special> </span>parser_context_linker<span class="special"><</span>parser_context<span class="special"><</span><span class="identifier">AttrT</span><span class="special">></span> <span class="special">></span> <span class="identifier">context_linker_t</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>>
|
||||
</span><span class=identifier>parser_context</span><span class=special>(</span><span class=identifier>ParserT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p</span><span class=special>) {}
|
||||
|
||||
</span><span class=keyword> template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword> void
|
||||
</span><span class=identifier> pre_parse</span><span class=special>(</span><span class=identifier>ParserT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p</span><span class=special>, </span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>) {}
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ResultT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=identifier>ResultT</span><span class=special>&
|
||||
</span><span class=identifier> post_parse</span><span class=special>(</span><span class=identifier>ResultT</span><span class=special>& </span><span class=identifier>hit</span><span class=special>, </span><span class=identifier>ParserT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p</span><span class=special>, </span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return </span><span class=identifier>hit</span><span class=special>; }
|
||||
};</span></pre>
|
||||
<p>The non-terminal's <tt>ContextT</tt> template parameter is a concept. The <tt>parser_context</tt>
|
||||
class above is the simplest model of this concept. The default <tt>parser_context</tt>'s<tt>
|
||||
pre_parse</tt> and <tt>post_parse</tt> member functions are simply no-ops. You
|
||||
can think of the non-terminal's <tt>ContextT</tt> template parameter as the
|
||||
policy that governs how the non-terminal will behave before and after parsing.
|
||||
The client can supply her own context policy by passing a user defined context
|
||||
template parameter to a particular non-terminal.</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="8"> Parser Context Policies </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><strong><span class=identifier>attr_t</span></strong></td>
|
||||
<td class="table_cells">typedef: the attribute type of the non-terminal. See
|
||||
the <a href="indepth_the_parser.html#match">match</a>.</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells"><strong><span class=identifier>base_t</span></strong></td>
|
||||
<td class="table_cells">typedef: the base class of the non-terminal. The non-terminal
|
||||
inherits from this class.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><strong><span class="identifier">context_linker_t</span></strong></td>
|
||||
<td class="table_cells">typedef: this class type opens up the possibility
|
||||
for Spirit to plug in additional functionality into the non-terminal parse
|
||||
function or even bypass the given context. This should simply be typedefed
|
||||
to <tt>parser_context_linker<T></tt> where T is the type of the user
|
||||
defined context class.</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells"><strong>constructor</strong></td>
|
||||
<td class="table_cells">Construct the context. The non-terminal is passed as
|
||||
an argument to the constructor.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><strong>pre_parse</strong></td>
|
||||
<td class="table_cells">Do something prior to parsing. The non-terminal and
|
||||
the current scanner are passed as arguments.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><strong>post_parse</strong></td>
|
||||
<td class="table_cells">Do something after parsing. This is called regardless
|
||||
of the parse result. A reference to the parser's result is passed in. The
|
||||
context has the power to modify this. The non-terminal and the current scanner
|
||||
are also passed as arguments.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The <tt>base_t</tt> deserves further explanation. Here goes... The context
|
||||
is strictly a stack based class. It is created before parsing and destructed
|
||||
after the non-terminal's parse member function exits. Sometimes, we need
|
||||
auxiliary
|
||||
data that exists throughout the full lifetime of the non-terminal host.
|
||||
Since the non-terminal inherits from the context's <tt>base_t</tt>, the context
|
||||
itself, when created, gets access to this upon construction when the non-terminal
|
||||
is passed as an argument to the constructor. Ditto on <tt>pre_parse</tt> and
|
||||
<tt>post_parse</tt>.</p>
|
||||
<p>The non-terminal inherits from the context's <tt>base_t</tt> typedef. The sole
|
||||
requirement is that it is a class that is default constructible. The copy-construction
|
||||
and assignment requirements depend on the host. If the host requires it, so
|
||||
does the context's <tt>base_t</tt>. In general, it wouldn't hurt to provide
|
||||
these basic requirements.</p>
|
||||
<h2>Non-default Attribute Type </h2>
|
||||
<p>Right out of the box, the <tt>parser_context</tt> class may be parameterized with a type other than the default <tt>nil_t</tt>. The following code demonstrates the usage of the <tt>parser_context</tt> template with an explicit argument to declare rules with match results different from <tt>nil_t</tt>:</p>
|
||||
<pre><span class=number> </span><span class=identifier>rule</span><span class=special><</span><span class=identifier>parser_context</span><span class=special><</span><span class=keyword>int</span><span class=special>> </span><span class=special>> </span><span class=identifier>int_rule </span><span class=special>= </span><span class=identifier>int_p</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>parse</span><span class=special>(
|
||||
</span><span class=string>"123"</span><span class=special>,
|
||||
</span><span class=comment>// Using a returned value in the semantic action
|
||||
</span><span class=identifier>int_rule</span><span class=special>[</span><span class=identifier>cout </span><span class=special><< </span><span class=identifier>arg1 </span><span class=special><< </span><span class=identifier>endl</span><span class=special>]
|
||||
</span><span class=special>);</span> </pre>
|
||||
<p>In this example, <tt>int_rule</tt> is declared with <tt>int</tt> attribute type. Hence, the <tt>int_rule</tt> variable can hold any parser which returns an <tt>int</tt> value (for example <tt>int_p</tt> or <tt>bin_p</tt>). The important thing to note is that we can use the returned value in the semantic action bound to the <tt>int_rule</tt>. </p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/parser_context.cpp">parser_context.cpp</a> in the examples. This is part of the Spirit distribution.</p>
|
||||
<h2>An Example </h2>
|
||||
<p>As an example let's have a look at the Spirit parser context, which inserts some debug output to the parsing process:</p>
|
||||
<pre> <span class="keyword">template</span><<span class="keyword">typename</span> ContextT>
|
||||
<span class="keyword">struct</span> parser_context_linker : <span class="keyword">public</span> ContextT
|
||||
<span class="special">{</span>
|
||||
<span class="keyword">typedef</span> ContextT base_t;
|
||||
|
||||
<span class="keyword">template</span> <<span class="keyword">typename</span> ParserT>
|
||||
parser_context_linker(ParserT const& p)
|
||||
: ContextT(p) {}
|
||||
|
||||
<span class="comment">// This is called just before parsing of this non-terminal</span>
|
||||
<span class="keyword">template</span> <span class="special"><</span><span class="keyword">typename</span> ParserT<span class="special">,</span> <span class="keyword">typename</span> ScannerT<span class="special">></span>
|
||||
<span class="keyword">void</span> pre_parse<span class="special">(</span>ParserT <span class="keyword">const</span><span class="special">&</span> p<span class="special">,</span> ScannerT <span class="special">&</span>scan<span class="special">)</span>
|
||||
<span class="special">{</span>
|
||||
<span class="comment">// call the pre_parse function of the base class</span>
|
||||
<span class="keyword">this</span><span class="special">-></span>base_t<span class="special">::</span>pre_parse<span class="special">(</span>p<span class="special">,</span> scan<span class="special">);</span>
|
||||
<span class="preprocessor">
|
||||
#if</span> <span class="identifier">BOOST_SPIRIT_DEBUG_FLAGS</span> <span class="special">&</span> <span class="identifier">BOOST_SPIRIT_DEBUG_FLAGS_NODES</span>
|
||||
<span class="keyword">if</span> <span class="special">(</span>trace_parser<span class="special">(</span>p<span class="special">.</span>derived<span class="special">())) {</span>
|
||||
<span class="comment">// print out pre parse info</span>
|
||||
impl<span class="special">::</span>print_node_info<span class="special">(</span>
|
||||
<span class="keyword">false</span><span class="special">,</span> scan.get_level<span class="special">(),</span> <span class="keyword">false</span><span class="special">,</span>
|
||||
parser_name<span class="special">(</span>p.derived<span class="special">()),</span>
|
||||
scan<span class="special">.</span>first<span class="special">,</span> scan.last<span class="special">);</span>
|
||||
<span class="special">}</span>
|
||||
scan.get_level<span class="special">()++;</span> <span class="comment">// increase nesting level</span>
|
||||
<span class="preprocessor">#endif</span>
|
||||
<span class="special">}</span>
|
||||
<span class="comment">// This is called just after parsing of the current non-terminal</span>
|
||||
<span class="keyword">template</span> <span class="special"><</span><span class="keyword">typename</span> ResultT<span class="special">,</span> <span class="keyword">typename</span> ParserT<span class="special">,</span> <span class="keyword">typename</span> ScannerT<span class="special">></span>
|
||||
ResultT<span class="special">&</span> post_parse<span class="special">(</span>
|
||||
ResultT<span class="special">&</span> hit<span class="special">,</span> ParserT <span class="keyword">const</span><span class="special">&</span> p<span class="special">,</span> ScannerT<span class="special">&</span> scan<span class="special">)
|
||||
{</span>
|
||||
<span class="preprocessor">
|
||||
#if</span> <span class="identifier">BOOST_SPIRIT_DEBUG_FLAGS</span> <span class="special">&</span> <span class="identifier">BOOST_SPIRIT_DEBUG_FLAGS_NODES</span>
|
||||
<span class="special">--</span>scan.get_level<span class="special">();</span> <span class="comment">// decrease nesting level</span>
|
||||
<span class="keyword">if</span> <span class="special">(</span>trace_parser<span class="special">(</span>p<span class="special">.</span>derived<span class="special">())) {</span>
|
||||
impl<span class="special">::</span>print_node_info<span class="special">(</span>
|
||||
hit<span class="special">,</span> scan<span class="special">.</span>get_level<span class="special">(),</span> <span class="keyword">true</span><span class="special">,</span>
|
||||
parser_name<span class="special">(</span>p<span class="special">.</span>derived<span class="special">()),</span>
|
||||
scan<span class="special">.</span>first<span class="special">,</span> scan<span class="special">.</span>last<span class="special">);
|
||||
}</span>
|
||||
<span class="preprocessor">#endif</span>
|
||||
<span class="comment">// call the post_parse function of the base class</span>
|
||||
<span class="keyword">return</span> <span class="keyword">this</span><span class="special">-></span>base_t<span class="special">::</span>post_parse<span class="special">(</span>hit<span class="special">,</span> p<span class="special">,</span> scan<span class="special">);
|
||||
}
|
||||
};</span>
|
||||
</pre>
|
||||
<p>During debugging (<tt>BOOST_SPIRIT_DEBUG</tt> is defined) this parser context is injected into the derivation hierarchy of the current <tt>parser_context</tt>, which was originally specified to be used for a concrete parser, so the template parameter <tt>ContextT</tt> represents the original <tt>parser_context</tt>. For this reason the <tt>pre_parse</tt> and <tt>post_parse</tt> functions call their counterparts from the base class. Additionally, these functions call a special <tt>print_node_info</tt> function, which does the actual output of the parser state info of the current non-terminal. For more info about the printed information, you may want to have a look at the topic <a href="debugging.html">Debugging</a>.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html">
|
||||
<img src="theme/u_arr.gif" border="0" width="20" height="19"></a></td>
|
||||
<td width="30"><a href="indepth_the_scanner.html">
|
||||
<img src="theme/l_arr.gif" border="0" width="20" height="19"></a></td>
|
||||
<td width="30"><a href="predefined_actors.html">
|
||||
<img src="theme/r_arr.gif" border="0" width="20" height="19"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,290 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>In-depth The Scanner</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>In-depth:
|
||||
The Scanner</b></font> </td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_parser_context.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>Basic Scanner API </h2>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="10"> class scanner </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><span class=identifier>value_t</span></code></td>
|
||||
<td class="table_cells">typedef: The value type of the scanner's iterator</td>
|
||||
</tr>
|
||||
<td class="table_cells"><code><span class=identifier>ref_t</span></code></td>
|
||||
<td class="table_cells">typedef: The reference type of the scanner's iterator</td>
|
||||
</tr>
|
||||
<td class="table_cells"><code><span class=keyword>bool </span><span class=identifier>at_end</span><span class=special>()
|
||||
</span><span class=keyword>const</span></code></td>
|
||||
<td class="table_cells">Returns true if the input is exhausted</td>
|
||||
</tr>
|
||||
<td class="table_cells"><code><span class=identifier>value_t </span><span class=keyword>operator</span><span class=special>*()
|
||||
</span><span class=keyword>const</span></code></td>
|
||||
<td class="table_cells">Dereference/get a <code><span class=identifier>value_t</span></code>
|
||||
from the input</td>
|
||||
</tr>
|
||||
<td class="table_cells"><code><span class=keyword> </span><span class=identifier>scanner
|
||||
</span><span class=keyword>const</span><span class=special>& </span><span class=keyword>operator</span><span class=special>++()</span></code></td>
|
||||
<td class="table_cells">move the scanner forward</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><span class=identifier>IteratorT& first</span><span class=special></span></code></td>
|
||||
<td class="table_cells">The iterator pointing to the current input position.
|
||||
Held by reference</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><span class=identifier>IteratorT </span><span class=keyword>const</span>
|
||||
<span class=identifier>last</span><span class=special></span></code></td>
|
||||
<td class="table_cells">The iterator pointing to the end of the input. Held
|
||||
by value</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> The basic behavior of the scanner is handled by policies. The actual execution
|
||||
of the scanner's public member functions listed in the table above is implemented
|
||||
by the scanner policies.</p>
|
||||
<p> Three sets of policies govern the behavior of the scanner. These policies
|
||||
make it possible to extend Spirit non-intrusively. The scanner policies allow
|
||||
the core-functionality to be extended without requiring any potentially destabilizing
|
||||
changes to the code. A library writer might provide her own policies that override
|
||||
the ones that are already in place to fine tune the parsing process
|
||||
to fit her own needs. Layers above the core might also want to take advantage
|
||||
of this policy-based mechanism. Abstract syntax tree generation, debuggers and
|
||||
lexers come to mind.</p>
|
||||
<p> There are three sets of policies that govern:</p>
|
||||
<ul>
|
||||
<li>Iteration and filtering</li>
|
||||
<li>Recognition and matching</li>
|
||||
<li>Handling semantic actions</li>
|
||||
</ul>
|
||||
<a name="iteration_policy"></a>
|
||||
<h2>iteration_policy</h2>
|
||||
<p> Here are the default policies that govern iteration and filtering:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>struct </span><span class=identifier>iteration_policy
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>void
|
||||
</span><span class=identifier>advance</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{ </span><span class=special>++</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>first</span><span class=special>; </span><span class=special>}
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>bool </span><span class=identifier>at_end</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>first </span><span class=special>== </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>last</span><span class=special>; </span><span class=special>}
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=identifier>T </span><span class=identifier>filter</span><span class=special>(</span><span class=identifier>T </span><span class=identifier>ch</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>ch</span><span class=special>; </span><span class=special>}
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>::</span><span class=identifier>ref_t
|
||||
</span><span class=identifier>get</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{ </span><span class=keyword>return </span><span class=special>*</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>first</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=special>};</span></code></pre>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="8"> Iteration and filtering policies </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<tr>
|
||||
<td class="table_cells"><b>advance</b></td>
|
||||
<td class="table_cells">Move the iterator forward</td>
|
||||
</tr>
|
||||
<td class="table_cells"><b>at_end</b></td>
|
||||
<td class="table_cells">Return true if the input is exhausted</td>
|
||||
</tr>
|
||||
<td class="table_cells"><b>filter</b></td>
|
||||
<td class="table_cells">Filter a character read from the input</td>
|
||||
</tr>
|
||||
<td class="table_cells"><b>get</b></td>
|
||||
<td class="table_cells">Read a character from the input</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> The following code snippet demonstrates a simple policy that converts all
|
||||
characters to lower case:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>struct </span><span class=identifier>inhibit_case_iteration_policy </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>iteration_policy
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>>
|
||||
</span><span class=identifier>CharT filter</span><span class=special>(</span><span class=identifier>CharT ch</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>return </span>std::<span class=identifier>tolower</span><span class=special>(</span><span class=identifier>ch</span><span class=special>);
|
||||
}
|
||||
};</span></code></pre>
|
||||
<a name="match_policy"></a>
|
||||
<h2>match_policy</h2>
|
||||
<p> Here are the default policies that govern recognition and matching:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>struct </span><span class=identifier>match_policy
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>result </span><span class=special>
|
||||
{
|
||||
</span><span class=keyword>typedef </span><span class=identifier>match</span><span class=special><</span><span class=identifier>T</span><span class=special>> </span><span class=identifier>type</span><span class=special>; </span><span class=special>
|
||||
};
|
||||
|
||||
</span><span class=keyword>const </span><span class=identifier>match</span><span class=special><</span><span class=identifier>nil_t</span><span class=special>>
|
||||
</span><span class=identifier>no_match</span><span class=special>() </span><span class=keyword>const
|
||||
</span><span class=special>{ </span><span class=keyword>
|
||||
return </span><span class=identifier>match</span><span class=special><</span><span class=identifier>nil_t</span><span class=special>>(); </span><span class=special>
|
||||
}
|
||||
|
||||
</span><span class=keyword>const </span><span class=identifier>match</span><span class=special><</span><span class=identifier>nil_t</span><span class=special>>
|
||||
</span><span class=identifier>empty_match</span><span class=special>() </span><span class=keyword>const
|
||||
</span><span class=special>{ </span><span class=keyword>
|
||||
return </span><span class=identifier>match</span><span class=special><</span><span class=identifier>nil_t</span><span class=special>>(</span><span class=number>0</span><span class=special>, </span><span class=identifier>nil_t</span><span class=special>());
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>AttrT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>>
|
||||
</span><span class=identifier>match</span><span class=special><</span><span class=identifier>AttrT</span><span class=special>>
|
||||
</span><span class=identifier>create_match</span><span class=special>(
|
||||
</span><span class=keyword>std::size_t </span><span class=identifier>length</span><span class=special>,
|
||||
</span><span class=identifier>AttrT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>val</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=comment>/*first*/</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=comment>/*last*/</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{ </span><span class=keyword>
|
||||
return </span><span class=identifier>match</span><span class=special><</span><span class=identifier>AttrT</span><span class=special>>(</span><span class=identifier>length</span><span class=special>, </span><span class=identifier>val</span><span class=special>); </span><span class=special>
|
||||
}
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>MatchT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>>
|
||||
</span><span class=keyword>void
|
||||
</span><span class=identifier>group_match</span><span class=special>(
|
||||
</span><span class=identifier>MatchT</span><span class=special>& </span><span class=comment>/*m*/</span><span class=special>,
|
||||
</span><span class=identifier>parser_id </span><span class=keyword>const</span><span class=special>& </span><span class=comment>/*id*/</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=comment>/*first*/</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=comment>/*last*/</span><span class=special>) </span><span class=keyword>const </span><span class=special>{}
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>Match1T</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>Match2T</span><span class=special>>
|
||||
</span><span class=keyword>void
|
||||
</span><span class=identifier>concat_match</span><span class=special>(</span><span class=identifier>Match1T</span><span class=special>& </span><span class=identifier>l</span><span class=special>, </span><span class=identifier>Match2T </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>r</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{ </span><span class=identifier>
|
||||
l</span><span class=special>.</span><span class=identifier>concat</span><span class=special>(</span><span class=identifier>r</span><span class=special>);
|
||||
</span><span class=special>}
|
||||
</span><span class=special>};</span></code></pre>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="12"> Recognition and matching </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<tr>
|
||||
<td class="table_cells"><b>result</b></td>
|
||||
<td class="table_cells">A metafunction that returns a match type given an
|
||||
attribute type (see In-depth: The Parser)</td>
|
||||
</tr>
|
||||
<td class="table_cells"><b>no_match</b></td>
|
||||
<td class="table_cells">Create a failed match</td>
|
||||
</tr>
|
||||
<td class="table_cells"><b>empty_match</b></td>
|
||||
<td class="table_cells">Create an empty match. An empty match is a successful
|
||||
epsilon match (matching length == 0)</td>
|
||||
</tr>
|
||||
<td class="table_cells"><b>create_match</b></td>
|
||||
<td class="table_cells">Create a match given the matching length, an attribute
|
||||
and the iterator pair pointing to the matching portion of the input</td>
|
||||
</tr>
|
||||
<td class="table_cells"><b>group_match</b></td>
|
||||
<td class="table_cells">For non-terminals such as rules, this is called after
|
||||
a successful match has been made to allow post processing</td>
|
||||
</tr>
|
||||
<td class="table_cells"><b>concat_match</b></td>
|
||||
<td class="table_cells">Concatenate two match objects</td>
|
||||
</tr>
|
||||
</table>
|
||||
<a name="action_policy"></a>
|
||||
<h2>action_policy</h2>
|
||||
<p> The action policy has only one function for handling semantic actions:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>struct </span><span class=identifier>action_policy
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ActorT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>AttrT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>>
|
||||
</span><span class=keyword>void
|
||||
</span><span class=identifier>do_action</span><span class=special>(
|
||||
</span><span class=identifier>ActorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>actor</span><span class=special>,
|
||||
</span><span class=identifier>AttrT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>val</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>first</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></code></pre>
|
||||
<p> The default action policy forwards to:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>actor</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>);</span></code></pre>
|
||||
<p> This applies when the attribute <tt>val</tt> is of type <tt>nil_t</tt>. Otherwise, it forwards to:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>actor</span><span class=special>(</span><span class=identifier>val</span><span class=special>);</span></code></pre>
|
||||
<a name="scanner_policies_mixer"></a>
|
||||
<h3>scanner_policies mixer</h3>
|
||||
<p> The class <tt>scanner_policies</tt> combines the three scanner policy classes
|
||||
above into one:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>template </span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>IterationPolicyT </span><span class=special>= </span><span class=identifier>iteration_policy</span><span class=special>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>MatchPolicyT </span><span class=special>= </span><span class=identifier>match_policy</span><span class=special>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>ActionPolicyT </span><span class=special>= </span><span class=identifier>action_policy</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>scanner_policies</span><span class=special>;
|
||||
</span></code></pre>
|
||||
<p> This <i>mixer</i> class inherits from all three policies. This scanner_policies
|
||||
class is then used to parameterize the scanner:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>template </span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>IteratorT </span><span class=special>= </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*,
|
||||
</span><span class=keyword>typename </span><span class=identifier>PoliciesT </span><span class=special>= </span><span class=identifier>scanner_policies</span><span class=special><> </span><span class=special>>
|
||||
</span><span class=keyword>class </span><span class=identifier>scanner</span><span class=special>;
|
||||
</span></code></pre>
|
||||
<p> The scanner in turn inherits from the PoliciesT.</p>
|
||||
<a name="rebinding_policies"></a>
|
||||
<h3>Rebinding Policies</h3>
|
||||
<p> The scanner can be made to rebind to a different set of policies anytime.
|
||||
It has a member function <tt>change_policies(new_policies)</tt>. Given a new
|
||||
set of policies, this member function creates a new scanner with the new set
|
||||
of policies. The result type of the <i>rebound</i> scanner can be obtained
|
||||
by calling the metafunction:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>rebind_scanner_policies</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>PoliciesT</span><span class=special>>::</span><span class=identifier>type</span></code></pre>
|
||||
<a name="rebinding_iterators"></a>
|
||||
<h3>Rebinding Iterators</h3>
|
||||
<p> The scanner can also be made to rebind to a different iterator type anytime.
|
||||
It has a member function <tt>change_iterator(first, last)</tt>. Given a new
|
||||
pair of iterators of a type different from the ones held by the scanner, this member
|
||||
function creates a new scanner with the new pair of iterators. The result type
|
||||
of the <i>rebound</i> scanner can be obtained by calling the metafunction:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>rebind_scanner_iterator</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>IteratorT</span><span class=special>>::</span><span class=identifier>type</span></code></pre>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_parser_context.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,242 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<meta content=
|
||||
"HTML Tidy for Windows (vers 1st February 2003), see www.w3.org"
|
||||
name="generator">
|
||||
<title>
|
||||
Introduction
|
||||
</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10" height="49"></td>
|
||||
<td width="85%" height="49">
|
||||
<font size="6" face=
|
||||
"Verdana, Arial, Helvetica, sans-serif"><b>Introduction</b></font>
|
||||
</td>
|
||||
<td width="112" height="49">
|
||||
<a href="http://spirit.sf.net"><img src="theme/spirit.gif"
|
||||
width="112" height="48" align="right" border="0"></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table><br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30">
|
||||
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="preface.html"><img src="theme/l_arr.gif" width="20"
|
||||
height="19" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="quick_start.html"><img src="theme/r_arr.gif" border="0"></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
Spirit is an object-oriented recursive-descent parser generator framework
|
||||
implemented using template meta-programming techniques. Expression
|
||||
templates allow us to approximate the syntax of Extended Backus-Naur
|
||||
Form (EBNF) completely in C++.
|
||||
</p>
|
||||
<p>
|
||||
The Spirit framework enables a target grammar to be written exclusively
|
||||
in C++. Inline EBNF grammar specifications can mix freely with other C++
|
||||
code and, thanks to the generative power of C++ templates, are
|
||||
immediately executable. In contrast, conventional compiler-compilers or
|
||||
parser-generators have to perform an additional translation step from the
|
||||
source EBNF code to C or C++ code.
|
||||
</p>
|
||||
<p>
|
||||
A simple EBNF grammar snippet:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class="identifier">group </span> <span class="special">::=</span> <span class="literal">'('</span> <span class="identifier">expression</span> <span class="literal">')'
|
||||
</span> <span class="identifier">factor </span> <span class=
|
||||
"special">::=</span> <span class="identifier">integer</span> <span class=
|
||||
"special">|</span> <span class="identifier">group
|
||||
</span> <span class="identifier">term </span> <span class=
|
||||
"special">::=</span> <span class="identifier">factor</span> <span class=
|
||||
"special">((</span><span class="literal">'*'</span> <span class=
|
||||
"identifier">factor</span><span class="special">)</span> <span class=
|
||||
"special">|</span> <span class="special">(</span><span class=
|
||||
"literal">'/'</span> <span class="identifier">factor</span><span class=
|
||||
"special">))*
|
||||
</span> <span class="identifier">expression </span> <span class=
|
||||
"special">::=</span> <span class="identifier">term</span> <span class=
|
||||
"special">((</span><span class="literal">'+'</span> <span class=
|
||||
"identifier">term</span><span class="special">)</span> <span class=
|
||||
"special">|</span> <span class="special">(</span><span class=
|
||||
"literal">'-'</span> <span class="identifier">term</span><span class=
|
||||
"special">))*</span></font></code></pre>
|
||||
<p>
|
||||
is approximated using Spirit's facilities as seen in this code snippet:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
|
||||
"identifier">group </span> <span class=
|
||||
"special">=</span> <span class="literal">'('</span> <span class=
|
||||
"special">>></span> <span class=
|
||||
"identifier">expression</span> <span class=
|
||||
"special">>></span> <span class="literal">')'</span><span class=
|
||||
"special">;
|
||||
</span> <span class="identifier">factor </span> <span class=
|
||||
"special">=</span> <span class="identifier">integer</span> <span class=
|
||||
"special">|</span> <span class="identifier">group</span><span class="special">;
|
||||
</span> <span class="identifier">term </span> <span class=
|
||||
"special">=</span> <span class="identifier">factor</span> <span class=
|
||||
"special">>></span> <span class="special">*((</span><span class=
|
||||
"literal">'*'</span> <span class="special">>></span> <span class=
|
||||
"identifier">factor</span><span class="special">)</span> <span class=
|
||||
"special">|</span> <span class="special">(</span><span class=
|
||||
"literal">'/'</span> <span class="special">>></span> <span class=
|
||||
"identifier">factor</span><span class="special">));
|
||||
</span> <span class="identifier">expression </span> <span class=
|
||||
"special">=</span> <span class="identifier">term</span> <span class=
|
||||
"special">>></span> <span class="special">*((</span><span class=
|
||||
"literal">'+'</span> <span class="special">>></span> <span class=
|
||||
"identifier">term</span><span class="special">)</span> <span class=
|
||||
"special">|</span> <span class="special">(</span><span class=
|
||||
"literal">'-'</span> <span class="special">>></span> <span class=
|
||||
"identifier">term</span><span class="special">));</span></font></code>
|
||||
</pre>
|
||||
<p>
|
||||
Through the magic of expression templates, this is perfectly valid and
|
||||
executable C++ code. The production rule <tt>expression</tt> is in fact
|
||||
an object that has a member function <tt>parse</tt> that does the work given
|
||||
source code written in the grammar that we have just declared. Yes, it's
|
||||
a calculator. We shall simplify for now by skipping the type declarations
|
||||
and the definition of the rule <tt>integer</tt> invoked by
|
||||
<tt>factor</tt>. The production rule <tt>expression</tt> in our grammar
|
||||
specification, traditionally called the start symbol, can recognize
|
||||
inputs such as:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><span class="number">12345
|
||||
</span><code><font color="#000000"> </font></code><span class="special">-</span><span class="number">12345
|
||||
</span><code><font color="#000000"> </font></code><span class="special">+</span><span class="number">12345
|
||||
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
|
||||
"special">+</span> <span class="number">2
|
||||
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
|
||||
"special">*</span> <span class="number">2
|
||||
</span><code><font color="#000000"> </font></code><span class="number">1</span><span class=
|
||||
"special">/</span><span class="number">2</span> <span class=
|
||||
"special">+</span> <span class="number">3</span><span class=
|
||||
"special">/</span><span class="number">4
|
||||
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
|
||||
"special">+</span> <span class="number">2</span> <span class=
|
||||
"special">+</span> <span class="number">3</span> <span class=
|
||||
"special">+</span> <span class="number">4
|
||||
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
|
||||
"special">*</span> <span class="number">2</span> <span class=
|
||||
"special">*</span> <span class="number">3</span> <span class=
|
||||
"special">*</span> <span class="number">4
|
||||
</span><code><font color="#000000"> </font></code><span class="special">(</span><span class=
|
||||
"number">1</span> <span class="special">+</span> <span class=
|
||||
"number">2</span><span class="special">)</span> <span class=
|
||||
"special">*</span> <span class="special">(</span><span class=
|
||||
"number">3</span> <span class="special">+</span> <span class=
|
||||
"number">4</span><span class="special">)
|
||||
</span><code><font color="#000000"> </font></code><span class="special">(-</span><span class=
|
||||
"number">1</span> <span class="special">+</span> <span class=
|
||||
"number">2</span><span class="special">)</span> <span class=
|
||||
"special">*</span> <span class="special">(</span><span class=
|
||||
"number">3</span> <span class="special">+</span> <span class=
|
||||
"special">-</span><span class="number">4</span><span class="special">)
|
||||
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
|
||||
"special">+</span> <span class="special">((</span><span class=
|
||||
"number">6</span> <span class="special">*</span> <span class=
|
||||
"number">200</span><span class="special">)</span> <span class=
|
||||
"special">-</span> <span class="number">20</span><span class=
|
||||
"special">)</span> <span class="special">/</span> <span class="number">6
|
||||
</span><code><font color="#000000"> </font></code><span class="special">(</span><span class=
|
||||
"number">1</span> <span class="special">+</span> <span class=
|
||||
"special">(</span><span class="number">2</span> <span class=
|
||||
"special">+</span> <span class="special">(</span><span class=
|
||||
"number">3</span> <span class="special">+</span> <span class=
|
||||
"special">(</span><span class="number">4</span> <span class=
|
||||
"special">+</span> <span class="number">5</span><span class=
|
||||
"special">))))</span>
|
||||
</pre>
|
||||
<p>
|
||||
Certainly we have made some modifications to the original EBNF syntax.
|
||||
This is done to conform to C++ syntax rules. Most notably we see the
|
||||
abundance of shift <tt>>></tt> operators. Since there are no
|
||||
'empty' operators in C++, it is simply not possible to write something
|
||||
like:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><span class=
|
||||
"identifier">a</span> <span class="identifier">b</span>
|
||||
</pre>
|
||||
<p>
|
||||
as seen in math syntax, for example, to mean multiplication or, in our
|
||||
case, as seen in EBNF syntax to mean sequencing (b should follow a). The
|
||||
framework uses the shift <tt class="operators">>></tt> operator
|
||||
instead for this purpose. We take the <tt class="operators">>></tt>
|
||||
operator, with arrows pointing to the right, to mean "is followed by".
|
||||
Thus we write:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><span class=
|
||||
"identifier">a</span> <span class="special">>></span> <span class=
|
||||
"identifier">b</span>
|
||||
</pre>
|
||||
<p>
|
||||
The alternative operator <tt class="operators">|</tt> and the parentheses
|
||||
<tt class="operators">()</tt> remain as is. The assignment operator
|
||||
<tt class="operators">=</tt> is used in place of EBNF's <tt class=
|
||||
"operators">::=</tt>. Last but not least, the Kleene star <tt class=
|
||||
"operators">*</tt> which used to be a postfix operator in EBNF becomes a
|
||||
prefix. Instead of:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><span class="identifier">a</span><span class=
|
||||
"special">*</span> <span class="comment">//... in EBNF syntax,</span>
|
||||
</pre>
|
||||
<p>
|
||||
we write:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><span class="special">*</span><span class=
|
||||
"identifier">a</span> <span class="comment">//... in Spirit.</span>
|
||||
</pre>
|
||||
<p>
|
||||
since there are no postfix stars, "<tt class="operators">*</tt>", in
|
||||
C/C++. Finally, we terminate each rule with the ubiquitous semi-colon,
|
||||
"<tt>;</tt>".
|
||||
</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30">
|
||||
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="preface.html"><img src="theme/l_arr.gif" width="20"
|
||||
height="19" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="quick_start.html"><img src="theme/r_arr.gif" border="0"></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table><br>
|
||||
<hr size="1">
|
||||
<p class="copyright">
|
||||
Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the
|
||||
Boost Software License, Version 1.0. (See accompanying file
|
||||
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)</font>
|
||||
</p>
|
||||
<p>
|
||||
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,187 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>List Parsers</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link href="theme/style.css" rel="stylesheet" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b> </b></font></td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>List Parsers</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="confix.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="functor_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>List Parsers are generated by the special predefined parser generator object
|
||||
<tt>list_p</tt>, which generates parsers recognizing list structures
|
||||
of the type </p>
|
||||
<pre><span class=identifier> item </span><span class=special>>> </span><span class=special>*(</span><span class=identifier>delimiter </span><span class=special>>> </span><span class=identifier>item</span><span class=special>) </span><span class=special>>> </span><span class=special>!</span><span class=identifier>end</span></pre>
|
||||
<p>where <tt>item</tt> is an expression, <tt>delimiter</tt> is a delimiter and <tt>end</tt> is an
|
||||
optional closing expression. As you can see, the <tt>list_p</tt> generated parser
|
||||
does not recognize empty lists, i.e. the parser must find at least one item
|
||||
in the input stream to return a successful match. If you wish to also match
|
||||
an empty list, you can make your <tt>list_p</tt> optional with <tt>operator!</tt>. An example where
|
||||
this utility parser is helpful is parsing comma separated C/C++ strings, which
|
||||
can be easily formulated as:</p>
|
||||
<pre><span class=special> </span><span class=identifier>rule</span><span class=special><> </span><span class=identifier>list_of_c_strings_rule
|
||||
</span><span class=special>= </span><span class=identifier>list_p</span><span class=special>(</span><span class=identifier>confix_p</span><span class=special>(</span><span class=literal>'\"'</span><span class=special>, </span><span class=special>*</span><span class=identifier>c_escape_char_p</span><span class=special>, </span><span class=literal>'\"'</span><span class=special>), </span><span class=literal>','</span><span class=special>)
|
||||
</span><span class=special>;</span></pre>
|
||||
<p>The <tt>confix_p</tt> and <tt>c_escape_char_p</tt> parser generators
|
||||
are described <a href="confix.html">here</a> and <a href="escape_char_parser.html">here</a>.</p>
|
||||
<p>The <tt>list_p</tt> parser generator object can be used to generate the following
|
||||
different types of List Parsers:</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title"><b>List Parsers</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="29%" height="27" class="table_cells"><b>list_p</b></td>
|
||||
<td width="71%" class="table_cells"><p><tt>list_p</tt> used by itself parses
|
||||
comma separated lists without special item formatting, i.e. everything
|
||||
in between two commas is matched as an <tt>item</tt>, no <tt>end</tt>
|
||||
of list token is matched</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="27" class="table_cells"><strong>list_p(delimiter)</strong></td>
|
||||
<td class="table_cells"><p>generates a list parser, which recognizes lists
|
||||
with the given <tt>delimiter</tt> and matches everything in between them
|
||||
as an <tt>item</tt>, no <tt>end</tt> of list token is matched</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="27" class="table_cells"><strong>list_p(item, delimiter)</strong></td>
|
||||
<td class="table_cells"><p>generates a list parser, which recognizes lists
|
||||
with the given <tt>delimiter</tt> and matches items based on the given
|
||||
item parser, no <tt>end</tt> of list token is matched</p></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td height="27" class="table_cells"><strong>list_p(item, delimiter, end)</strong></td>
|
||||
<td class="table_cells"><p>generates a list parser, which recognizes lists
|
||||
with the given <tt>delimiter</tt> and matches items based on the given
|
||||
<tt>item</tt> parser and additionally recognizes an optional <tt>end</tt>
|
||||
expression</p></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>All of the parameters to list_p can be single characters, strings
|
||||
or, if more complex parsing logic is required, auxiliary parsers, each of which
|
||||
is automatically converted to the corresponding parser type needed for successful
|
||||
parsing.</p>
|
||||
<p>If the <tt>item</tt> parser is an <tt>action_parser_category</tt> type (parser
|
||||
with an attached semantic action) we have to do something special. This happens,
|
||||
if the user wrote something like:</p>
|
||||
<pre><span class=special> </span><span class=identifier>list_p</span><span class=special>(</span><span class=identifier>item</span><span class=special>[</span><span class=identifier>func</span><span class=special>], </span><span class=identifier>delim</span><span class=special>)</span></pre>
|
||||
<p> where <tt>item</tt> is the parser matching one item of the list sequence and
|
||||
<tt>func</tt> is a functor to be called after matching one item. If we would
|
||||
do nothing, the resulting code would parse the sequence as follows:</p>
|
||||
<pre><span class=special> </span><span class=special>(</span><span class=identifier>item</span><span class=special>[</span><span class=identifier>func</span><span class=special>] </span><span class=special>- </span><span class=identifier>delim</span><span class=special>) </span><span class=special>>> </span><span class=special>*(</span><span class=identifier>delim </span><span class=special>>> </span><span class=special>(</span><span class=identifier>item</span><span class=special>[</span><span class=identifier>func</span><span class=special>] </span><span class=special>- </span><span class=identifier>delim</span><span class=special>))</span></pre>
|
||||
<p> which in most cases is not what the user expects. (If this <u>is</u> what you've
|
||||
expected, then please use one of the <tt>list_p</tt> generator
|
||||
functions <tt>direct()</tt>, which will inhibit refactoring of the <tt>item</tt>
|
||||
parser). To make the list parser behave as expected:</p>
|
||||
<pre><span class=special> </span><span class=special>(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delim</span><span class=special>)[</span><span class=identifier>func</span><span class=special>] </span><span class=special>>> </span><span class=special>*(</span><span class=identifier>delim </span><span class=special>>> </span><span class=special>(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delim</span><span class=special>)[</span><span class=identifier>func</span><span class=special>])</span></pre>
|
||||
<p> the actor attached to the item parser has to be re-attached to the <tt>(item
|
||||
- delim)</tt> parser construct, which will make the resulting list parser 'do
|
||||
the right thing'. This refactoring is done with the help of the <a href="refactoring.html">Refactoring
|
||||
Parsers</a>. Additionally, special care must be taken if the item parser is
|
||||
a <tt>unary_parser_category</tt> type parser as for instance:</p>
|
||||
<pre><span class=special> </span><span class=identifier>list_p</span><span class=special>(*</span><span class=identifier>anychar_p</span><span class=special>, </span><span class=literal>','</span><span class=special>)</span></pre>
|
||||
<p> which without any refactoring would result in </p>
|
||||
<pre><span class=special> </span><span class=special>(*</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>))
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>) </span><span class=special>>> </span><span class=special>(*</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>)) </span><span class=special>)</span></pre>
|
||||
<p> and will not give the expected result (the first <tt>*anychar_p</tt> will
|
||||
eat up all the input up to the end of the input stream). So we have to refactor
|
||||
this into:</p>
|
||||
<pre><span class=special> </span><span class=special>*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>))
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>) </span><span class=special>>> </span><span class=special>*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>)) </span><span class=special>)</span></pre>
|
||||
<p> which will give the correct result.</p>
|
||||
<p> The case, where the item parser is a combination of the two mentioned problems
|
||||
(i.e. the item parser is a unary parser with an attached action), is handled
|
||||
accordingly too:</p>
|
||||
<pre><span class=special> </span><span class=identifier>list_p</span><span class=special>((*</span><span class=identifier>anychar_p</span><span class=special>)[</span><span class=identifier>func</span><span class=special>], </span><span class=literal>','</span><span class=special>)</span></pre>
|
||||
<p> will be parsed as expected:</p>
|
||||
<pre><span class=special> </span><span class=special>(*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>)))[</span><span class=identifier>func</span><span class=special>]
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>) </span><span class=special>>> </span><span class=special>(*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>)))[</span><span class=identifier>func</span><span class=special>] </span><span class=special>)</span></pre>
|
||||
<p>The required refactoring is implemented with the help of the <a href="refactoring.html">Refactoring
|
||||
Parsers</a>.</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title"><b>Summary of List Parser refactorings</b></td>
|
||||
</tr>
|
||||
<tr class="table_title">
|
||||
<td width="34%"><b>You write it as:</b></td>
|
||||
<td width="66%"><code><font face="Verdana, Arial, Helvetica, sans-serif">It
|
||||
is refactored to:</font></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="34%" class="table_cells"><code><span class=identifier>list_p</span><span class=special>(</span><span class=identifier>item</span><span class=special>,
|
||||
</span><span class=identifier>delimiter</span><span class=special>)</span></code></td>
|
||||
<td width="66%" class="table_cells"> <code><span class=special> (</span><span class=identifier>item
|
||||
</span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>)
|
||||
<br>
|
||||
>> *(</span><span class=identifier>delimiter </span><span class=special>
|
||||
>> (</span><span class=identifier>item </span><span class=special>-
|
||||
</span><span class=identifier>delimiter</span><span class=special>))</span></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="34%" class="table_cells"><code><span class=identifier>list_p</span><span class=special>(</span><span class=identifier>item</span><span class=special>[</span><span class=identifier>func</span><span class=special>],
|
||||
</span><span class=identifier>delimiter</span><span class=special>)</span></code></td>
|
||||
<td width="66%" class="table_cells"> <code><span class=special> (</span><span class=identifier>item
|
||||
</span><span class=special> - </span><span class=identifier>delimiter</span><span class=special>)[</span><span class=identifier>func</span><span class=special>]
|
||||
<br>
|
||||
>> *(</span><span class=identifier>delimiter </span><span class=special>>>
|
||||
(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>)[</span><span class=identifier>func</span><span class=special>])</span></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="34%" class="table_cells"><code><span class=identifier>list_p</span><span class=special>(*</span><span class=identifier>item</span><span class=special>,
|
||||
</span><span class=identifier>delimiter</span><span class=special>)</span></code></td>
|
||||
<td width="66%" class="table_cells"> <code><span class=special>*(</span><span class=identifier>item
|
||||
</span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>)
|
||||
<br>
|
||||
>> *(</span><span class=identifier>delimiter </span><span class=special>>>
|
||||
*(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>))</span></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="34%" class="table_cells"><code><span class=identifier>list_p</span><span class=special>((*</span><span class=identifier>item</span><span class=special>)[</span><span class=identifier>func</span><span class=special>],
|
||||
</span><span class=identifier>delimiter</span><span class=special>)</span></code></td>
|
||||
<td width="66%" class="table_cells"> <code><span class=special>(*(</span><span class=identifier>item
|
||||
</span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>))[</span><span class=identifier>func</span><span class=special>]
|
||||
<br>
|
||||
>> *(</span><span class=identifier>delimiter </span><span class=special>>>
|
||||
(*(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>))[</span><span class=identifier>func</span><span class=special>])</span></code></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> <img height="16" width="15" src="theme/lens.gif"> <a href="../example/fundamental/list_parser.cpp">list_parser.cpp </a> sample shows the usage of the list_p utility parser:</p>
|
||||
<ol>
|
||||
<li>parsing a simple ',' delimited list w/o item formatting</li>
|
||||
<li> parsing a CSV list (comma separated values - strings, integers or reals)</li>
|
||||
<li>parsing a token list (token separated values - strings, integers or reals) <br>
|
||||
with an action parser directly attached to the item part of the list_p generated parser</li>
|
||||
</ol>
|
||||
<p>This is part of the Spirit distribution.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="confix.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="functor_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2001-2003 Hartmut Kaiser<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,174 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title> Loops</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b> Loops</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="escape_char_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="character_sets.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>So far we have introduced a couple of EBNF operators that deal with looping.
|
||||
We have the <tt>+</tt> positive operator, which matches the preceding symbol
|
||||
one (1) or more times, as well as the Kleene star <tt>*</tt> which matches the
|
||||
preceding symbol zero (0) or more times.</p>
|
||||
<p>Taking this further, we may want to have a generalized loop operator. To some
|
||||
this may seem to be a case of overkill. Yet there are grammars that are impractical
|
||||
and cumbersome, if not impossible, for the basic EBNF iteration syntax to specify.
|
||||
Examples:</p>
|
||||
<blockquote>
|
||||
<p><img src="theme/bullet.gif" width="12" height="12"> A file name may have
|
||||
a maximum of 255 characters only.<br>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> A specific bitmap file
|
||||
format has exactly 4096 RGB color information. <br>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> A 32 bit binary string
|
||||
(1..32 1s or 0s).</p>
|
||||
</blockquote>
|
||||
<p>Other than the Kleene star <tt>*</tt>, the Positive closure <tt>+</tt>, and
|
||||
the optional <tt>!</tt>, a more flexible mechanism for looping is provided for
|
||||
by the framework. <br>
|
||||
</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title">Loop Constructs</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="26%"><b>repeat_p (n) [p]</b></td>
|
||||
<td class="table_cells" width="74%">Repeat <b>p</b> exactly <b>n</b> times</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="26%"><b>repeat_p (n1, n2) [p]</b></td>
|
||||
<td class="table_cells" width="74%">Repeat <b>p</b> at least <b>n1</b> times
|
||||
and at most <b>n2</b> times</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="26%"><b>repeat_p (n, more) [p] </b></td>
|
||||
<td class="table_cells" width="74%">Repeat <b>p</b> at least <b>n</b> times,
|
||||
continuing until <b>p</b> fails or the input is consumed</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Using the <tt>repeat_p</tt> parser, we can now write our examples above:</p>
|
||||
<p>A file name with a maximum of 255 characters:<br>
|
||||
</p>
|
||||
<pre> <span class=identifier>valid_fname_chars </span><span class=special>= </span><span class=comment>/*..*/</span><span class=special>;
|
||||
</span><span class=identifier>filename </span><span class=special>= </span><span class=identifier>repeat_p</span><span class=special>(</span><span class=number>1</span><span class=special>, </span><span class=number>255</span><span class=special>)[</span><span class=identifier>valid_fname_chars</span><span class=special>];</span></pre>
|
||||
<p>A specific bitmap file format which has exactly 4096 RGB color information:<span class=special><br>
|
||||
</span></p>
|
||||
<pre> <span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>16</span><span class=special>, </span><span class=number>6</span><span class=special>, </span><span class=number>6</span><span class=special>> </span><span class=identifier>rgb_p</span><span class=special>;
|
||||
</span><span class=identifier>bitmap </span><span class=special>= </span><span class=identifier>repeat_p</span><span class=special>(</span><span class=number>4096</span><span class=special>)[</span><span class=identifier>rgb_p</span><span class=special>];</span></pre>
|
||||
<p>As for the 32 bit binary string (1..32 1s or 0s), of course we could have easily
|
||||
used the <tt>bin_p</tt> numeric parser instead. For the sake of demonstration
|
||||
however:<span class=special><br>
|
||||
</span></p>
|
||||
<pre> <span class=identifier>bin</span><span class=number>32</span> <span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[</span><span class=identifier>repeat_p</span><span class=special>(</span>1, <span class=number>32</span><span class=special>)[</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'1'</span><span class=special>) </span><span class=special>| </span><span class=literal>'0'</span><span class=special>]];</span></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> Loop
|
||||
parsers are run-time <a href="parametric_parsers.html">parametric</a>.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The Loop parsers can be dynamic. Consider the parsing of a binary file of Pascal-style
|
||||
length prefixed string, where the first byte determines the length of the incoming
|
||||
string. Here's a sample input:
|
||||
<blockquote>
|
||||
<table width="363" border="0" cellspacing="0" cellpadding="0">
|
||||
<tr>
|
||||
<td class="dk_grey_bkd">
|
||||
<table width="100%" border="0" cellspacing="2" cellpadding="2">
|
||||
<tr>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">11</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">h</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">e</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">l</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">l</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">o</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center"> _</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">w</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">o</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">r</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">l</div>
|
||||
</td>
|
||||
<td class="white_bkd" width="8%">
|
||||
<div align="center">d</div>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
</blockquote>
|
||||
<p>This trivial example cannot be practically defined in traditional EBNF. Although
|
||||
some EBNF syntaxes allow more powerful repetition constructs other than the Kleene
|
||||
star, we are still limited to parsing fixed strings. The nature of EBNF forces
|
||||
the repetition factor to be a constant. On the other hand, Spirit allows the
|
||||
repetition factor to be variable at run time. We could write a grammar that
|
||||
accepts the input string above:</p>
|
||||
<pre><span class=identifier> </span><span class=keyword>int </span><span class=identifier>c</span><span class=special>;
|
||||
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>anychar_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>c</span><span class=special>)] </span><span class=special>>> </span><span class=identifier>repeat_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>c</span><span class=special>))[</span><span class=identifier>anychar_p</span><span class=special>];</span></pre>
|
||||
<p>The expression</p>
|
||||
<pre> <span class=identifier>anychar_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>c</span><span class=special>)]</span></pre>
|
||||
<p>extracts the first character from the input and puts it in <tt>c</tt>. What
|
||||
is interesting is that in addition to constants, we can also use variables as
|
||||
parameters to <tt>repeat_p</tt>, as demonstrated in </p>
|
||||
<pre> <span class=identifier>repeat_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>c</span><span class=special>)</span><span class=special>)</span><span class=special>[</span><span class=identifier>anychar_p</span><span class=special>]</span></pre>
|
||||
<p>Notice that <tt>boost::ref</tt> is used to reference the integer <tt>c</tt>.
|
||||
This usage of <tt>repeat_p</tt> makes the parser defer the evaluation of the
|
||||
repetition factor until it is actually needed. Continuing our example, since
|
||||
the value 11 is already extracted from the input, <tt>repeat_p</tt> is now
|
||||
expected to loop exactly 11 times.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="escape_char_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="character_sets.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,276 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"><html><head><title>The multi_pass</title>
|
||||
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css"></head>
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tbody><tr>
|
||||
<td width="10">
|
||||
<br>
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The
|
||||
multi_pass</b></font> </td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tbody><tr>
|
||||
<td width="10"><br>
|
||||
</td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="trees.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="file_iterator.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<p>Backtracking in Spirit requires the use of the following types of iterator:
|
||||
forward, bidirectional, or random access. Because of backtracking, input iterators
|
||||
cannot be used. Therefore, the standard library classes istreambuf_iterator
|
||||
and istream_iterator, that fall under the category of input iterators, cannot
|
||||
be used. Another input iterator that is of interest is one that wraps a lexer,
|
||||
such as LEX.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tbody><tr>
|
||||
<td class="note_box"> <img src="theme/note.gif" width="16" height="16"> <b>Input
|
||||
Iterators</b> <br>
|
||||
<br>
|
||||
In general, Spirit is a backtracking parser. This is not an absolute requirement
|
||||
though. In the future, we shall see more deterministic parsers that require
|
||||
no more than 1 character (token) of lookahead. Such parsers allow us to
|
||||
use input iterators such as the istream_iterator as is. </td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<p> Unfortunately, with an input iterator, there is no way to save an iterator
|
||||
position, and thus input iterators will not work with backtracking in Spirit.
|
||||
One solution to this problem is to simply load all the data to be parsed into
|
||||
a container, such as a vector or deque, and then pass the begin and end of the
|
||||
container to Spirit. This method can be too memory intensive for certain applications,
|
||||
which is why the multi_pass iterator was created.</p>
|
||||
<p> The multi_pass iterator will convert any input iterator into a forward iterator
|
||||
suitable for use with Spirit. multi_pass will buffer data when needed and will
|
||||
discard the buffer when only one copy of the iterator exists.</p>
|
||||
<p> A grammar must be designed with care if the multi_pass iterator is used. Any rule that may
|
||||
need to backtrack, such as one that contains an alternative, will cause data to be buffered. The rules that are optimal to
|
||||
use are sequence and repetition. Sequences of the form <tt>a >> b</tt>
|
||||
will not buffer data at all. Any rule that repeats, such as kleene_star (<tt>*a</tt>)
|
||||
or positive such as (<tt>+a</tt>), will only buffer the data for the current
|
||||
repetition.</p>
|
||||
<p> In typical grammars, ambiguity and therefore lookahead is often localized.
|
||||
In fact, many well designed languages are fully deterministic and require no
|
||||
lookahead at all. Peeking at the first character from the input will immediately
|
||||
determine the alternative branch to take. Yet, even with highly ambiguous grammars,
|
||||
alternatives are often of the form <tt>*(a | b | c | d)</tt>. The input iterator
|
||||
moves on and is never stuck at the beginning. Let's look at a Pascal snippet
|
||||
for example:</p>
|
||||
<pre> <code><span class="identifier">program </span><span class="special">=<br> </span><span class="identifier"> programHeading </span><span class="special">>> </span><span class="identifier">block </span><span class="special">>> </span><span class="literal">'.'<br> </span><span class="special"> ;<br><br> </span><span class="identifier">block </span><span class="special">=<br> *( </span><span class="identifier">labelDeclarationPart<br> </span><span class="special">| </span><span class="identifier">constantDefinitionPart<br> </span><span class="special">| </span><span class="identifier">typeDefinitionPart<br> </span><span class="special"> | </span><span class="identifier">variableDeclarationPart<br> </span><span class="special">| </span><span class="identifier"> procedureAndFunctionDeclarationPart<br> </span><span class="special"> )<br> >> </span><span class="identifier">statementPart<br> </span><span class="special">;<br></span></code></pre>
|
||||
<p> Notice the alternatives inside the Kleene star in the rule block. The rule
|
||||
gobbles the input in a linear manner and throws away the past history with each
|
||||
iteration. As this is a fully deterministic LL(1) grammar, each failed alternative
|
||||
only has to peek 1 character (token). The alternative that consumes more than
|
||||
1 character (token) is definitely a winner. After which, the Kleene star moves
|
||||
on to the next.</p>
|
||||
<p>Be mindful if you use the free parse functions.
|
||||
All of these make a copy of the iterator passed to them.<br>
|
||||
</p>
|
||||
<p>Now, after the lecture on the features to be careful with when using multi_pass,
|
||||
you may think that multi_pass is way too restrictive to use. That's
|
||||
not the case. If your grammar is deterministic, you can make use of flush_multi_pass in your grammar to ensure that data is not buffered when unnecessary.<br>
|
||||
</p>
|
||||
|
||||
<p> Again, following up the example we started to use in the section on the scanner
|
||||
. Here's an example using the multi_pass: This time around we are extracting
|
||||
our input from the input stream using an istreambuf_iterator.</p>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">spirit</span><span class="special">/</span><span class="identifier">core</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">></span>
|
||||
<code><span class="preprocessor"> #include </span><span class="special"><</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">spirit</span><span class="special">/</span><span class="identifier">iterator</span><span class="special">/</span><span class="identifier">multi_pass</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">></span><span class="comment">
|
||||
|
||||
</span><span class="keyword">using namespace</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">spirit</span><span class="special">;
|
||||
</span><span class="keyword">using namespace</span> <span class="identifier">std</span><span class="special">;</span>
|
||||
|
||||
<span class="identifier">ifstream in</span><span class="special">(</span><span class="string">"input_file.txt"</span><span class="special">); </span><span class="comment">// we get our input from this file<br><br> </span><span class="keyword">typedef char </span><span class="identifier">char_type</span><span class="special">;</span>
|
||||
<span class="keyword">typedef </span><span class="identifier">multi_pass</span><span class="special"><</span><span class="identifier">istreambuf_iterator</span><span class="special"><</span><span class="identifier">char_type</span><span class="special">> > </span><span class="identifier">iterator_type</span><span class="special">;</span>
|
||||
|
||||
<span class="keyword">typedef</span> <span class="identifier">skip_parser_iteration_policy</span><span class="special"><</span><span class="identifier">space_parser</span><span class="special">></span> <span class="identifier">iter_policy_type</span><span class="special">;</span>
|
||||
<span class="keyword">typedef</span> <span class="identifier">scanner_policies</span><span class="special"><</span>iter_policy_type<span class="special">></span> <span class="identifier">scanner_policies_type</span><span class="special">;</span>
|
||||
<span class="keyword">typedef</span> <span class="identifier">scanner</span><span class="special"><</span>iterator_type, scanner_policies_type<span class="special">></span> <span class="identifier">scanner_type</span><span class="special">;</span>
|
||||
|
||||
<span class="keyword">typedef</span> rule<span class="special"><</span>scanner_type<span class="special">></span> <span class="identifier">rule_type</span><span class="special">;</span>
|
||||
|
||||
<span class="identifier">iter_policy_type</span> <span class="identifier">iter_policy</span><span class="special">(</span><span class="identifier">space_p</span><span class="special">);</span>
|
||||
<span class="identifier">scanner_policies_type</span> <span class="identifier">policies</span><span class="special">(</span><span class="identifier">iter_policy</span><span class="special">);</span>
|
||||
iterator_type first(
|
||||
make_multi_pass(std::istreambuf_iterator<char_type>(in)));
|
||||
|
||||
scanner_type <span class="identifier">scan</span><span class="special">(</span>
|
||||
first<span class="special">,</span> make_multi_pass(std::istreambuf_iterator<span class="special"><</span><span class="identifier">char_type</span><span class="special">>()),</span>
|
||||
<span class="identifier">policies</span><span class="special">)</span>;
|
||||
<span class="special"><br> </span><span class="identifier">rule_type n_list </span><span class="special">= </span><span class="identifier">real_p </span><span class="special">>> *(</span><span class="literal">',' </span><span class="special">>> </span><span class="identifier">real_p</span><span class="special">);<br> </span><span class="identifier">match</span><span class="special"><></span><span class="identifier"> m </span><span class="special">= </span><span class="identifier">n_list</span><span class="special">.</span><span class="identifier">parse</span><span class="special">(</span><span class="identifier">scan</span><span class="special">);<br></span></code></pre>
|
||||
<a name="flush_multi_pass"></a>
|
||||
<h2>flush_multi_pass</h2>
|
||||
<p> There is a predefined pseudo-parser called flush_multi_pass. When this parser
|
||||
is used with multi_pass, it will call multi_pass::clear_queue(). This will cause
|
||||
any buffered data to be erased. This also will invalidate all other copies of
|
||||
multi_pass and they should not be used. If they are, a boost::illegal_backtracking
|
||||
exception will be thrown.</p>
|
||||
<a name="multi_pass_policies"></a>
|
||||
<h2>multi_pass Policies</h2>
|
||||
<p> multi_pass is a templated policy driven class. The description of multi_pass
|
||||
above is how it was originally implemented (before it used policies), and is
|
||||
the default configuration now. But, multi_pass is capable of much more. Because
|
||||
of the open-ended nature of policies, you can write your own policy to make
|
||||
multi_pass behave in a way that we never before imagined.</p>
|
||||
<p> The multi_pass class has five template parameters:</p>
|
||||
<ul>
|
||||
<li>InputT - The type multi_pass uses to acquire its input. This is typically
|
||||
an input iterator, or functor.</li>
|
||||
<li>InputPolicy - A class that defines how multi_pass acquires its input. The
|
||||
InputPolicy is parameterized by InputT.</li>
|
||||
<li>OwnershipPolicy - This policy determines how multi_pass deals with its
|
||||
shared components.</li>
|
||||
<li>CheckingPolicy - This policy determines how checking for invalid iterators
|
||||
is done.</li>
|
||||
<li>StoragePolicy - The buffering scheme used by multi_pass is determined and
|
||||
managed by the StoragePolicy.</li>
|
||||
</ul>
|
||||
<a name="predefined_policies"></a>
|
||||
<h2>Predefined policies</h2>
|
||||
<p> All predefined multi_pass policies are in the namespace boost::spirit::multi_pass_policies.</p>
|
||||
<a name="predefined_inputpolicy_classes"></a>
|
||||
<h3>Predefined InputPolicy classes</h3>
|
||||
<a name="input_iterator"></a>
|
||||
<h4>input_iterator</h4>
|
||||
<p> This policy directs multi_pass to read from an input iterator of type InputT.</p>
|
||||
<a name="lex_input"></a>
|
||||
<h4>lex_input</h4>
|
||||
<p> This policy obtains its input by calling yylex(), which would typically be
|
||||
provided by a scanner generated by LEX. If you use this policy your code must
|
||||
link against a LEX generated scanner.</p>
|
||||
<a name="functor_input"></a>
|
||||
<h4>functor_input</h4>
|
||||
<p> This input policy obtains its data by calling a functor of type InputT. The
|
||||
functor must meet certain requirements. It must have a typedef called result_type
|
||||
which should be the type returned from operator(). Also, since an input policy
|
||||
needs a way to determine when the end of input has been reached, the functor
|
||||
must contain a static variable named eof which is comparable to a variable of
|
||||
result_type.</p>
|
||||
<a name="predefined_ownershippolicy_classes"></a>
|
||||
<h3>Predefined OwnershipPolicy classes</h3>
|
||||
<a name="ref_counted"></a>
|
||||
<h4>ref_counted</h4>
|
||||
<p> This class uses a reference counting scheme. multi_pass will delete its shared
|
||||
components when the count reaches zero.</p>
|
||||
<a name="first_owner"></a>
|
||||
<h4>first_owner</h4>
|
||||
<p> When this policy is used, the first multi_pass created will be the one that
|
||||
deletes the shared data. Each copy will not take ownership of the shared data.
|
||||
This works well for spirit, since no dynamic allocation of iterators is done.
|
||||
All copies are made on the stack, so the original iterator has the longest lifespan.</p>
|
||||
<a name="predefined_checkingpolicy_classes"></a>
|
||||
<h3>Predefined CheckingPolicy classes</h3>
|
||||
<a name="no_check"></a>
|
||||
<h4>no_check</h4>
|
||||
<p> This policy does no checking at all.</p>
|
||||
<a name="buf_id_check"></a>
|
||||
<h4>buf_id_check</h4>
|
||||
<p> buf_id_check keeps around a buffer id, or a buffer age. Every time clear_queue()
|
||||
is called on a multi_pass iterator, it is possible that all other iterators
|
||||
become invalid. When clear_queue() is called, buf_id_check increments the buffer
|
||||
id. When an iterator is dereferenced, this policy checks that the buffer id
|
||||
of the iterator matches the shared buffer id. This policy is most effective
|
||||
when used together with the std_deque StoragePolicy. It should not be used with
|
||||
the fixed_size_queue StoragePolicy, because it will not detect iterator dereferences
|
||||
that are out of range.</p>
|
||||
<a name="full_check"></a>
|
||||
<h4>full_check</h4>
|
||||
<p> This policy has not been implemented yet. When it is, it will keep track of
|
||||
all iterators and make sure that they are all valid.</p>
|
||||
<a name="predefined_storagepolicy_classes"></a>
|
||||
<h3>Predefined StoragePolicy classes</h3>
|
||||
<a name="std_deque"></a>
|
||||
<h4>std_deque</h4>
|
||||
<p> This policy keeps all buffered data in a std::deque. All data is stored as
|
||||
long as there is more than one iterator. Once the iterator count goes down to
|
||||
one, and the queue is no longer needed, it is cleared, freeing up memory. The
|
||||
queue can also be forcibly cleared by calling multi_pass::clear_queue().</p>
|
||||
<a name="fixed_size_queue_lt_n_gt_"></a>
|
||||
<h4>fixed_size_queue<N></h4>
|
||||
<p> fixed_size_queue keeps a circular buffer that is size N+1 and stores N elements.
|
||||
fixed_size_queue is a template with a std::size_t parameter that specifies the
|
||||
queue size. It is your responsibility to ensure that N is big enough for your
|
||||
parser. Whenever the foremost iterator is incremented, the last character of
|
||||
the buffer is automatically erased. Currently there is no way to tell if an
|
||||
iterator is trailing too far behind and has become invalid. No dynamic allocation
|
||||
is done by this policy during normal iterator operation, only on initial construction.
|
||||
The memory usage of this StoragePolicy is set at N+1 bytes, unlike std_deque,
|
||||
which is unbounded.</p>
|
||||
<a name="combinations__how_to_specify_your_own_custom_multi_pass"></a>
|
||||
<h2>Combinations: How to specify your own custom multi_pass</h2>
|
||||
<p> The beauty of policy based designs is that you can mix and match policies
|
||||
to create your own custom class by selecting the policies you want. Here's an
|
||||
example of how to specify a custom multi_pass that wraps an istream_iterator<char>,
|
||||
and is slightly more efficient than the default because it uses the first_owner
|
||||
OwnershipPolicy and the no_check CheckingPolicy:</p>
|
||||
<pre> <code><span class="keyword">typedef </span><span class="identifier">multi_pass</span><span class="special"><<br> </span><span class="identifier">istream_iterator</span><span class="special"><</span><span class="keyword">char</span><span class="special">>,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">input_iterator</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">first_owner</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">no_check</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">std_deque<br> </span><span class="special">> </span><span class="identifier">first_owner_multi_pass_type</span><span class="special">;<br></span></code></pre>
|
||||
<p> The default template parameters for multi_pass are: input_iterator InputPolicy,
|
||||
ref_counted OwnershipPolicy, buf_id_check CheckingPolicy and std_deque StoragePolicy.
|
||||
So if you use multi_pass<istream_iterator<char> > you will get those
|
||||
pre-defined behaviors while wrapping an istream_iterator<char>.</p>
|
||||
<p> There is one other pre-defined class called look_ahead. look_ahead has two
|
||||
template parameters: InputT, the type of the input iterator to wrap, and a std::size_t
|
||||
N, which specifies the size of the buffer to the fixed_size_queue policy. While
|
||||
the default multi_pass configuration is designed for safety, look_ahead is designed
|
||||
for speed. look_ahead is derived from a multi_pass with the following policies:
|
||||
input_iterator InputPolicy, first_owner OwnershipPolicy, no_check CheckingPolicy,
|
||||
and fixed_size_queue<N> StoragePolicy.</p>
|
||||
<a name="how_to_write_a_functor_for_use_with_the_functor_input_inputpolicy"></a>
|
||||
<h3>How to write a functor for use with the functor_input InputPolicy</h3>
|
||||
<p> If you want to use the functor_input InputPolicy, you can write your own functor
|
||||
that will supply the input to multi_pass. The functor must satisfy two requirements.
|
||||
It must have a typedef result_type which specifies the return type of operator().
|
||||
This is standard practice in the STL. Also, it must supply a static variable
|
||||
called eof which is compared against to know whether the input has reached the
|
||||
end. Here is an example:</p>
|
||||
<pre> <code><span class="keyword">class </span><span class="identifier">my_functor<br> </span><span class="special">{<br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="keyword">typedef char </span><span class="identifier">result_type</span><span class="special">;<br><br> </span><span class="identifier">my_functor</span><span class="special">()<br> : </span><span class="identifier">c</span><span class="special">(</span><span class="literal">'A'</span><span class="special">) {}<br><br> </span><span class="keyword">char operator</span><span class="special">()() </span><span class="keyword">const<br> </span><span class="special">{<br> </span><span class="keyword">if </span><span class="special">(</span><span class="identifier">c </span><span class="special">== </span><span class="literal">'M'</span><span class="special">)<br> </span><span class="keyword">return </span><span class="identifier">eof</span><span class="special">;<br> </span><span class="keyword">else<br> return </span><span class="identifier">c</span><span class="special">++;<br> }<br><br> </span><span class="keyword">static </span><span class="identifier">result_type eof</span><span class="special">;<br><br> </span><span class="keyword">private</span><span class="special">:<br><br> </span><span class="keyword">char </span><span class="identifier">c</span><span class="special">;<br> };<br><br> </span><span class="identifier">my_functor</span><span class="special">::</span><span class="identifier">result_type my_functor</span><span class="special">::</span><span class="identifier">eof </span><span class="special">= </span><span class="literal">'\0'</span><span class="special">;<br><br> </span><span class="keyword">typedef </span><span class="identifier">multi_pass</span><span class="special"><<br> </span><span class="identifier">my_functor</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span 
class="special">::</span><span class="identifier">functor_input</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">first_owner</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">no_check</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">std_deque<br> </span><span class="special">> </span><span class="identifier">functor_multi_pass_type</span><span class="special">;<br><br> </span><span class="identifier">functor_multi_pass_type first </span><span class="special">= </span><span class="identifier">functor_multi_pass_type</span><span class="special">(</span><span class="identifier">my_functor</span><span class="special">());<br> </span><span class="identifier">functor_multi_pass_type last</span><span class="special">;<br></span></code></pre>
|
||||
<a name="how_to_write_policies_for_use_with_multi_pass"></a>
|
||||
<h3>How to write policies for use with multi_pass</h3>
|
||||
<a name="inputpolicy"></a>
|
||||
<h4>InputPolicy</h4>
|
||||
<p> An InputPolicy must have the following interface:</p>
|
||||
<pre> <code><span class="keyword">class </span><span class="identifier">my_input_policy </span><span class="comment">// your policy name<br> </span><span class="special">{<br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="comment">// class inner will be instantiated with the type given<br> // as the InputT parameter to multi_pass.<br><br> </span><span class="keyword">template </span><span class="special"><</span><span class="keyword">typename </span><span class="identifier">InputT</span><span class="special">><br> </span><span class="keyword">class </span><span class="identifier">inner<br> </span><span class="special">{<br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="comment">// these typedefs determine the iterator_traits for multi_pass<br> </span><span class="keyword">typedef </span><span class="identifier">x </span><span class="identifier">value_type</span><span class="special">;<br> </span><span class="keyword">typedef </span><span class="identifier">x </span><span class="identifier">difference_type</span><span class="special">;<br> </span><span class="keyword">typedef </span><span class="identifier">x </span><span class="identifier">pointer</span><span class="special">;<br> </span><span class="keyword">typedef </span><span class="identifier">x </span><span class="identifier">reference</span><span class="special">;<br><br> </span><span class="keyword">protected</span><span class="special">:<br><br> </span><span class="identifier">inner</span><span class="special">();<br> </span><span class="identifier">inner</span><span class="special">(</span><span class="identifier">InputT </span><span class="identifier">x</span><span class="special">);<br> </span><span class="identifier">inner</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">& </span><span class="identifier">x</span><span 
class="special">);<br> </span><span class="comment">// delete or clean up any state<br> </span><span class="keyword">void </span><span class="identifier">destroy</span><span class="special">();<br> </span><span class="comment">// return true if *this and x have the same input<br> </span><span class="keyword">bool </span><span class="identifier">same_input</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">& </span><span class="identifier">x</span><span class="special">) </span><span class="keyword">const</span><span class="special">;<br> </span><span class="keyword">void </span><span class="identifier">swap</span><span class="special">(</span><span class="identifier">inner</span><span class="special">& </span><span class="identifier">x</span><span class="special">);<br><br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="comment">// get an instance from the input<br> </span><span class="identifier">result_type </span><span class="identifier">get_input</span><span class="special">() </span><span class="keyword">const</span><span class="special">;<br> </span><span class="comment">// advance the input<br> </span><span class="keyword">void </span><span class="identifier">advance_input</span><span class="special">();<br> </span><span class="comment">// return true if the input is at the end<br> </span><span class="keyword">bool </span><span class="identifier">input_at_eof</span><span class="special">() </span><span class="keyword">const</span><span class="special">;<br> </span><span class="special">};<br> </span><span class="special">};<br></span></code></pre>
|
||||
<p> Because of the way that multi_pass shares a buffer and input among multiple
|
||||
copies, class inner should keep a pointer to its input. The copy constructor
|
||||
should simply copy the pointer. destroy() should delete it. same_input should
|
||||
compare the pointers. For more details see the various implementations of InputPolicy
|
||||
classes.</p>
|
||||
<a name="ownershippolicy"></a>
|
||||
<h4>OwnershipPolicy</h4>
|
||||
<p> The OwnershipPolicy must have the following interface:</p>
|
||||
<pre> <code><span class="keyword">class </span><span class="identifier">my_ownership_policy<br> </span><span class="special">{<br> </span><span class="keyword">protected</span><span class="special">:<br><br> </span><span class="identifier">my_ownership_policy</span><span class="special">();<br> </span><span class="identifier">my_ownership_policy</span><span class="special">(</span><span class="identifier">my_ownership_policy </span><span class="keyword">const</span><span class="special">& </span><span class="identifier">x</span><span class="special">);<br> </span><span class="comment">// clone is called when a copy of the iterator is made<br> </span><span class="keyword">void </span><span class="identifier">clone</span><span class="special">();<br> </span><span class="comment">// called when a copy is deleted. Return true to indicate<br> // resources should be released<br> </span><span class="keyword">bool </span><span class="identifier">release</span><span class="special">();<br> </span><span class="keyword">void </span><span class="identifier">swap</span><span class="special">(</span><span class="identifier">my_ownership_policy</span><span class="special">& </span><span class="identifier">x</span><span class="special">);<br><br> </span><span class="keyword">public</span><span class="special">:<br> </span><span class="comment">// returns true if there is only one iterator in existence.<br> // std_deque StoragePolicy will free its buffered data if this<br> // returns true.<br> </span><span class="keyword">bool </span><span class="identifier">unique</span><span class="special">() </span><span class="keyword">const</span><span class="special">;<br> </span><span class="special">};<br></span></code></pre>
|
||||
<a name="checkingpolicy"></a>
|
||||
<h4>CheckingPolicy</h4>
|
||||
<p> The CheckingPolicy must have the following interface:</p>
|
||||
<pre> <code><span class="keyword">class </span><span class="identifier">my_check<br> </span><span class="special">{<br> </span><span class="keyword">protected</span><span class="special">:<br><br> </span><span class="identifier">my_check</span><span class="special">();<br> </span><span class="identifier">my_check</span><span class="special">(</span><span class="identifier">my_check </span><span class="keyword">const</span><span class="special">& </span><span class="identifier">x</span><span class="special">);<br> </span><span class="keyword">void </span><span class="identifier">destroy</span><span class="special">();<br> </span><span class="keyword">void </span><span class="identifier">swap</span><span class="special">(</span><span class="identifier">my_check</span><span class="special">& </span><span class="identifier">x</span><span class="special">);<br> </span><span class="comment">// check should make sure that this iterator is valid<br> </span><span class="keyword">void </span><span class="identifier">check_if_valid</span><span class="special">() </span><span class="keyword">const</span><span class="special">;<br> </span><span class="keyword">void </span><span class="identifier">clear_queue</span><span class="special">();<br> </span><span class="special">};<br></span></code></pre>
|
||||
<a name="storagepolicy"></a>
|
||||
<h4>StoragePolicy</h4>
|
||||
<p> A StoragePolicy must have the following interface:</p>
|
||||
<pre> <code><span class="keyword">class </span><span class="identifier">my_storage_policy<br> </span><span class="special">{<br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="comment">// class inner will be instantiated with the value_type from the InputPolicy<br><br> </span><span class="keyword">template </span><span class="special"><</span><span class="keyword">typename </span><span class="identifier">ValueT</span><span class="special">><br> </span><span class="keyword">class </span><span class="identifier">inner<br> </span><span class="special">{<br> </span><span class="keyword">protected</span><span class="special">:<br><br> </span><span class="identifier">inner</span><span class="special">();<br> </span><span class="identifier">inner</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">& </span><span class="identifier">x</span><span class="special">);<br> </span><span class="comment">// will be called from the destructor of the last iterator.<br> </span><span class="keyword">void </span><span class="identifier">destroy</span><span class="special">();<br> </span><span class="keyword">void </span><span class="identifier">swap</span><span class="special">(</span><span class="identifier">inner</span><span class="special">& </span><span class="identifier">x</span><span class="special">);<br> </span><span class="comment">// This is called when the iterator is dereferenced. 
It's a template<br> // method so we can recover the type of the multi_pass iterator<br> // and access it.<br> </span><span class="keyword">template </span><span class="special"><</span><span class="keyword">typename </span><span class="identifier">MultiPassT</span><span class="special">><br> </span><span class="keyword">static </span><span class="identifier">ValueT </span><span class="identifier">dereference</span><span class="special">(</span><span class="identifier">MultiPassT </span><span class="keyword">const</span><span class="special">& </span><span class="identifier">mp</span><span class="special">);<br> </span><span class="comment">// This is called when the iterator is incremented. It's a template<br> // method so we can recover the type of the multi_pass iterator<br> // and access it.<br> </span><span class="keyword">template </span><span class="special"><</span><span class="keyword">typename </span><span class="identifier">MultiPassT</span><span class="special">><br> </span><span class="keyword">static </span><span class="keyword">void </span><span class="identifier">increment</span><span class="special">(</span><span class="identifier">MultiPassT</span><span class="special">& </span><span class="identifier">mp</span><span class="special">);<br> </span><span class="keyword">void </span><span class="identifier">clear_queue</span><span class="special">();<br> </span><span class="comment">// called to determine whether the iterator is an eof iterator<br> </span><span class="keyword">template </span><span class="special"><</span><span class="keyword">typename </span><span class="identifier">MultiPassT</span><span class="special">><br> </span><span class="keyword">static </span><span class="keyword">bool </span><span class="identifier">is_eof</span><span class="special">(</span><span class="identifier">MultiPassT </span><span class="keyword">const</span><span class="special">& </span><span class="identifier">mp</span><span class="special">);<br> </span><span 
class="comment">// called by operator==<br> </span><span class="keyword">bool </span><span class="identifier">equal_to</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">& </span><span class="identifier">x</span><span class="special">) </span><span class="keyword">const</span><span class="special">;<br> </span><span class="comment">// called by operator<<br> </span><span class="keyword">bool </span><span class="identifier">less_than</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">& </span><span class="identifier">x</span><span class="special">) </span><span class="keyword">const</span><span class="special">;<br> </span><span class="special">}; </span><span class="comment"> // class inner<br> </span><span class="special">};<br></span></code></pre>
|
||||
<p> A StoragePolicy is the trickiest policy to write. You should study and understand
|
||||
the existing StoragePolicy classes before you try to write your own.</p>
|
||||
<table border="0">
|
||||
<tbody><tr>
|
||||
<td width="10"><br>
|
||||
</td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="trees.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="file_iterator.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2001-2002 Daniel C. Nuffer<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
<p class="copyright"> </p>
|
||||
<br>
|
||||
</body></html>
|
||||
@@ -1,460 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Numerics</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Numerics</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="operators.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="rule.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Similar to <tt>chlit</tt>, <tt>strlit</tt> etc., numeric parsers are also primitives.
|
||||
Numeric parsers are placed in
|
||||
|
||||
|
||||
a section of their own
|
||||
|
||||
to give this important building
|
||||
block better focus. The framework includes a couple of predefined objects for
|
||||
parsing signed and unsigned integers and real numbers. These parsers are fully
|
||||
parametric. Most of the important aspects of numeric parsing can be finely adjusted
|
||||
to suit. This includes the radix base, the minimum and maximum number of allowable
|
||||
digits, the exponent, the fraction etc. Policies control the real number parsers'
|
||||
behavior. There are some predefined policies covering the most common real number
|
||||
formats but the user can supply her own when needed. </p>
|
||||
<h2>uint_parser</h2>
|
||||
<p>This class is the simplest among the members of the numerics package. The <tt>uint_parser</tt>
|
||||
can parse unsigned integers of arbitrary length and size. The <tt>uint_parser</tt>
|
||||
parser can be used to parse ordinary primitive C/C++ integers or even user defined
|
||||
scalars such as bigints (unlimited precision integers). Like most of the classes
|
||||
in Spirit, the <tt>uint_parser</tt> is a template class. Template parameters
|
||||
fine tune its behavior. The uint_parser is so flexible that the other numeric
|
||||
parsers are implemented using it as the backbone.</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template </span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>T </span><span class=special>= </span><span class="keyword">unsigned</span><span class=special>,
|
||||
</span><span class=keyword>int </span><span class=identifier>Radix </span><span class=special>= </span><span class=number>10</span><span class=special>,
|
||||
</span><span class=keyword>unsigned </span><span class=identifier>MinDigits </span><span class=special>= </span><span class=number>1</span><span class=special>,
|
||||
</span><span class=keyword>int </span><span class=identifier>MaxDigits </span><span class=special>= -</span><span class=number>1</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>uint_parser </span><span class=special>{ </span><span class=comment>/*...*/ </span><span class=special>};</span></font></code></pre>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title">uint_parser template parameters</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>T</b></td>
|
||||
<td class="table_cells" width="79%">The numeric base type of the numeric parser.
|
||||
Defaults to <tt>unsigned</tt></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>Radix</b></td>
|
||||
<td class="table_cells" width="79%">The radix base. This can be either 2:
|
||||
binary, 8: octal, 10: decimal or 16: hexadecimal. Defaults to 10 (decimal)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>MinDigits</b></td>
|
||||
<td class="table_cells" width="79%">The minimum number of digits allowable</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>MaxDigits</b></td>
|
||||
<td class="table_cells" width="79%">The maximum number of digits allowable.
|
||||
If this is -1, then the maximum limit becomes unbounded</td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title">Predefined uint_parsers</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>bin_p</b></td>
|
||||
<td class="table_cells" width="79%"><code><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>,
|
||||
</span><span class=number>2</span><span class=special>, </span><span class=number>1</span><span class=special>,
|
||||
-</span><span class=number>1</span><span class=special>> </span><span class=keyword>const</span></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>oct_p</b></td>
|
||||
<td class="table_cells" width="79%"><code><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>,
|
||||
</span><span class=number>8</span><span class=special>, </span><span class=number>1</span><span class=special>,
|
||||
-</span><span class=number>1</span><span class=special>> </span><span class=keyword>const</span></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>uint_p</b></td>
|
||||
<td class="table_cells" width="79%"><code><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>,
|
||||
</span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>,
|
||||
-</span><span class=number>1</span><span class=special>> </span><span class=keyword>const</span></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>hex_p</b></td>
|
||||
<td class="table_cells" width="79%"><code><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>,
|
||||
</span><span class=number>16</span><span class=special>, </span><span class=number>1</span><span class=special>,
|
||||
-</span><span class=number>1</span><span class=special>> </span><span class=keyword>const</span></code></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The following example shows how the uint_parser can be used to parse thousand
|
||||
separated numbers. The example can correctly parse numbers such as 1,234,567,890.</p>
|
||||
<pre><span class=keyword> </span><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, </span><span class=number>3</span><span class=special>> </span><span class=identifier>uint3_p</span><span class=special>; </span><span class=comment>// 1..3 digits
|
||||
</span><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>3</span><span class=special>, </span><span class=number>3</span><span class=special>> </span><span class=identifier>uint3_3_p</span><span class=special>; </span><span class=comment>// exactly 3 digits
|
||||
</span><span class=identifier>ts_num_p </span><span class=special>= </span><span class=special>(</span><span class=identifier>uint3_p </span><span class=special>>> </span><span class=special>*(</span><span class=literal>',' </span><span class=special>>> </span><span class=identifier>uint3_3_p</span><span class=special>)); </span><span class=comment>// our thousand separated number parser</span></pre>
|
||||
<p><tt>bin_p</tt>, <tt>oct_p</tt>, <tt>uint_p</tt> and <tt>hex_p</tt> are parser
|
||||
generator objects designed to be used within expressions. Here's an example
|
||||
of a rule that parses a comma-delimited list of numbers (We've seen this <a href="quick_start.html#list_of_numbers">before</a>):</p>
|
||||
<pre><code><span class=identifier> </span><span class=identifier>list_of_numbers </span><span class=special>=</span> <span class=identifier>real_p </span><span class=special>>> *(</span><span class=literal>','</span> <span class=special>>> </span><span class=identifier>real_p</span><span class=special>)</span></code>;
|
||||
</pre>
|
||||
<p></p>
|
||||
<p>Later, we shall see how we can extract the actual numbers parsed by the numeric
|
||||
parsers. We shall deal with this when we get to the section on <a href="semantic_actions.html#specialized_actions">specialized
|
||||
actions</a>.</p>
|
||||
<h2>int_parser</h2>
|
||||
<p>The <tt>int_parser</tt> can parse signed integers of arbitrary length and size.
|
||||
This is almost the same as the <tt>uint_parser</tt>. The only difference is
|
||||
the additional task of parsing the <tt><span class="quotes">'+'</span></tt>
|
||||
or <tt class="quotes">'-'</tt> sign preceding the number. The class interface
|
||||
is the same as that of the uint_parser.<br>
|
||||
</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title">A predefined int_parser</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>int_p</b></td>
|
||||
<td class="table_cells" width="79%"><span class=identifier><code>int_parser</code></span><code><span class=special><</span><span class=keyword>int</span><span class=special>,
|
||||
</span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>,
|
||||
-</span><span class=number>1</span><span class=special>> </span><span class=keyword>const</span></code></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>real_parser</h2>
|
||||
<p>The <tt>real_parser</tt> can parse real numbers of arbitrary length and size
|
||||
limited by its parametric type <tt>T</tt>. The <tt>real_parser</tt> is a template
|
||||
class with 2 template parameters. Here's the <tt>real_parser</tt> template interface:</p>
|
||||
<pre><span class=keyword> template</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>T </span><span class=special>= </span><span class=keyword>double</span><span class=special>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>RealPoliciesT </span><span class=special>= </span><span class=identifier>ureal_parser_policies</span><span class=special><</span><span class=identifier>T</span><span class=special>> </span><span class=special>>
|
||||
</span><span class=keyword> struct </span><span class=identifier>real_parser</span><span class=special>;</span></pre>
|
||||
<p>The first template parameter is its numeric base type <tt>T</tt>. This defaults
|
||||
to <tt>double</tt>. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"><b>
|
||||
Parsing special numeric types</b><br>
|
||||
<br>
|
||||
Notice that the numeric base type <tt>T</tt> can be specified by the user.
|
||||
This means that we can use the numeric parsers to parse user defined numeric
|
||||
types such as <tt>fixed_point</tt> (fixed point reals) and <tt>bigint</tt>
|
||||
(unlimited precision integers).</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The second template parameter is a class that groups all the policies and defaults
|
||||
to <tt>ureal_parser_policies<T></tt>. Policies control the real number
|
||||
parsers' behavior. The default policies provided are designed to parse C/C++
|
||||
style real numbers of the form <b>nnn.fffEeee</b> where <b>nnn</b> is the whole
|
||||
number part, <b>fff</b> is the fractional part, <b>E</b> is <tt class="quotes">'e'</tt>
|
||||
or <tt class="quotes">'E'</tt> and <b>eee</b> is the exponent optionally preceded
|
||||
by <tt class="quotes">'-'</tt> or <tt><span class="quotes">'+'</span></tt>.
|
||||
This corresponds to the following grammar, with the exception that plain integers
|
||||
without the decimal point are also accepted by default.</p>
|
||||
<pre><code><font color="#000000"><span class=keyword> </span><span class=identifier>floatingliteral
|
||||
</span><span class=special>= </span><span class=identifier>fractionalconstant </span><span class=special>>> </span><span class=special>!</span><span class=identifier>exponentpart
|
||||
</span><span class=special>| </span><span class=special>+</span><span class=identifier>digit_p </span><span class=special>>> </span><span class=identifier>exponentpart
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>fractionalconstant
|
||||
</span><span class=special>= </span><span class=special>*</span><span class=identifier>digit_p </span><span class=special>>> </span><span class=literal>'.' </span><span class=special>>> </span><span class=special>+</span><span class=identifier>digit_p
|
||||
</span><span class=special>| </span><span class=special>+</span><span class=identifier>digit_p </span><span class=special>>> </span><span class=literal>'.'
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>exponentpart
|
||||
</span><span class=special>= </span><span class=special>(</span><span class=literal>'e' </span><span class=special>| </span><span class=literal>'E'</span><span class=special>) </span><span class=special>>> </span><span class=special>!(</span><span class=literal>'+' </span><span class=special>| </span><span class=literal>'-'</span><span class=special>) </span><span class=special>>> </span><span class=special>+</span><span class=identifier>digit_p
|
||||
</span><span class=special>;</span></font></code></pre>
|
||||
<p>The default policies are provided to take care of the most common case (there
|
||||
are many ways to represent, and hence parse, real numbers). In most cases, the
|
||||
default setting of the <tt>real_parser</tt> is sufficient and can be used straight
|
||||
out of the box. Actually, there are four <tt>real_parser</tt>s pre-defined for
|
||||
immediate use:</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title">Predefined real_parsers</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>ureal_p</b></td>
|
||||
<td class="table_cells" width="79%"><span class=identifier><code>real_parser</code></span><code><span class=special><</span><span class=keyword>double</span><span class=special>,
|
||||
</span><span class=identifier>ureal_parser_policies</span><span class=special><</span><span class=keyword>double</span><span class=special>>
|
||||
> </span><span class=keyword>const</span></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>real_p</b></td>
|
||||
<td class="table_cells" width="79%"><span class=identifier><code>real_parser</code></span><code><span class=special><</span><span class=keyword>double</span><span class=special>,
|
||||
</span><span class=identifier>real_parser_policies</span><span class=special><</span><span class=keyword>double</span><span class=special>>
|
||||
> </span><span class=keyword>const</span></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>strict_ureal_p</b></td>
|
||||
<td class="table_cells" width="79%"><span class=identifier><code>real_parser</code></span><code><span class=special><</span><span class=keyword>double</span><span class=special>,
|
||||
</span><span class=identifier>strict_ureal_parser_policies</span><span class=special><</span><span class=keyword>double</span><span class=special>>
|
||||
> </span><span class=keyword>const</span></code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><b>strict_real_p</b></td>
|
||||
<td class="table_cells" width="79%"><span class=identifier><code>real_parser</code></span><code><span class=special><</span><span class=keyword>double</span><span class=special>,
|
||||
</span><span class=identifier>strict_real_parser_policies</span><span class=special><</span><span class=keyword>double</span><span class=special>>
|
||||
> </span><span class=keyword>const</span></code></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>We've seen <tt>real_p</tt> before. <tt>ureal_p</tt> is its unsigned variant.</p>
|
||||
<h3><a name="strict_reals"></a>Strict Reals </h3>
|
||||
<p>Integer numbers are considered a subset of real numbers, so <tt>real_p</tt>
|
||||
and <tt>ureal_p</tt> recognize integer numbers (without a dot) as real numbers.
|
||||
<tt>strict_real_p</tt> and <tt>strict_ureal_p</tt> are the equivalent parsers
|
||||
that <strong>require</strong> a dot to be present for a number to be considered
|
||||
a successful match.</p>
|
||||
<h2>Advanced: real_parser policies</h2>
|
||||
<p>The parser policies break down real number parsing into 6 steps:</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_cells">1</td>
|
||||
<td class="table_cells"><b>parse_sign</b></td>
|
||||
<td class="table_cells">Parse the prefix sign</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">2</td>
|
||||
<td class="table_cells"><b>parse_n</b></td>
|
||||
<td class="table_cells">Parse the integer at the left of the decimal point</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">3</td>
|
||||
<td class="table_cells"><b>parse_dot</b></td>
|
||||
<td class="table_cells">Parse the decimal point</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">4</td>
|
||||
<td class="table_cells"><b>parse_frac_n</b></td>
|
||||
<td class="table_cells">Parse the fraction after the decimal point</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">5</td>
|
||||
<td class="table_cells"><b>parse_exp</b></td>
|
||||
<td class="table_cells">Parse the exponent prefix (e.g. 'e')</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">6</td>
|
||||
<td class="table_cells"><b>parse_exp_n</b></td>
|
||||
<td class="table_cells">Parse the actual exponent</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>And the interaction of these sub-parsing tasks is further controlled by these
|
||||
3 policies:</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_cells">1</td>
|
||||
<td class="table_cells"><b>allow_leading_dot</b></td>
|
||||
<td class="table_cells">Allow a leading dot to be present (".1" becomes
|
||||
equivalent to "0.1")</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">2</td>
|
||||
<td class="table_cells"><b>allow_trailing_dot</b></td>
|
||||
<td class="table_cells">Allow a trailing dot to be present ("1." becomes
|
||||
equivalent to "1.0")</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">3</td>
|
||||
<td class="table_cells"><b>expect_dot</b></td>
|
||||
<td class="table_cells">Require a dot to be present (disallows "1" to
|
||||
be equivalent to "1.0")</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>[ <img src="theme/lens.gif" width="15" height="16"> From here on, required
|
||||
reading: <a href="scanner.html">The Scanner</a>, <a href="indepth_the_parser.html">In-depth
|
||||
The Parser</a> and <a href="indepth_the_scanner.html">In-depth The Scanner</a>
|
||||
]</p>
|
||||
<h2>sign_parser and sign_p</h2>
|
||||
<p>Before we move on, a small utility parser is included here to ease the parsing
|
||||
of the <span class="quotes">'-'</span> or <span class="quotes">'+'</span> sign.
|
||||
While it is easy to write one:</p>
|
||||
<pre> <span class=identifier>sign_p </span><span class=special>= </span><span class=special>(</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'+'</span>) <span class=special>| </span><span class=literal>'-'</span><span class="special">)</span><span class=literal>;</span></pre>
|
||||
<p>it is not possible to extract the actual sign (positive or negative) without
|
||||
resorting to semantic actions. The sign_p parser has a bool attribute returned
|
||||
to the caller through the match object which, after parsing, is set to <strong>true</strong>
|
||||
if the parsed sign is negative. This attribute detects if the negative sign
|
||||
has been parsed. Examples:</p>
|
||||
<pre><span class=special> </span><span class=keyword>bool </span><span class=identifier>is_negative</span><span class=special>;
|
||||
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>sign_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>is_negative</span><span class=special>)]</span><span class=special>;</span></pre>
|
||||
<p><span class=special></span>or simply...</p>
|
||||
<pre> <span class=comment>// directly extract the result from the match result's value</span>
|
||||
<span class=keyword>bool </span><span class=identifier>is_negative </span><span class=special>= </span><span class=identifier>sign_p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>).</span><span class=identifier>value</span><span class=special>();</span><span class=comment> </span></pre>
|
||||
<p>The sign_p parser expects attached semantic actions to have a signature (see
|
||||
<a href="semantic_actions.html#specialized_actions">Specialized Actions</a>
|
||||
for further detail) compatible with: </p>
|
||||
<p><b>Signature for functions:</b></p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>void </span><span class=identifier>func</span><span class=special>(</span><span class="keyword">bool</span><span class=identifier> is_negative</span><span class=special>);</span></font></code></pre>
|
||||
<p><b>Signature for functors:</b> </p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>ftor
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class="keyword">bool</span><span class=identifier> is_negative</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<h2><span class=identifier>ureal_parser_policies</span></h2>
|
||||
<pre><span class=comment> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>ureal_parser_policies
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef </span><span class=identifier>uint_parser</span><span class=special><</span><span class=identifier>T</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, -</span><span class=number>1</span><span class=special>> </span><span class=identifier>uint_parser_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>int_parser</span><span class=special><</span><span class=identifier>T</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, -</span><span class=number>1</span><span class=special>> </span><span class=identifier>int_parser_t</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>static const bool</span> <span class=identifier>allow_leading_dot</span> <span class=special> =</span> <span class=literal>true</span><span class=special>;</span><span class=special>
|
||||
</span><span class=keyword>static const bool</span> <span class=identifier>allow_trailing_dot </span><span class=special>=</span> <span class=literal>true</span><span class=special>;</span><span class=special></span>
|
||||
<span class=special> </span><span class=keyword>static const bool</span> <span class=identifier>expect_dot</span> <span class=special> =</span> <span class=literal>false</span><span class=special>;</span><span class=special></span><span class=special><br>
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>match_result</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>, </span><span class="identifier">nil_t</span><span class=special>>::</span><span class=identifier>type
|
||||
parse_sign</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>no_match</span><span class=special>(); }
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>uint_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
parse_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return </span><span class=identifier>uint_parser_t</span><span class=special>().</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>chlit</span><span class=special><>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
parse_dot</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'.'</span><span class=special>).</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>uint_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
parse_frac_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return </span><span class=identifier>uint_parser_t</span><span class=special>().</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>chlit</span><span class=special><>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
parse_exp</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=literal>'e'</span><span class=special>].</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=keyword>int</span><span class=identifier>_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
parse_exp_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return int</span><span class=identifier>_parser_t</span><span class=special>().</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
|
||||
};
|
||||
</span></pre>
|
||||
<p><span class=special></span><span class=identifier>The default ureal_parser_policies
|
||||
uses the lower level integer numeric parsers to do its job. </span></p>
|
||||
<h2><span class=identifier>real_parser_policies</span></h2>
|
||||
<pre> <span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>real_parser_policies </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>ureal_parser_policies</span><span class=special><</span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static </span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>sign_parser</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
</span><span class=identifier>parse_sign</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
</span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>sign_p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); </span><span class=special>}
|
||||
</span><span class=special>};</span></pre>
|
||||
<p>Notice how the real_parser_policies replaced <b><tt>parse_sign</tt></b> of
|
||||
the <b>u</b>real_parser_policies from which it is subclassed. The default real_parser_policies
|
||||
simply uses a <tt>sign_p</tt> instead of <tt>scan.no_match()</tt> in the <tt>parse_sign
|
||||
</tt> step. </p>
|
||||
<h2><span class=identifier>strict_ureal_parser_policies and strict_real_parser_policies</span></h2>
|
||||
<pre> <span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>strict_ureal_parser_policies </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>ureal_parser_policies</span><span class=special><</span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=special>{</span>
|
||||
<span class=special> </span><span class=keyword>static const bool</span> <span class=identifier>expect_dot</span><span class=special> =</span> <span class=literal>true</span><span class=special>;</span><span class=special></span><span class=special></span>
|
||||
<span class=special>};</span>
|
||||
|
||||
<span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>strict_real_parser_policies </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>real_parser_policies</span><span class=special><</span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=special>{</span>
|
||||
<span class=special> </span><span class=keyword>static const bool</span> <span class=identifier>expect_dot</span><span class=special> =</span> <span class=literal>true</span><span class=special>;</span><span class=special></span><span class=special></span>
|
||||
<span class=special>};</span></pre>
|
||||
<p>Again, these policies override only the members that need to differ from
|
||||
those of their base classes.</p>
|
||||
<p><i>Specialized</i> real parser policies can reuse some of the defaults while
|
||||
replacing a few. For example, the following is a real number parser policy that
|
||||
parses thousands-separated numbers with at most two decimal places and no exponent.
|
||||
</p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16">The full source code can be
|
||||
viewed <a href="../example/fundamental/thousand_separated.cpp">here</a>. </p>
|
||||
<pre>
|
||||
<span class=identifier> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>ts_real_parser_policies </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>ureal_parser_policies</span><span class=special><</span><span class=identifier>T</span><span class=special>>
|
||||
{
|
||||
</span><span class=comment>// These policies can be used to parse thousand separated
|
||||
// numbers with at most 2 decimal digits after the decimal
|
||||
// point. e.g. 123,456,789.01
|
||||
|
||||
</span><span class=keyword>typedef </span><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>int</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, </span><span class=number>2</span><span class=special>> </span><span class=identifier>uint2_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>uint_parser</span><span class=special><</span><span class=identifier>T</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, -</span><span class=number>1</span><span class=special>> </span><span class=identifier>uint_parser_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>int_parser</span><span class=special><</span><span class=keyword>int</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, -</span><span class=number>1</span><span class=special>> </span><span class=identifier>int_parser_t</span><span class=special>;
|
||||
|
||||
</span><span class=comment>////////////////////////////////// 2 decimal places Max
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>uint2_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
parse_frac_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return </span><span class=identifier>uint2_t</span><span class=special>().</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
|
||||
|
||||
</span><span class=special> </span><span class=comment>////////////////////////////////// No exponent<br></span> <span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>chlit</span><span class=special><>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
parse_exp</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>no_match</span><span class=special>(); }
|
||||
|
||||
</span><span class=special> </span><span class=comment>////////////////////////////////// No exponent<br></span> <span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>int_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
parse_exp_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{ </span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>no_match</span><span class=special>(); }
|
||||
|
||||
</span><span class=comment>////////////////////////////////// Thousands separated numbers
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>uint_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
<a name="scanner_save"></a>parse_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>& </span><span class=identifier>scan</span><span class=special>)
|
||||
{
|
||||
</span><span class=keyword>typedef typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>uint_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type RT</span><span class=special>;
|
||||
</span><span class="keyword">static </span><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, </span><span class=number>3</span><span class=special>> </span><span class=identifier>uint3_p</span><span class=special>;
|
||||
</span><span class="keyword">static </span><span class=identifier>uint_parser</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>3</span><span class=special>, </span><span class=number>3</span><span class=special>> </span><span class=identifier>uint3_3_p</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>RT hit </span><span class=special>= </span><span class=identifier>uint3_p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>))
|
||||
{
|
||||
</span><span class=identifier>T n</span><span class=special>;
|
||||
</span><span class="keyword">typedef typename </span>ScannerT<span class="special">::</span>iterator_t iterator_t;<span class=special>
|
||||
</span>iterator_t save<span class="special"> = </span>scan.first<span class="special">;
|
||||
</span><span class=keyword>while </span><span class=special>(</span><span class=identifier>match</span><span class=special><> </span><span class=identifier>next </span><span class=special>= (</span><span class=literal>',' </span><span class=special>>> </span><span class=identifier>uint3_3_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>n</span><span class=special>)]).</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>))
|
||||
{
|
||||
</span><span class=identifier>hit</span><span class=special>.</span><span class=identifier>value</span><span class=special>() *= </span><span class=number>1000</span><span class=special>;
|
||||
</span><span class=identifier>hit</span><span class=special>.</span><span class=identifier>value</span><span class=special>() += </span><span class=identifier>n</span><span class=special>;
|
||||
</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>concat_match</span><span class=special>(</span><span class=identifier>hit</span><span class=special>, </span><span class=identifier>next</span><span class=special>);
|
||||
</span><span class="identifier">save </span><span class=special><span class="special">= </span></span><span class="identifier">scan</span><span class="special">.</span><span class="identifier">first</span><span class=special><span class="special">;</span>
|
||||
}
|
||||
</span>scan<span class="special">.</span>first<span class="special"> = </span>save<span class="special">;
|
||||
</span><span class=keyword>return </span><span class=identifier>hit</span><span class=special>;
|
||||
|
||||
</span><span class=special> // Note: On erroneous input such as "123,45", the result should<br> // be a partial match "123". 'save' is used to make sure that<br> // the scanner position is placed at the last *valid* parse<br> // position.<br> }
|
||||
</span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>no_match</span><span class=special>();
|
||||
}
|
||||
};</span></pre>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="operators.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="rule.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2002 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,222 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Operators</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Operators</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="primitives.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="numerics.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Operators are used as a means for object composition and embedding. Simple
|
||||
parsers may be composed to form composites through operator overloading, crafted
|
||||
to approximate the syntax of an Extended Backus-Naur Form (EBNF) variant.
|
||||
An expression such as:</p>
|
||||
<pre><code><font color="#000000"> <span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span></font></code></pre>
|
||||
<p>actually yields a new parser type which is a composite of its operands, a and
|
||||
b. Taking this example further, if a and b were of type <tt>chlit</tt>&lt;&gt;,
|
||||
the result would have the composite type:</p>
|
||||
<pre><code><font color="#000000"> <span class=identifier>alternative</span><span class=special><</span><span class=identifier>chlit</span><span class=special><>, </span><span class=identifier>chlit</span><span class=special><> </span><span class=special>></span></font></code></pre>
|
||||
<p> In general, for any binary operator, it will take its two arguments, parser1
|
||||
and parser2, and create a new composed parser of the form</p>
|
||||
<pre><code><font color="#000000"> <span class=identifier>op</span><span class=special><</span><span class=identifier>parser1</span><span class=special>, </span><span class=identifier>parser2</span><span class=special>></span></font></code></pre>
|
||||
<p>where parser1 and parser2 can be arbitrarily complex parsers themselves, with
|
||||
the only limitations being what your compiler imposes. </p>
|
||||
<h3>Set Operators</h3>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="3">Set operators</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="20%"><code><span class=identifier>a </span><span class=special>|
|
||||
</span><span class=identifier>b</span></code></td>
|
||||
<td class="table_cells" width="24%">Union</td>
|
||||
<td class="table_cells" width="56%">Match a or b. Also referred to as alternative</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="20%"><code><span class=identifier>a </span><span class=special>&
|
||||
</span><span class=identifier>b</span></code></td>
|
||||
<td class="table_cells" width="24%">Intersection</td>
|
||||
<td class="table_cells" width="56%">Match a and b</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="20%"><code><span class=identifier>a </span><span class=special>-
|
||||
</span><span class=identifier>b</span></code></td>
|
||||
<td class="table_cells" width="24%">Difference</td>
|
||||
<td class="table_cells" width="56%">Match a but not b. If both match and b's
|
||||
matched text is shorter than a's matched text, a successful match is made</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="20%"><code><span class=identifier>a </span><span class=special>^
|
||||
</span><span class=identifier>b</span></code></td>
|
||||
<td class="table_cells" width="24%">XOR</td>
|
||||
<td class="table_cells" width="56%">Match a or b, but not both</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><b>Short-circuiting</b></p>
|
||||
<p>Alternative operands are tried one by one on a first come first served basis
|
||||
starting from the leftmost operand. After a successfully matched alternative
|
||||
is found, the parser concludes its search, essentially short-circuiting the
|
||||
search for other potentially viable candidates. This short-circuiting implicitly
|
||||
gives the highest priority to the leftmost alternative.</p>
|
||||
<p>Short-circuiting is done in the same manner as C or C++'s logical expressions;
|
||||
e.g. <tt>if</tt> <tt><span class="operators">(</span>x <span class="operators"><</span>
|
||||
3 <span class="operators">||</span> y <span class="operators"><</span> 2<span class="operators">)</span></tt>
|
||||
where, if <tt>x</tt> evaluates to be less than 3, the <tt>y <span class="operators"><</span>
|
||||
2</tt> test is not done at all. In addition to providing an implicit priority
|
||||
rule for alternatives which is necessary, given the non-deterministic nature
|
||||
of the Spirit parser compiler, short-circuiting improves the execution time.
|
||||
If the order of your alternatives is logically irrelevant, strive to put the
|
||||
(expected) most common choice first for maximum efficiency.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>Intersections</b><br>
|
||||
<br>
|
||||
Some researchers assert that intersections (e.g. <tt>a & b</tt>)
|
||||
let us define context sensitive languages (<a href="references.html#intersections">"XBNF"</a>
|
||||
[citing Leu-Weiner, 1973]). "The theory of defining a language as the
|
||||
intersection of a finite number of context free languages was developed
|
||||
by Leu and Weiner in 1973".<br>
|
||||
<br>
|
||||
<b><img src="theme/lens.gif" width="15" height="16"> ~ Operator</b><br>
|
||||
<br>
|
||||
The complement operator <tt>~</tt> was originally put into consideration.
|
||||
Further understanding of its value and meaning leads us to uncertainty.
|
||||
The basic problem stems from the fact that <tt>~a</tt> will yield <tt>U-a</tt>,
|
||||
where <tt>U</tt> is the universal set of all strings. However, where it
|
||||
makes sense, some parsers can be complemented (see the <a href="primitives.html#negation">primitive
|
||||
character parsers</a> for examples).</td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3>Sequencing Operators</h3>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="3">Sequencing operators</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><code><span class=identifier>a </span><span class=special>>>
|
||||
</span><span class=identifier>b</span></code></td>
|
||||
<td class="table_cells" width="23%">Sequence</td>
|
||||
<td class="table_cells" width="56%">Match a and b in sequence</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><code><span class=identifier>a </span><span class=special>&&
|
||||
</span><span class=identifier>b</span></code></td>
|
||||
<td class="table_cells" width="23%">Sequential-and</td>
|
||||
<td class="table_cells" width="56%">Sequential-and. Same as above, match a
|
||||
and b in sequence</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><code><span class=identifier>a </span><span class=special>||
|
||||
</span><span class=identifier>b</span></code></td>
|
||||
<td class="table_cells" width="23%">Sequential-or</td>
|
||||
<td class="table_cells" width="56%">Match a or b in sequence</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The sequencing operator <tt class="operators">>></tt> can alternatively
|
||||
be thought of as the sequential-and operator. The expression <tt>a <span class="operators">&&</span>
|
||||
b</tt> reads as match a and b in sequence. Continuing this logic, we can also
|
||||
have a sequential-or operator where the expression <tt>a <span class="operators">||</span>
|
||||
b</tt> reads as match a or b, in sequence. That is, if both a and b match,
|
||||
it must be in sequence; this is equivalent to <tt>a >> !b | b</tt>. </p>
|
||||
<h3>Optional and Loops</h3>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="3">Optional and Loops</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><code><span class=special>*</span><span class=identifier>a</span></code></td>
|
||||
<td class="table_cells" width="23%">Kleene star</td>
|
||||
<td class="table_cells" width="56%">Match a zero (0) or more times</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><code><span class=special>+</span><span class=identifier>a</span></code></td>
|
||||
<td class="table_cells" width="23%">Positive</td>
|
||||
<td class="table_cells" width="56%">Match a one (1) or more times</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><code><span class=special>!</span><span class=identifier>a</span></code></td>
|
||||
<td class="table_cells" width="23%">Optional</td>
|
||||
<td class="table_cells" width="56%">Match a zero (0) or one (1) time</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="21%"><code><span class=identifier>a </span><span class=special>%
|
||||
</span><span class=identifier>b</span></code></td>
|
||||
<td class="table_cells" width="23%">List</td>
|
||||
<td class="table_cells" width="56%">Match a list of one or more repetitions
|
||||
of a separated by occurrences of b. This is the same as <tt>a >> *(b
|
||||
>> a)</tt>. Note that <tt>a</tt> must not also match <tt>b</tt></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><img src="theme/note.gif" width="16" height="16"> If we look more closely,
|
||||
take note that we generalized the optional expression of the form <tt>!a</tt>
|
||||
in the same category as loops. This is logical, considering that the optional
|
||||
matches the expression following it zero (0) or one (1) time. </p>
|
||||
<p><b>Primitive type operands</b></p>
|
||||
<p> For binary operators, one of the operands but not both may be a <tt>char</tt>,
|
||||
<tt> wchar_t</tt>, <tt>char const<span class="operators">*</span></tt> or <tt>wchar_t
|
||||
const<span class="operators">*</span></tt>. Where P is a parser object, here
|
||||
are some examples:</p>
|
||||
<pre><code><span class=identifier> </span><span class=identifier>P </span><span class=special>| </span><span class=literal>'x'
|
||||
</span><span class=identifier>P </span><span class=special>- </span><span class=identifier>L</span><span class=string>"Hello World"
|
||||
</span><span class=literal>'x' </span><span class=special>>> </span><span class=identifier>P
|
||||
</span><span class=string>"bebop" </span><span class=special>>> </span><span class=identifier>P</span></code></pre>
|
||||
<p>It is important to emphasize that C++ mandates that operators may only be overloaded
|
||||
if at least one argument is a user-defined type. Typically, in an expression
|
||||
involving multiple operators, explicitly typing the leftmost operand as a parser
|
||||
is enough to cause propagation to all the rest of the operands to its right
|
||||
to be regarded as parsers. Examples:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=identifier>r </span><span class=special>= </span><span class=literal>'a' </span><span class=special>| </span><span class=literal>'b' </span><span class=special>| </span><span class=literal>'c' </span><span class=special>| </span><span class=literal>'d'</span><span class=special>; </span><span class=comment>// ill formed
|
||||
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'a'</span><span class=special>) </span><span class=special>| </span><span class=literal>'b' </span><span class=special>| </span><span class=literal>'c' </span><span class=special>| </span><span class=literal>'d'</span><span class=special>; </span><span class=comment>// OK</span></font></code></pre>
|
||||
<p>The second case is parsed as follows:</p>
|
||||
<pre><code><font color="#000000"> r <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(((</span><span class=identifier>chlit</span><span class=special><</span><span class=keyword>char</span><span class=special>> </span><span class=special>| </span><span class=keyword>char</span><span class=special>) </span><span class=special>| </span><span class=keyword>char</span><span class=special>) </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
|
||||
|
||||
a <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(</span><span class=identifier>chlit</span><span class=special><</span><span class=keyword>char</span><span class=special>> </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
|
||||
r <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(((</span><span class=identifier>a</span><span class=special>) </span><span class=special>| </span><span class=keyword>char</span><span class=special>) </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
|
||||
|
||||
b <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(</span><span class=identifier>a </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
|
||||
r <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(((</span><span class=identifier>b</span><span class=special>)) </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
|
||||
|
||||
c <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(</span><span class=identifier>b </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
|
||||
r <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(((</span><span class=identifier>c</span><span class=special>)))</span></font></font></code></pre>
|
||||
<p><b>Operator precedence and grouping</b></p>
|
||||
<p>Since we are defining our meta-language in C++, we follow C/C++'s operator
|
||||
precedence rules. Grouping expressions inside parentheses overrides this
|
||||
(e.g., <tt><span class="operators">*(</span>a <span class="operators">|</span>
|
||||
b<span class="operators">)</span></tt> reads: match a or b zero (0) or more
|
||||
times). </p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="primitives.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="numerics.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,165 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Organization</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Organization</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="basic_concepts.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="primitives.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The framework is highly modular and is organized in layers:</p>
|
||||
<table width="100%" border="0">
|
||||
<tr>
|
||||
<td><div align="center">
|
||||
<table width="40%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
|
||||
<tr>
|
||||
<td><div align="center"><font color="#003366"><strong>iterator</strong></font></div></td>
|
||||
<td><div align="center"><font color="#003366"><strong>actor</strong></font></div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<font color="#003366" size="3"><br>
|
||||
</font></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div align="center">
|
||||
<table width="20%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
|
||||
<tr>
|
||||
<td><div align="center"><font color="#003366"><strong>debug</strong></font></div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<font color="#003366" size="3"><br>
|
||||
</font></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div align="center">
|
||||
<table width="75%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
|
||||
<tr>
|
||||
<td><div align="center"><font color="#003366"><strong>attribute</strong></font></div></td>
|
||||
<td><div align="center"><font color="#003366"><strong>dynamic</strong></font></div></td>
|
||||
<td><div align="center"><font color="#003366"><strong>error_handling</strong></font></div></td>
|
||||
<td><div align="center"><font color="#003366"><strong>symbols</strong></font></div></td>
|
||||
<td><div align="center"><font color="#003366"><strong>tree</strong></font></div></td>
|
||||
<td><div align="center"><font color="#003366"><strong>utility</strong></font></div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<font color="#003366" size="3"><br>
|
||||
</font></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div align="center">
|
||||
<table width="20%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
|
||||
<tr>
|
||||
<td><div align="center"><font color="#003366"><strong>meta</strong></font></div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<font color="#003366"><br>
|
||||
</font></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div align="center">
|
||||
<table width="85%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
|
||||
<tr>
|
||||
<td colspan="4"><div align="center"><font color="#003366"><strong><font size="4">core</font></strong></font></div></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><div align="center"><font color="#003366"><strong>scanner</strong></font></div></td>
|
||||
<td><div align="center"><font color="#003366"><strong>primitives</strong></font></div></td>
|
||||
<td><div align="center"><font color="#003366"><strong>composite</strong></font></div></td>
|
||||
<td><div align="center"><font color="#003366"><strong>non_terminal</strong></font></div></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Spirit has four layers, plus an independent top layer. The independent layer,
|
||||
consisting of actor and iterator, does not rely on the other layers. The framework's
|
||||
architecture is completely orthogonal. The relationship among the layers is
|
||||
acyclic. Lower layers neither depend on nor know of the existence of upper layers.
|
||||
Modules in a layer do not depend on other modules in the same layer. </p>
|
||||
<p>The client may use only the modules that she wants without incurring any compile
|
||||
time or run time penalty. A minimalistic approach is to use only the core as
|
||||
is. The highly streamlined core is usable by itself. The core is sufficiently
|
||||
suitable for tasks such as micro parsing.</p>
|
||||
<p>The <strong>iterator</strong> module is independent of Spirit and may be used
|
||||
in other non-Spirit applications. This module is a compilation of stand-alone
|
||||
iterators and iterator wrappers compatible with Spirit.
|
||||
|
||||
|
||||
Over time, these iterators have been found to be most useful for parsing with Spirit. </p>
|
||||
<p>The <strong>actor</strong> module, also independent of Spirit, is a compilation
|
||||
of predefined semantic actions that cover the most common semantic processing
|
||||
tasks.</p>
|
||||
<p>The <strong>debug</strong> module provides library wide parser debugging. This
|
||||
module hooks itself up transparently into the core non-intrusively and only
|
||||
when necessary.</p>
|
||||
<p>The <strong>attribute</strong> module introduces advanced semantic action machinery
|
||||
with emphasis on extraction and passing of data up and down the parser hierarchy
|
||||
through inherited and synthesized attributes. Attributes may also be used to
|
||||
actually control the parsing. Parametric parsers are a form of dynamic parsers
|
||||
that change their behavior at run time based on some attribute or data.</p>
|
||||
<p>The <strong>dynamic</strong> module focuses on parsers with behavior that can
|
||||
be modified at run-time.</p>
|
||||
<p><strong>error_handling</strong>. The framework would not be complete without
|
||||
Error Handling. C++'s exception handling mechanism is a perfect match for Spirit
|
||||
due to its highly recursive functional nature. C++ Exceptions are used extensively
|
||||
by this module for handling errors.</p>
|
||||
<p>The <strong>symbols</strong> module focuses on symbol table management. This module
|
||||
is rather basic now. The goal is to build a sub-framework that will be able
|
||||
to accommodate C++ style multiple scope mechanisms. C++ is a great model for
|
||||
the complexity of scoping that perhaps has no parallel in any other language.
|
||||
There are classes and inheritance, private, protected and public access restrictions,
|
||||
friends, namespaces, using declarations, using directives, Koenig lookup (Argument
|
||||
Dependent Lookup) and more. The symbol table functionality we have now will
|
||||
be the basis of a complete facility that will attempt to model this.</p>
|
||||
<blockquote>
|
||||
<p><em><font color="#003366">I wish that I could ever see, a structure as lovely
|
||||
as a tree</font></em><font color="#003366">...</font></p>
|
||||
</blockquote>
|
||||
<p> Parse Tree and Abstract Syntax Tree (AST) generation are handled by the <b>Tree</b>
|
||||
module. There are advantages with Parse Trees and Abstract Syntax Trees over
|
||||
semantic actions. You can make multiple passes over the data without having
|
||||
to re-parse the input. You can perform transformations on the tree. You can
|
||||
evaluate things in any order you want, whereas with attribute schemes you have
|
||||
to process in a begin to end fashion. You do not have to worry about backtracking
|
||||
and action side effects that may occur with an ambiguous grammar.</p>
|
||||
<p>The <b>utility</b> module is a set of commonly useful parsers and support classes
|
||||
that were found to be useful in handling common tasks such as list processing,
|
||||
comments, confix expressions, etc.</p>
|
||||
<p>The <strong>meta</strong> module provides metaprogramming facilities for advanced Spirit
|
||||
developers. This module facilitates compile-time and run-time introspection
|
||||
of Spirit parsers.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="basic_concepts.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="primitives.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,149 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Parametric Parsers</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Parametric
|
||||
Parsers</b></font> </td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="predefined_actors.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="functional.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>We already have a hint of the dynamic nature of the Spirit framework. This
|
||||
capability is fundamental to Spirit. Dynamic parsing is a very powerful concept.
|
||||
We shall take this concept further through run-time parametric parsers. We are
|
||||
able to handle parsing tasks that are impossible to do with any EBNF syntax
|
||||
alone.</p>
|
||||
<h2>A Little Secret</h2>
|
||||
<p> A little critter called <tt>boost::ref</tt> lurking in the boost distribution
|
||||
is quite a powerful beast when used with Spirit's primitive parsers. We are used
|
||||
to seeing the Spirit primitive parsers created with string or character literals
|
||||
such as:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'A'</span><span class=special>)
|
||||
</span><span class=identifier>range_p</span><span class=special>(</span><span class=literal>'A'</span><span class=special>, </span><span class=literal>'Z'</span><span class=special>)
|
||||
</span><span class=identifier>str_p</span><span class=special>(</span><span class=string>"Hello World"</span><span class=special>)</span></code></pre>
|
||||
<p> str_p has a second form that accepts two iterators over the string:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>first </span><span class=special>= </span><span class=string>"My oh my"</span><span class=special>;
|
||||
</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>last </span><span class=special>= </span><span class=identifier>first </span><span class=special>+ </span><span class=identifier>std</span><span class=special>::</span><span class=identifier>strlen</span><span class=special>(</span><span class=identifier>first</span><span class=special>);
|
||||
|
||||
</span><span class=identifier>str_p</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>)</span></code></pre>
|
||||
<p> What is not obvious is that we can use <tt>boost::ref</tt> as well:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>char </span><span class=identifier>ch </span><span class=special>= </span><span class=literal>'A'</span><span class=special>;
|
||||
</span><span class=keyword>char </span><span class=identifier>from </span><span class=special>= </span><span class=literal>'A'</span><span class=special>;
|
||||
</span><span class=keyword>char </span><span class=identifier>to </span><span class=special>= </span><span class=literal>'Z'</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>ch_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>ch</span><span class=special>))
|
||||
</span><span class=identifier>range_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>from</span><span class=special>), </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>to</span><span class=special>))</span></code></pre>
|
||||
<p> When <tt>boost::ref</tt> is used, the actual parameters to <tt>ch_p</tt> and
|
||||
<tt>range_p</tt> are held by reference. This means that we can change the values
|
||||
of <tt>ch</tt>, <tt>from</tt> and <tt>to</tt> at any time and the corresponding
|
||||
<tt>ch_p</tt> and <tt>range_p</tt> parser will follow their dynamic values.
|
||||
Of course, since they are held by reference, you must make sure that the referenced
|
||||
object is not destructed while parsing.</p>
|
||||
<p> What about <tt>str_p</tt>?</p>
|
||||
<p> While the first form of <tt>str_p</tt> (the single argument form) is reserved
|
||||
for null terminated string constants, the second form (the two argument first/last
|
||||
iterator form) may be used:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>first </span><span class=special>= </span><span class=string>"My oh my"</span><span class=special>;
|
||||
</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>last </span><span class=special>= </span><span class=identifier>first </span><span class=special>+ </span><span class=identifier>std</span><span class=special>::</span><span class=identifier>strlen</span><span class=special>(</span><span class=identifier>first</span><span class=special>);
|
||||
|
||||
</span><span class=identifier>str_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>first</span><span class=special>), </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>last</span><span class=special>))</span></code></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <img src="theme/note.gif" width="16" height="16"> Hey,
|
||||
don't forget <tt>chseq_p</tt>. All these apply to this seldom used primitive
|
||||
as well. </td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>Functional Parametric Primitives</h2>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">/</span><span class="identifier">parametric</span><span class="special">.</span>hpp<span class="special">></span></pre>
|
||||
<p> Taking this further, Spirit includes functional versions of the primitives.
|
||||
Rather than taking in characters, strings or references to characters and strings
|
||||
(using boost::ref), the functional versions take in functions or functors.</p>
|
||||
<h3>f_chlit and f_ch_p</h3>
|
||||
<p> The functional version of <tt>chlit</tt>. This parser takes in a function
|
||||
or functor (function object). The function is expected to have an interface
|
||||
compatible with:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>CharT </span><span class=identifier>func</span><span class=special>()</span></code></pre>
|
||||
<p> where CharT is the character type (e.g. <tt>char</tt>, <tt>int</tt>, <tt>wchar_t</tt>).</p>
|
||||
<p> The functor is expected to have an interface compatible with:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>struct </span><span class=identifier>functor
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>CharT </span><span class=keyword>operator</span><span class=special>()() </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></code></pre>
|
||||
<p> where CharT is the character type (e.g. <tt>char</tt>, <tt>int</tt>, <tt>wchar_t</tt>).</p>
|
||||
<p> Here's a contrived example:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>struct </span><span class=identifier>X
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>char </span><span class=keyword>operator</span><span class=special>()() </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>return </span><span class=literal>'X'</span><span class=special>; </span><span class=special>
|
||||
}
|
||||
</span><span class=special>};</span></code></pre>
|
||||
<p> Now we can use X to create our f_chlit parser:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>f_ch_p</span><span class=special>(</span><span class=identifier>X</span><span class=special>())</span></code></pre>
|
||||
<h3>f_range and f_range_p</h3>
|
||||
<p> The functional version of <tt>range</tt>. This parser takes in a function
|
||||
or functor compatible with the interfaces above. The difference is that <tt>f_range</tt>
|
||||
(and <tt>f_range_p</tt>) expects two functors. One for the start and one for
|
||||
the end of the range.</p>
|
||||
<h3>f_chseq and f_chseq_p</h3>
|
||||
<p> The functional version of <tt>chseq</tt>. This parser takes in two functions
|
||||
or functors. One for the begin iterator and one for the end iterator. The function
|
||||
is expected to have an interface compatible with:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>IteratorT </span><span class=identifier>func</span><span class=special>()</span></code></pre>
|
||||
<p> where <tt>IteratorT</tt> is the iterator type (e.g. <tt>char const*</tt>,
|
||||
<tt>wchar_t const*</tt>).</p>
|
||||
<p> The functor is expected to have an interface compatible with:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>struct </span><span class=identifier>functor
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>operator</span><span class=special>()() </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></code></pre>
|
||||
<p> where <tt>IteratorT</tt> is the iterator type (e.g. <tt>char const*</tt>,
|
||||
<tt>wchar_t const*</tt>).</p>
|
||||
<h3>f_strlit and f_str_p</h3>
|
||||
<p> The functional version of <tt>strlit</tt>. This parser takes in two functions
|
||||
or functors compatible with the interfaces that <tt>f_chseq</tt> expects.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="predefined_actors.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="functional.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,184 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Phoenix</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Phoenix</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="functional.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="closures.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The preceding chapter introduced Phoenix as a means of implementing your semantic actions. We shall look a little bit more into this important library with a focus on how you can use it handily with Spirit. This chapter is by no means a thorough discourse on the library. For more information on Phoenix, please take some time to read the <a href="../phoenix/index.html">Phoenix User's Guide</a>. If you just want to use it quickly, this chapter will probably suffice. Rather than taking you through the theories and details of the library, we shall try to provide you with annotated exemplars instead. Hopefully, this will get you into high gear quickly. </p>
|
||||
<p>Semantic actions in Spirit can be just about any function or function object (functor) as long as it can satisfy the required signature. For example, <tt>uint_p</tt> requires a signature of <tt>void F(T)</tt>, where <tt>T</tt> is the type of the integer (typically <tt>unsigned int</tt>). Plain vanilla actions are of the <tt>void F(IterT, IterT)</tt> variety. You can code your actions in plain C++. Calls to C++ functions or functors will thus be of the form <tt>P[&F]</tt> or <tt>P[F()]</tt> etc. (see <a href="semantic_actions.html">Semantic Actions</a>). Phoenix on the other hand, attempts to mimic C++ such that you can define the function body inlined in the code. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <img src="theme/lens.gif" width="15" height="16"> <strong>C++ in C++? </strong><br>
|
||||
<br>
|
||||
In as much as Spirit attempts to mimic EBNF in C++, Phoenix attempts to mimic C++ in C++!!!</td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>var</h2>
|
||||
<p>Remember the <tt>boost::ref</tt>? We discussed that in the <a href="parametric_parsers.html">Parametric Parsers chapter</a>. Phoenix has a similar, but more flexible, counterpart. It's called <tt>var</tt>. The usage is similar to <tt>boost::ref</tt> and you can use it as a direct replacement. However, unlike <tt>boost::ref</tt>, you can use it to form more complex expressions. Here are some examples:</p>
|
||||
<pre> <span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) += </span><span class=number>3
|
||||
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>y</span><span class=special>) + </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>z</span><span class=special>)
|
||||
</span><span class=number> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>y</span><span class=special>) + </span><span class=identifier><span class=special>(</span>3 * var</span><span class=special>(</span><span class=identifier>z</span><span class=special>))
|
||||
</span><span class=number> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>y</span><span class=special>)[</span>var<span class="special">(</span>i<span class="special">)] </span><span class="comment">// assuming y is indexable and i is an index</span></pre>
|
||||
<p>Let's start with a simple example. We'll want to parse a comma separated list of numbers and report the sum of all the numbers. Using phoenix's var, we do not have to write external semantic actions. We simply inline the code inside the semantic action slots. Here's the complete grammar with our phoenix actions (see <a href="../example/fundamental/sum.cpp">sum.cpp</a> in the examples):</p>
|
||||
<pre><span class=number> </span><span class=identifier>real_p</span><span class=special>[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>n</span><span class=special>) </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>] </span><span class=special>>> </span><span class=special>*(</span><span class=literal>',' </span><span class=special>>> </span><span class=identifier>real_p</span><span class=special>[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>n</span><span class=special>) </span><span class=special>+= </span><span class=identifier>arg1</span><span class=special>])</span> </pre>
|
||||
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/sum.cpp">viewed here</a>.
|
||||
This is part of the Spirit distribution.</p>
|
||||
<h3>argN</h3>
|
||||
<p>Notice the expression: <span class=identifier><tt>var(n) = arg1 </tt></span>. What is <tt>arg1</tt> and what is it doing there? <tt>arg1</tt> is an argument placeholder. Remember that <tt>real_p</tt> (see <a href="numerics.html">Numerics</a>) reports the parsed number to its attached semantic action. <tt>arg1</tt> is a placeholder for the first argument passed to the semantic action by the parser. If more than one argument is passed in, these arguments can be referred to using <tt>arg1</tt>..<tt>argN</tt>. For instance, generic semantic actions (transduction interface; see <a href="semantic_actions.html">Semantic Actions</a>) are passed 2 arguments: the iterators (<tt>first</tt>/<tt>last</tt>) to the matching portion of the input stream. You can refer to <tt>first</tt> and <tt>last</tt> through <tt>arg1</tt> and <tt>arg2</tt>, respectively. </p>
|
||||
<p>Like var, argN is also composable. Here are some examples:</p>
|
||||
<pre> <span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) += </span><span class=number>arg1
|
||||
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>arg1</span><span class=special> + </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>z</span><span class=special>)
|
||||
</span><span class=number> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>arg1</span><span class=special> + </span><span class=identifier><span class=special>(</span>3 * arg2</span><span class=special>)
|
||||
</span><span class=number> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>arg1</span><span class=special>[</span>arg2<span class="special">] </span><span class="comment">// assuming arg1 is indexable and arg2 is an index</span></pre>
|
||||
<h3>val</h3>
|
||||
<p>Note the expression: <tt>3 * arg2.</tt> This expression is actually a short-hand equivalent to: <tt>val(3) * arg2</tt>. We shall see later why, in some cases, we need to explicitly wrap constants and literals inside the val. Again, like var and argN, val is also composable.</p>
|
||||
<h3>Functions </h3>
|
||||
<p>Remember our very first example? In the <a href="quick_start.html">Quick Start</a> chapter, we presented a parser that parses a comma-separated list and stuffs the parsed numbers in a vector (see <a href="../example/fundamental/number_list.cpp">number_list.cpp</a>). For simplicity, we used Spirit's pre-defined actors (see <a href="predefined_actors.html">Predefined Actors</a>). In the example, we used <tt>push_back_a</tt>:</p>
|
||||
<pre><code><font color="#000000"> </font><font color="#000000"><span class="identifier">real_p</span><span class=
|
||||
"special">[</span><span class="identifier">push_back_a</span><span class=
|
||||
"special">(</span><span class="identifier">v</span><span class=
|
||||
"special">)]</span> <span class="special">>></span> <span class=
|
||||
"special">*(</span><span class="literal">','</span> <span class=
|
||||
"special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">[</span><span class=
|
||||
"identifier">push_back_a</span><span class="special">(</span><span class=
|
||||
"identifier">v</span><span class="special">)])</span></font></code></pre>
|
||||
<p>Phoenix allows you to write more powerful polymorphic functions, similar to <tt>push_back_a</tt>, easily. See <a href="../example/fundamental/stuff_vector.cpp">stuff_vector.cpp</a>. The example is similar to <a href="../example/fundamental/number_list.cpp">number_list.cpp</a> in functionality, but this time using a phoenix function to actually implement the <tt>push_back</tt> function:</p>
|
||||
<pre><span class=identifier> </span><span class=keyword>struct </span><span class=identifier>push_back_impl
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>Container</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>Item</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>result
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef </span><span class=keyword>void </span><span class=identifier>type</span><span class=special>;
|
||||
</span><span class=special>};
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>Container</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>Item</span><span class=special>>
|
||||
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>Container</span><span class=special>& </span><span class=identifier>c</span><span class=special>, </span><span class=identifier>Item </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>item</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>c</span><span class=special>.</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>item</span><span class=special>);
|
||||
</span><span class=special>}
|
||||
</span><span class=special>};</span>
|
||||
|
||||
<span class=identifier>function</span><span class=special><</span><span class=identifier>push_back_impl</span><span class=special>> </span><span class=keyword>const </span><span class=identifier>push_back </span><span class=special>= </span><span class=identifier>push_back_impl</span><span class=special>();</span></pre>
|
||||
<p><img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/stuff_vector.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <img src="theme/lens.gif" width="15" height="16"> <strong>Predefined Phoenix Functions</strong><br>
|
||||
<br>
|
||||
A future version of Phoenix will include an extensive set of predefined functions covering the whole of STL containers, iterators and algorithms. push_back will be part of this suite. </td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><span class=identifier><tt>push_back_impl</tt></span> is a simple wrapper over the <tt>push_back</tt> member function of STL containers. The extra scaffolding is there to provide phoenix with additional information that otherwise cannot be directly deduced. <tt>result</tt> relays to phoenix the return type of the functor (<tt>operator()</tt>) given its argument types (<tt>Container</tt> and <tt>Item</tt>) . In this case, the return type is always, simply <tt>void</tt>. </p>
|
||||
<p><span class=identifier><tt>push_back</tt></span> is a phoenix function object. This is the actual function object that we shall use. The beauty behind phoenix function objects is that the actual use is strikingly similar to a normal C++ function call. Here's the number list parser rewritten using our phoenix function object:</p>
|
||||
<pre><span class=special> </span><span class=identifier>real_p</span><span class=special>[</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>arg1</span><span class=special>)] </span><span class=special>>> </span><span class=special>*(</span><span class=literal>',' </span><span class=special>>> </span><span class=identifier>real_p</span><span class=special>[</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>arg1</span><span class=special>)])</span></pre>
|
||||
<p>And, unlike predefined actors, they can be composed. See the pattern? Here are some examples:</p>
|
||||
<pre> <span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>arg1 + 2</span><span class=special>)</span>
|
||||
<span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>var<span class=special>(</span>x<span class=special>)</span></span><span class=special> + </span><span class="identifier">arg1</span><span class=special>)</span>
|
||||
<span class=identifier> push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>)[</span>arg1<span class=special>], </span><span class=identifier>arg2</span><span class=special>)<span class="comment"> // assuming v is a vector of vectors and arg1 is an index</span></span></pre>
|
||||
<p>push_back does not have a return type. Say, for example, we wrote another phoenix function <tt>sin</tt>, we can use it in expressions as well: </p>
|
||||
<pre> <span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class="identifier">sin</span><span class=special>(</span><span class=identifier>arg1<span class=special>)</span> </span><span class="special">*</span><span class=identifier> 2</span><span class=special>)</span>
|
||||
</pre>
|
||||
<h3>Construct</h3>
|
||||
<p>Sometimes, we wish to construct an object. For instance, we might want to create a <tt>std::string</tt> given the first/last iterators. Suppose we want to parse a list of identifiers instead. Our grammar, without the actions, is: </p>
|
||||
<pre><span class=number> </span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>) </span><span class=special>>> </span><span class=special>*(</span><span class=literal>',' </span><span class=special>>> </span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)</span><span class=special>)</span></pre>
|
||||
<p><strong><tt>construct_</tt></strong> is a predefined phoenix function that, you guessed it, constructs an object, from the arguments passed in. The usage is:</p>
|
||||
<pre><span class=number> </span><span class=identifier>construct_</span><span class=special><</span><span class=identifier>T</span><span class=special>>(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>,... </span><span class=identifier>argN</span><span class=special>)</span></pre>
|
||||
<p>where T is the desired type and arg1..argN are the constructor arguments. For example, we can construct a <tt>std::string</tt> from the first/last iterator pair this way:</p>
|
||||
<pre><span class=identifier> construct_</span><span class=special><</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>>(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>)</span></pre>
|
||||
<p>Now, we attach the actions to our grammar:</p>
|
||||
<pre><span class=number> </span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)
|
||||
</span><span class=special>[
|
||||
</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>construct_</span><span class=special><</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>>(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>))
|
||||
</span><span class=special>]
|
||||
</span><span class=special>>>
|
||||
</span><span class=special>*(</span><span class=literal>',' </span><span class=special>>>
|
||||
</span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)
|
||||
</span><span class=special>[
|
||||
</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>construct_</span><span class=special><</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>>(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>))
|
||||
</span><span class=special>]
|
||||
</span><span class=special>)</span></pre>
|
||||
<p><img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/stuff_vector2.cpp">viewed here</a>. This is part of the Spirit distribution.<span class=special></span></p>
|
||||
<h3><a name="lambda"></a>Lambda expressions</h3>
|
||||
<p>All these phoenix expressions we see above are lambda expressions. The important thing to note is that these expressions are not evaluated immediately. At grammar construction time, when the actions are attached to the productions, a lambda expression actually generates an unnamed function object that is evaluated later, at parse time. In other words, lambda expressions are <strong>lazily evaluated</strong>.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16"> Lambda Expressions?</b><br>
|
||||
<br>
|
||||
Lambda expressions are actually unnamed partially applied functions where placeholders (e.g. arg1, arg2) are provided in place of some of the arguments. The reason this is called a lambda expression is that traditionally, such placeholders are written using the Greek letter lambda <img src="theme/lambda.png" width="15" height="22">.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Phoenix uses tricks not unlike those used by Spirit to mimic C++ such that you can define the function body inlined in the code. It's weird, but as mentioned, Phoenix actually mimics C++ in C++ using expression templates. Surely, there are limitations...</p>
|
||||
<p>All components in a Phoenix expression must be an <strong>actor</strong> (in phoenix parlance) in the same way that components in Spirit should be a <tt>parser</tt>. In Spirit, you can write:</p>
|
||||
<pre><span class=number> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'x'</span><span class=special>) </span><span class=special>>> </span><span class=literal>'y'</span><span class=special>;</span></pre>
|
||||
<p>But not:</p>
|
||||
<pre><span class=special> </span><span class=identifier>r </span><span class=special>= </span><span class=literal>'x' </span><span class=special>>> </span><span class=literal>'y'</span><span class=special>;</span></pre>
|
||||
<p>In essence, <tt>parser >> char</tt> is a parser, but <tt>char >> char</tt> is a char (the char shift-right by another char).</p>
|
||||
<p>The same restrictions apply to Phoenix. For instance:</p>
|
||||
<pre><span class=special> </span><span class=keyword>int </span><span class=identifier>x </span><span class=special>= </span><span class=number>1</span><span class=special>;
|
||||
</span><span class=identifier>cout </span><span class=special><< </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=special><< </span><span class=string>"pizza"</span></pre>
|
||||
<p>is a well formed Phoenix expression that's lazily evaluated. But:</p>
|
||||
<pre><span class=string> </span><span class=identifier>cout </span><span class=special><< </span><span class=identifier>x </span><span class=special><< </span><span class=string>"pizza"</span></pre>
|
||||
<p>is not. Such expressions are immediately executed. C++ syntax dictates that at least <strong>one</strong> of the operands must be a Phoenix actor type. This also applies to compound expressions. For example:</p>
|
||||
<pre><span class=string> </span><span class=identifier>cout </span><span class=special><< </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=special><< </span><span class=string>"pizza" </span><span class=special><< </span><span class=string>"man"</span></pre>
|
||||
<p>This is evaluated as:</p>
|
||||
<pre><span class=string> </span><span class=special>(((</span><span class=identifier>cout </span><span class=special><< </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>)) </span><span class=special><< </span><span class=string>"pizza"</span><span class=special>) </span><span class=special><< </span><span class=string>"man"</span><span class=special>)</span></pre>
|
||||
<p>Since <tt>(cout << var(x))</tt> is an actor, at least <strong>one</strong> of the operands of the next <tt><<</tt> is a Phoenix actor; hence <tt>((cout << var(x)) << "pizza")</tt> is also a Phoenix actor, and the whole expression is thus also an actor.</p>
|
||||
<p>Sometimes, it is safe to write:</p>
|
||||
<pre><span class=special> </span><span class=identifier>cout </span><span class=special><< </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=special><< </span><span class=identifier>val</span><span class=special>(</span><span class=string>"pizza"</span><span class=special>) </span><span class=special><< </span><span class=identifier>val</span><span class=special>(</span><span class=string>"man"</span><span class=special>)</span></pre>
|
||||
<p>just to make it explicitly clear what we are dealing with, especially with complex expressions, in the same way as we explicitly wrap literal strings in <tt>str_p("lit")</tt> in Spirit. </p>
|
||||
<p>Phoenix (and Spirit) also deals with unary operators. In such cases, we have no choice. The operand must be a Phoenix actor (or Spirit parser). Examples:</p>
|
||||
<p>Spirit:</p>
|
||||
<pre><span class=special> </span><span class=special>*</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'z'</span><span class=special>) </span><span class=comment>// good
|
||||
</span><span class=special>*(</span><span class=literal>'z'</span><span class=special>) </span><span class=comment>// bad</span></pre>
|
||||
<p> Phoenix:</p>
|
||||
<pre><span class=comment> </span><span class=special>*</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=comment>// good (lazy)
|
||||
</span><span class=special>*</span><span class=identifier>x </span><span class=comment>// bad (immediate)</span></pre>
|
||||
<p>Also, in Phoenix, for assignments and indexing to be lazily evaluated, the object acted upon should be a Phoenix actor. Examples:</p>
|
||||
<pre><span class=comment> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=special>= </span><span class=number>123 </span><span class=comment>// good (lazy)
|
||||
</span><span class=identifier>x </span><span class=special>= </span><span class=number>123 </span><span class=comment>// bad (immediate)
|
||||
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>)[</span><span class=number>0</span><span class=special>] </span><span class=comment>// good (lazy)
|
||||
</span><span class=identifier>x</span><span class=special>[</span><span class=number>0</span><span class=special>] </span><span class=comment>// bad, immediate
|
||||
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>)[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>i</span><span class=special>)] </span><span class=comment>// good (lazy)
|
||||
</span><span class=identifier>x</span><span class=special>[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>i</span><span class=special>)] </span><span class=comment>// bad and illegal (x is not an actor)
|
||||
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>i</span><span class=special>)]) </span><span class=comment>// bad and illegal (x is not an actor)</span></pre>
|
||||
<h3>Wrapping up </h3>
|
||||
<p>Well, there you have it. I hope with this jump-start chapter, you may be able to harness the power of lambda expressions. By all means, please read the <a href="../phoenix/index.html">phoenix manual</a> to learn more about the nitty gritty details. Surely, you'll get to know a lot more than just by reading this chapter. There are a lot of things still to be touched. There won't be enough space here to cover all the features of Phoenix even in brief. </p>
|
||||
<p>In the next chapter, <a href="closures.html">Closures</a>, we'll see more of Phoenix. Stay tuned. </p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="functional.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="closures.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,74 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Portability</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Portability</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="includes.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="style_guide.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Historically, Spirit supported a lot of compilers, including (to some extent)
|
||||
poorly conforming compilers such as VC6. Spirit v1.6.x will be the last release
|
||||
that will support older poorly conforming compilers. Starting from Spirit v1.8.0,
|
||||
ill conforming compilers will not be supported. If you are still using one of
|
||||
these older compilers, you can still use Spirit v1.6.x.</p>
|
||||
<p>The reason why Spirit v1.6.x worked on old non-conforming compilers is that
|
||||
the authors laboriously took the trouble of searching for workarounds to make
|
||||
these compilers happy. The process takes a lot of time and energy, especially
|
||||
when one encounters the dreaded ICE or "Internal Compiler Error".
|
||||
Sometimes searching for a single workaround takes days or even weeks. Sometimes,
|
||||
there are no known workarounds. This stifles progress a lot. And, as the library
|
||||
gets more progressive and takes on more advanced C++ techniques, the difficulty
|
||||
escalates to even greater heights.</p>
|
||||
<p>Spirit v1.6.x will still be supported. Maintenance will still happen and bug
|
||||
fixes will still be applied. There will still be active development for the
|
||||
back-porting of new features introduced in Spirit v1.8.0 (and Spirit 1.9.0)
|
||||
to less capable compilers; hopefully, fueled by contributions from the community.
|
||||
We welcome active support from the C++ community, especially those with special
|
||||
expertise on compilers such as older Borland and MSVC++ compilers.</p>
|
||||
<p>Spirit 1.8 has been tested to compile and run properly on these compilers:</p>
|
||||
<ol>
|
||||
<li>g++ 3.1 and above</li>
|
||||
<li>Comeau 4.24.5 </li>
|
||||
<li>MSVC 7.1</li>
|
||||
<li>Intel 7.1</li>
|
||||
</ol>
|
||||
<p>If your compiler is sufficiently conforming, chances are, you can compile Spirit
|
||||
as it is or with minimal portability fixes here and there. Please inform us
|
||||
if your compiler is known to be ISO/ANSI conforming and is not in the list
|
||||
above. Feel free to post feedback to <a href="https://lists.sourceforge.net/lists/listinfo/spirit-general">Spirit-general
|
||||
mailing list</a> [Spirit-general@lists.sourceforge.net].</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="includes.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="style_guide.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,119 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Position Iterator</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Position
|
||||
Iterator</b></font> </td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="file_iterator.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="debugging.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Often, when writing a parser that is able to detect errors in the format of
|
||||
the input stream, we want it to communicate to the user where the error happened
|
||||
within that input. The classic example is when writing a compiler or interpreter
|
||||
that detects syntactical errors in the parsed program, indicating the line number
|
||||
and maybe even the position within the line where the error was found.</p>
|
||||
<p> The class position_iterator is a tool provided within Spirit that allows parser
|
||||
writers to easily implement this functionality. The concept is quite simple:
|
||||
this class is an iterator wrapper that keeps track of the current position within
|
||||
the file, including current file, line and column. It requires a single template
|
||||
parameter, which should be the type of the iterator that is to be wrapped.</p>
|
||||
<p> To use it, you'll need to add the following include:</p>
|
||||
<pre>
|
||||
<code><span class=preprocessor>#include </span><span class=special><</span><span class=identifier>boost</span><span class=special>/</span><span class=identifier>spirit</span><span class=special>/</span><span class=identifier>iterator</span><span class=special>/</span><span class=identifier>position_iterator</span><span class=special>.</span><span class=identifier>hpp</span><span class=special>></span></code></pre>
|
||||
<p> Or include all the iterators in Spirit:</p>
|
||||
<pre>
|
||||
<code><span class=preprocessor>#include </span><span class=special><</span><span class=identifier>boost</span><span class=special>/</span><span class=identifier>spirit</span><span class=special>/</span><span class=identifier>iterator</span><span class=special>.</span><span class=identifier>hpp</span><span class=special>></span></code></pre>
|
||||
<p> To construct the wrapper, you need both the begin and end iterators of the
|
||||
input sequence, and the file name of the input sequence. Optionally, you can
|
||||
also specify the starting line and column numbers, which default to 1. Default
|
||||
construction, with no parameters, creates a generic end-of-sequence iterator,
|
||||
in a similar manner to how it's done in the stream iterators of the standard C++
|
||||
library.</p>
|
||||
<p> The wrapped iterator must belong to the input or forward iterator category,
|
||||
and the position_iterator just inherits that category.</p>
|
||||
<p> For example, to create begin and end positional iterators from an input
|
||||
C-string, you'd use:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>inputstring </span><span class=special>= </span><span class=string>"..."</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>position_iterator</span><span class=special><</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*> </span><span class=identifier>iterator_t</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>iterator_t </span><span class=identifier>begin</span><span class=special>(</span><span class=identifier>inputstring</span><span class=special>, </span><span class=identifier>inputstring</span><span class=special>+</span><span class=identifier>strlen</span><span class=special>(</span><span class=identifier>inputstring</span><span class=special>));
|
||||
</span><span class=identifier>iterator_t </span><span class=identifier>end</span><span class=special>;</span></code></pre>
|
||||
<a name="operations"></a>
|
||||
<h2>Operations</h2>
|
||||
<pre>
|
||||
<code><span class=keyword>void </span><span class=identifier>set_position</span><span class=special>(</span><span class=identifier>file_position </span><span class=keyword>const</span><span class=special>&);</span></code></pre>
|
||||
<p> Call this function when you need to change the current position stored in
|
||||
the iterator. For example, if parsing C-style #include directives, the included
|
||||
file's input must be marked by resetting the line and column to 1 and
|
||||
the name to the new file's name.<br>
|
||||
</p>
|
||||
<pre>
|
||||
<code><span class=identifier>file_position </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>get_position</span><span class=special>() </span><span class=keyword>const</span><span class=special>;</span></code></pre>
|
||||
<p> Call this function to retrieve the current position.</p>
|
||||
<pre>
|
||||
<code><span class=keyword>void </span><span class=identifier>set_tabchars</span><span class=special>(</span><span class=keyword>int</span><span class=special>);</span></code></pre>
|
||||
<p> Call this to set the number of characters per tab. This value is necessary
|
||||
to correctly track the column number.<br>
|
||||
</p>
|
||||
<p> <a name="file_position"></a> </p>
|
||||
<h2>file_position</h2>
|
||||
<p> file_position is a structure that holds the position within a file. Its fields
|
||||
are:</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="2">file_position fields</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="26%"><code><span class=identifier>std</span><span class=special>::</span><span class=identifier>string
|
||||
</span><span class=identifier>file</span><span class=special>;</span></code></td>
|
||||
<td class="table_cells" width="74%">Name of the file. Hopefully a full pathname</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="26%"><code><span class=keyword>int</span><span class=identifier>
|
||||
line</span><span class=special>;</span></code></td>
|
||||
<td class="table_cells" width="74%">Line number within the file. By default,
|
||||
the first line is number 1</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="26%"><code><span class=keyword>int </span><span class=identifier>column</span><span class=special>;</span></code></td>
|
||||
<td class="table_cells" width="74%">Column position within the file. The first
|
||||
column is 1</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/position_iterator/position_iterator.cpp">position_iterator.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="file_iterator.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="debugging.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2002 Juan Carlos Arevalo-Baeza<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
<p class="copyright"> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,346 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<!-- Generated by the Spirit (http://spirit.sf.net) QuickDoc -->
|
||||
<title>predefined_actors</title>
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
<body>
|
||||
<table width="100%" height="48" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Predefined
|
||||
Actors</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_parser_context.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
|
||||
<td width="20"><a href="parametric_parsers.html"><img src="theme/r_arr.gif" width="20" height="19" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>Actors</h2><p>
|
||||
The framework has a number of predefined semantic action functors.
|
||||
Experience shows that these functors are so often used that they were included
|
||||
as part of the core framework to spare the user from having to reinvent the
|
||||
same functionality over and over again.</p>
|
||||
<h2>Quick example: <tt>assign_a</tt> actor</h2>
|
||||
<code>
|
||||
<pre> <span class=keyword>int </span><span class=identifier>i</span><span class=special>,</span><span class=identifier> j</span><span class=special>;
|
||||
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string </span><span class=identifier>s</span><span class=special>;
|
||||
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>i</span><span class=special>)] >> (+</span><span class=identifier>alpha_p</span><span class=special>)[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>s</span><span class=special>)] >> </span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>j</span><span class=special>,</span><span class=identifier>i</span><span class=special>)];</span></pre>
|
||||
</code>
|
||||
<p>
|
||||
Given an input <tt>123456 Hello 789</tt>, </p>
|
||||
<ol><li><tt>assign_a(i)</tt> will extract the number <tt>123456</tt> and assign it to <tt>i</tt>, </li><li><tt>assign_a(s)</tt> will extract the string <tt>"Hello"</tt> and assign it to <tt>s</tt>,</li><li><tt>assign_a(j,i)</tt> will assign i to j, j=i, without using the parse result.</li></ol>
|
||||
<p> Technically, the expression <tt>assign_a(v)</tt> is a template function that
|
||||
generates a semantic action. In fact, actor instances are not created directly
|
||||
since they usually involve a number of template parameters. Instead, generator
|
||||
functions ("helper functions") are provided to generate actors from
|
||||
their arguments. All helper functions have the "_a" suffix. For example,
|
||||
<tt>append_actor</tt> is created using the <tt>append_a</tt> function. </p>
|
||||
<p>
|
||||
The semantic action generated is polymorphic and should work with any
|
||||
type as long as it is compatible with the arguments received from the parser.
|
||||
It might not be obvious, but a string can accept the iterator first and last
|
||||
arguments that are passed into a generic semantic action (see above). In fact,
|
||||
any STL container that has an <tt>assign(first, last)</tt> member function can be
|
||||
used.</p>
|
||||
<h2>Actors summary</h2><p>
|
||||
Below are tables summarizing the "built-in" actors with the
|
||||
conventions given below.</p>
|
||||
<ul>
|
||||
<li><tt>ref</tt> is a <b>reference</b> to an object stored in a policy holder
|
||||
actor</li>
|
||||
<li><tt>value_ref</tt> and <tt>key_ref</tt> are <b>const reference</b>s stored
|
||||
in a policy holder actor</li>
|
||||
<li><tt>value</tt> is the <b>parse result</b>. This could be the result for
|
||||
the single argument () operator or the two argument () operator</li>
|
||||
<li><tt>vt</tt> stands for the <tt>value_type</tt> type: <tt>type& ref;
|
||||
// vt is type::value_type</tt>.</li>
|
||||
</ul>
|
||||
<p> Note that examples are provided after the tables.</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="8"> Unary operator actors</td>
|
||||
</tr>
|
||||
<tr><td width="30%" class="table_cells">++ref</td> <td width="70%" class="table_cells"><b>increment_a</b>(ref)</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">--ref</td> <td class="table_cells"><b>decrement_a</b>(ref)</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="26"> Assign actors</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">ref = value</td>
|
||||
<td class="table_cells"><b>assign_a</b>(ref)</td>
|
||||
</tr>
|
||||
<tr><td width="30%" class="table_cells">ref = value_ref</td>
|
||||
<td width="70%" class="table_cells"><b>assign_a</b>(ref, value_ref)</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="30"> Container actors </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="30%" class="table_cells">ref.push_back(value)</td>
|
||||
<td width="70%" class="table_cells"><b>push_back_a</b>(ref)</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref.push_back(value_ref)</td>
|
||||
<td class="table_cells"><b>push_back_a</b>(ref, value_ref)</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref.push_front(value)</td>
|
||||
<td class="table_cells"><b>push_front_a</b>(ref)</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref.push_front(value_ref)</td>
|
||||
<td class="table_cells"><b>push_front_a</b>(ref, value_ref)</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref.clear()</td>
|
||||
<td class="table_cells"><b>clear_a</b>(ref)</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="14"> Associative container actors </td>
|
||||
</tr>
|
||||
<tr><td width="30%" class="table_cells">ref.insert(vt(value, value_ref))</td> <td width="70%" class="table_cells"><b>insert_key_a</b>(ref, value_ref)</td>
|
||||
</tr> <tr>
|
||||
<td class="table_cells"> ref.insert(vt(key_ref,value_ref)) </td>
|
||||
<td class="table_cells"> <strong>insert_at_a</strong>(ref, key_ref, value_ref)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"> ref.insert(vt(key_ref,value)) </td>
|
||||
<td class="table_cells"> <strong>insert_at_a</strong>(ref, key_ref) </td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref[value] = value_ref</td>
|
||||
<td class="table_cells"><b>assign_key_a</b>(ref, value_ref)</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref.erase(ref,value)</td>
|
||||
<td class="table_cells"><b>erase_a</b>(ref)</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref.erase(ref,key_ref)</td>
|
||||
<td class="table_cells"><b>erase_a</b>(ref, key_ref)</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="8"> Miscellaneous actors </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="30%" class="table_cells">swaps aref and bref</td>
|
||||
<td width="70%" class="table_cells"><strong>swap_a</strong>(aref, bref)</td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3>Include Files</h3>
|
||||
<p>The header files for the predefined actors are located in <tt>boost/spirit/actor</tt>.
|
||||
The file <tt>actors.hpp</tt> contains all the includes for all the actors. You
|
||||
may include just the specific header files that you need. The list below enumerates
|
||||
the header files.</p>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">assign_actor</span><span class="special">.</span>hpp<span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>assign_key_actor<span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">clear_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">decrement_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">erase_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span> <br> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">increment_actor</span><span class="special">.</span>hpp<span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">insert_key_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>insert_at_actor.hpp<span class="special">></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">push_back_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">push_front_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">swap_actor</span><span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span></pre>
|
||||
<h3>Examples</h3>
|
||||
<h4>Increment a value</h4>
|
||||
<p>
|
||||
Suppose that your input string is </p>
|
||||
<code>
|
||||
<pre> 1,2,-3,4,...
|
||||
</pre>
|
||||
</code><p>
|
||||
and we want to count the number of ints. The actor <tt>increment_a</tt> applies <tt>++</tt> to its reference:</p>
|
||||
<code>
|
||||
<pre> <span class=keyword>int </span><span class=identifier>count </span><span class=special>= </span><span class=number>0</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>list_p</span><span class=special>.</span><span class=identifier>direct</span><span class=special>(</span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>increment_a</span><span class=special>(</span><span class=identifier>count</span><span class=special>)], </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>));</span></pre>
|
||||
</code>
|
||||
<h4>Append values to a vector (or other container)</h4>
|
||||
<p> Here, you want to fill a <tt>vector<int></tt> with the numbers. The
|
||||
actor <tt>push_back_a</tt> can be used to insert the integers at the back of
|
||||
the vector:</p>
|
||||
<code>
|
||||
<pre> <span class=identifier>vector</span><span class=special><</span><span class=keyword>int</span><span class=special>> </span><span class=identifier>v</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>list_p</span><span class=special>.</span><span class=identifier>direct</span><span class=special>(</span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>push_back_a</span><span class=special>(</span><span class=identifier>v</span><span class=special>)], </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>));</span></pre>
|
||||
</code>
|
||||
<h4>Insert key-value pairs into a map</h4><p>
|
||||
Suppose that your input string is </p>
|
||||
<code>
|
||||
<pre> (1,2) (3,4) ...
|
||||
</pre>
|
||||
</code>
|
||||
<p> and you want to parse the pair into a <tt>map<int,int></tt>. <tt>assign_a</tt>
|
||||
can be used to store the key and value in a temporary variable, while <tt>insert_at_a</tt>
|
||||
is used to insert it into the map:</p>
|
||||
<pre> <code><span class=identifier>map</span><span class=special><</span><span class=keyword>int</span><span class=special>, </span><span class=keyword>int</span><span class="special">>::</span>value_type<span class=keyword> </span>k<span class=special>;
|
||||
</span><span class=identifier>map</span><span class=special><</span><span class=keyword>int</span><span class=special>, </span><span class=keyword>int</span><span class=special>> </span><span class=identifier>m</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><> </span><span class=identifier>pair </span><span class=special>= </span><span class=identifier>
|
||||
confix_p</span><span class=special>(
|
||||
</span><span class=literal>'('</span><span class=special>
|
||||
, </span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span>k.first<span class=special>)] >> </span><span class=literal>','</span><span class=special> >> </span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span>k.second<span class=special>)]
|
||||
,</span><span class=literal> ')'
|
||||
</span><span class=special>)<br> [</span><span class=identifier>insert_at_a</span><span class=special>(</span><span class=identifier>m</span><span class=special>, </span><span class=identifier>k</span><span class=identifier></span><span class=special>)]
|
||||
;</span></code></pre>
|
||||
<h2>Policy holder actors and policy actions</h2>
|
||||
<p> The action takes place through a call to the <tt>()</tt> operator: single
|
||||
argument <tt>()</tt> operator call for character parsers and two argument (first,
|
||||
last) call for phrase parsers. Actors should implement at least one of the two
|
||||
<tt>()</tt> operators.</p>
|
||||
<p>
|
||||
A lot of actors need to store a reference to one or more objects. For
|
||||
example, actions on containers need to store a reference to the container.</p>
|
||||
<p> Therefore, actors of this kind have been broken down into <strong>a)</strong>
|
||||
an action policy that does the action (act member function), <strong>b)</strong>
|
||||
a policy holder actor that stores the references and feeds the act member function.</p>
|
||||
<h3>Policy holder actors</h3>
|
||||
<p> The available policy holders are enumerated below.</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="table_title" colspan="24"> Policy holders </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">Name</td>
|
||||
<td class="table_cells">Stored variables</td>
|
||||
<td class="table_cells">Act signature</td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref_actor</td>
|
||||
<td class="table_cells">1 reference</td>
|
||||
<td class="table_cells"><tt>act(ref)</tt></td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref_value_actor</td>
|
||||
<td class="table_cells">1 ref</td>
|
||||
<td class="table_cells"> <tt>act(ref, value)</tt> or <tt>act(ref, first, last)</tt></td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref_const_ref_actor</td>
|
||||
<td class="table_cells">1 ref and 1 const ref</td>
|
||||
<td class="table_cells"><tt>act(ref, const_ref)</tt></td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref_const_ref_value_actor</td>
|
||||
<td class="table_cells">1 ref</td>
|
||||
<td class="table_cells"><tt>act(ref, value)</tt> or <tt>act(ref, first, last)</tt></td>
|
||||
</tr>
|
||||
<tr><td class="table_cells">ref_const_ref_const_ref_actor</td>
|
||||
<td class="table_cells">1 ref, 2 const ref</td>
|
||||
<td class="table_cells"><tt>act(ref, const_ref1, const_ref2)</tt></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3>Include Files</h3>
|
||||
<p>The predefined policy header files are located in <tt>boost/spirit/actor</tt>:</p>
|
||||
<pre> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_actor<span class="special">.</span>hpp<span class="special">><br></span> <span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_value_actor<span class="special">.</span>hpp<span class="special">></span><span class="special"></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_const_ref<span class="special">.</span>hpp<span class="special">></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_const_ref_value<span class="special">.</span>hpp<span class="special">></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_const_ref_value<span class="special">.</span>hpp<span class="special">></span><span class="special"></span>
|
||||
<span class="preprocessor">#include</span> <span class="special"><</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_const_ref_const_ref<span class="special">.</span>hpp<span class="special">></span><span class="special"></span></pre>
|
||||
<h3>Holder naming convention</h3>
|
||||
<p> Policy holders have the following naming convention:</p>
|
||||
<pre> <code><member>_ >> *<member> >> !value >> actor</code></pre>
|
||||
<p> where <tt>member</tt> is the action policy member which can be of type:</p>
|
||||
<ul>
|
||||
<li>ref, a reference</li>
|
||||
<li>const_ref, a const reference</li>
|
||||
<li>value, by value</li>
|
||||
<li>empty, no stored members</li>
|
||||
</ul>
|
||||
<p> and <tt>value</tt> states if the policy uses the parse result or not.</p>
|
||||
<h3>Holder example: <tt>ref_value_actor</tt> class</h3>
|
||||
<pre><code> <span class=comment>// this is the building block for action that
|
||||
// take a reference and the parse result
|
||||
|
||||
</span><span class=keyword>template</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>,</span><span class="comment"> // reference type</span><span class=identifier>
|
||||
</span><span class=keyword>typename </span><span class=identifier>ActionT </span><span class=comment>// action policy
|
||||
</span><span class=special>>
|
||||
</span><span class=keyword>class </span><span class=identifier>ref_value_actor </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>ActionT
|
||||
</span><span class=special>{
|
||||
</span> <span class=keyword>public</span><span class=special>:
|
||||
|
||||
</span><span class=keyword>explicit </span><span class=identifier>ref_value_actor</span><span class=special>(</span><span class=identifier>T</span><span class=special>& </span><span class=identifier>ref_</span><span class=special>)
|
||||
: </span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>ref_</span><span class=special>){}
|
||||
|
||||
</span><span class=keyword>template</span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T2</span><span class=special>>
|
||||
</span><span class=keyword>void operator</span><span class=special>()(</span><span class=identifier>T2 </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>val</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>act</span><span class=special>(</span><span class=identifier>ref</span><span class=special>, </span><span class=identifier>val</span><span class=special>);</span><span class="comment"> // defined in ActionT</span><span class=identifier>
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=keyword>template</span><span class=special><</span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>>
|
||||
</span><span class=keyword>void operator</span><span class=special>()(
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>first</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>act</span><span class=special>(</span><span class=identifier>ref</span><span class=special>,</span><span class=identifier>first</span><span class=special>,</span><span class=identifier>last</span><span class=special>);</span><span class="comment"> // defined in ActionT</span><span class=identifier>
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=keyword>private</span><span class=special>:
|
||||
|
||||
</span><span class=identifier> T</span><span class=special>& </span><span class=identifier>ref</span><span class=special>;
|
||||
};</span></code></pre>
|
||||
<h3>Actor example: <tt>assign_actor</tt></h3>
|
||||
<code>
|
||||
<pre> <span class=comment>// assign_action assigns the parse result to the reference
|
||||
|
||||
</span><span class=keyword>struct </span><span class=identifier>assign_action
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>ValueT
|
||||
</span><span class=special>>
|
||||
</span><span class=keyword>void </span><span class=identifier>act</span><span class=special>(</span><span class=identifier>T</span><span class=special>& </span><span class=identifier>ref</span><span class=special>, </span><span class=identifier>ValueT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>value</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>ref</span><span class=special> = </span><span class=special></span><span class=identifier>value</span><span class=special>;
|
||||
}
|
||||
|
||||
</span><span class=keyword>template</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>IteratorT
|
||||
</span><span class=special>>
|
||||
</span><span class=keyword>void </span><span class=identifier>act</span><span class=special>(
|
||||
</span><span class=identifier>T</span><span class=special>& </span><span class=identifier>ref</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>first</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef </span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>::</span><span class=identifier>value_type </span><span class=identifier>value_type</span><span class=special>;
|
||||
</span><span class=identifier>value_type </span><span class=identifier>vt</span><span class=special>(</span><span class=identifier>first</span><span class=special>,</span><span class=identifier> last</span><span class=special>);
|
||||
</span><span class=identifier>ref</span><span class=special> = </span><span class=special></span><span class=identifier>vt</span><span class=special>;
|
||||
}
|
||||
};</span></pre>
|
||||
</code>
|
||||
<h3>Helper function example: <tt>assign_a</tt> function</h3>
|
||||
<code>
|
||||
<pre>
|
||||
<span class=comment>// assign_a is a polymorphic helper function that generates an
|
||||
// assign_actor based on ref_value_actor, assign_action and the
|
||||
// type of its argument.
|
||||
|
||||
</span><span class=keyword>template</span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>>
|
||||
</span><span class=keyword>inline </span><span class=identifier>ref_value_actor</span><span class=special><</span><span class=identifier>T</span><span class=special>, </span><span class=identifier>assign_action</span><span class=special>></span><span class=identifier>
|
||||
assign_a</span><span class=special>(</span><span class=identifier>T</span><span class=special>& </span><span class=identifier>ref</span><span class=special>)
|
||||
{
|
||||
</span><span class=keyword>return </span><span class=identifier>ref_value_actor</span><span class=special><</span><span class=identifier>T</span><span class=special>,</span><span class=identifier> assign_action</span><span class=special>>(</span><span class=identifier>ref</span><span class=special>);
|
||||
}</span></pre>
|
||||
</code>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_parser_context.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
|
||||
<td width="20"><a href="parametric_parsers.html"><img src="theme/r_arr.gif" width="20" height="19" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2003 <font color="#666666">Jonathan de Halleux</font><font size="2"><font size="2"><font color="#666666">
|
||||
</font></font> </font><br>
|
||||
Copyright © 2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,289 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<meta content=
|
||||
"HTML Tidy for Windows (vers 1st February 2003), see www.w3.org"
|
||||
name="generator">
|
||||
<title>
|
||||
Preface
|
||||
</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="85%">
|
||||
<font size="6" face=
|
||||
"Verdana, Arial, Helvetica, sans-serif"><b>Preface</b></font>
|
||||
</td>
|
||||
<td width="112">
|
||||
<a href="http://spirit.sf.net"><img src="theme/spirit.gif"
|
||||
width="112" height="48" align="right" border="0"></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table><br>
|
||||
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30">
|
||||
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<img src="theme/l_arr_disabled.gif" width="20" height="19">
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="introduction.html"><img src="theme/r_arr.gif" border="0">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table><br>
|
||||
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td>
|
||||
<p>
|
||||
<i>"Examples of designs that meet most of the criteria for
|
||||
"goodness" (easy to understand, flexible, efficient) are a
|
||||
recursive-descent parser, which is traditional procedural code.
|
||||
Another example is the STL, which is a generic library of
|
||||
containers and algorithms depending crucially on both traditional
|
||||
procedural code and on parametric polymorphism."</i>
|
||||
</p>
|
||||
<p>
|
||||
<b><font color="#003366">Bjarne Stroustrup</font></b>
|
||||
</p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
<b>History</b>
|
||||
</p>
|
||||
<p>
|
||||
A decade and a half ago, I wrote my first calculator in Pascal. It is one
|
||||
of my most unforgettable coding experiences. I was amazed how a mutually
|
||||
recursive set of functions can model a grammar specification. In time,
|
||||
the skills I acquired from that academic experience became very
|
||||
practical. Periodically I was tasked to do some parsing. For instance,
|
||||
whenever I need to perform any form of I/O, even in binary, I try to
|
||||
approach the task somewhat formally by writing a grammar using
|
||||
Pascal-like syntax diagrams and then write a corresponding
|
||||
recursive-descent parser. This worked very well.
|
||||
</p>
|
||||
<p>
|
||||
The arrival of the Internet and the World Wide Web magnified this
|
||||
thousand-fold. At one point I had to write an HTML parser for a Web
|
||||
browser project. I got a recursive-descent HTML parser working based on
|
||||
the W3C formal specifications easily. I was certainly glad that HTML had
|
||||
a formal grammar specification. Because of the influence of the Internet,
|
||||
I then had to do more parsing. RFC specifications were everywhere. SGML,
|
||||
HTML, XML, even email addresses and those seemingly trivial URLs were all
|
||||
formally specified using small EBNF-style grammar specifications. This
|
||||
made me wish for a tool similar to big-time parser generators such as
|
||||
YACC and <a href="http://www.antlr.org/">ANTLR</a>, where a parser is
|
||||
built automatically from a grammar specification. Yet, I want it to be
|
||||
extremely small; small enough to fit in my pocket, yet scalable.
|
||||
</p>
|
||||
<p>
|
||||
It must be able to practically parse simple grammars such as email
|
||||
addresses to moderately complex grammars such as XML and perhaps some
|
||||
small to medium-sized scripting languages. Scalability is a prime goal.
|
||||
You should be able to use it for small tasks such as parsing command
|
||||
lines without incurring a heavy payload, as you do when you are using
|
||||
YACC or PCCTS. Even now that it has evolved and matured to become a
|
||||
multi-module library, true to its original intent, Spirit can still be
|
||||
used for extreme micro-parsing tasks. You only pay for features that you
|
||||
need. The power of Spirit comes from its modularity and extensibility.
|
||||
Instead of giving you a sledgehammer, it gives you the right ingredients
|
||||
to create a sledgehammer easily. For instance, it does not really have a
|
||||
lexer, but you have all the raw ingredients to write one, if you need
|
||||
one.
|
||||
</p>
|
||||
<p>
|
||||
The result was Spirit. Spirit was a personal project that was conceived
|
||||
when I was doing R&D in Japan. Inspired by the GoF's composite and
|
||||
interpreter patterns, I realized that I can model a recursive-descent
|
||||
parser with hierarchical-object composition of primitives (terminals) and
|
||||
composites (productions). The original version was implemented with
|
||||
run-time polymorphic classes. A parser is generated at run time by
|
||||
feeding in production rule strings such as <tt>"prod ::= {‘A’
|
||||
| ‘B’} ‘C’;"</tt>. A compile function compiled the
|
||||
parser, dynamically creating a hierarchy of objects and linking semantic
|
||||
actions on the fly. A very early text can be found <a href=
|
||||
"http://spirit.sourceforge.net/dl_docs/pre-spirit.htm">here</a>.
|
||||
</p>
|
||||
<p>
|
||||
The version that we have now is a complete rewrite of the original Spirit
|
||||
parser using expression templates and static polymorphism, inspired by
|
||||
the works of Todd Veldhuizen (" <a href=
|
||||
"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.43.248">
|
||||
Expression Templates</a>", C++ Report, June 1995). Initially, the
|
||||
<i><b>static-Spirit</b></i> version was meant only to replace the core of
|
||||
the original <i><b>dynamic-Spirit</b></i>. Dynamic-Spirit needed a parser
|
||||
to implement itself anyway. The original employed a hand-coded
|
||||
recursive-descent parser to parse the input grammar specification
|
||||
strings.
|
||||
</p>
|
||||
<p>
|
||||
After its initial "open-source" debut in May 2001, static-Spirit became a
|
||||
success. At around November 2001, the Spirit website had an activity
|
||||
percentile of 98%, making it the number one parser tool at SourceForge
|
||||
at the time. Not bad for a niche project such as a parser library.
|
||||
The "static" portion of Spirit was forgotten and static-Spirit simply
|
||||
became Spirit. The framework soon evolved to acquire more dynamic
|
||||
features.
|
||||
</p>
|
||||
<p>
|
||||
<b>How to use this manual</b>
|
||||
</p>
|
||||
<p>
|
||||
The Spirit framework is organized in logical modules starting from the
|
||||
core. This documentation provides a user's guide and reference for each
|
||||
module in the framework. A simple and clear code example is worth a
|
||||
hundred lines of documentation; therefore, the user's guide is presented
|
||||
with abundant examples annotated and explained in a step-wise manner. The
|
||||
user's guide is based on examples &mdash; lots of them.
|
||||
</p>
|
||||
<p>
|
||||
As much as possible, forward information (i.e. citing a specific piece of
|
||||
information that has not yet been discussed) is avoided in the user's
|
||||
manual portion of each module. In many cases, though, it is unavoidable
|
||||
that advanced but related topics are interspersed with the normal flow of
|
||||
discussion. To alleviate this problem, topics categorized as "advanced"
|
||||
may be skipped at first reading.
|
||||
</p>
|
||||
<p>
|
||||
Some icons are used to mark certain topics indicative of their relevance.
|
||||
These icons precede some text to indicate:
|
||||
</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td>
|
||||
<table width="100%" border="0">
|
||||
<tr>
|
||||
<td colspan="3" class="table_title">
|
||||
Icons
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="19" class="table_cells">
|
||||
<img src="theme/note.gif" width="16" height="16">
|
||||
</td>
|
||||
<td width="58" class="table_cells">
|
||||
<b>Note</b>
|
||||
</td>
|
||||
<td width="627" class="table_cells">
|
||||
Information provided is moderately important and should be
|
||||
noted by the reader.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="19" class="table_cells">
|
||||
<img src="theme/alert.gif">
|
||||
</td>
|
||||
<td width="58" class="table_cells">
|
||||
<b>Alert</b>
|
||||
</td>
|
||||
<td width="627" class="table_cells">
|
||||
Information provided is of utmost importance.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="19" class="table_cells">
|
||||
<img src="theme/lens.gif" width="15" height="16">
|
||||
</td>
|
||||
<td width="58" class="table_cells">
|
||||
<b>Detail</b>
|
||||
</td>
|
||||
<td width="627" class="table_cells">
|
||||
Information provided is auxiliary but will give the reader a
|
||||
deeper insight into a specific topic. May be skipped.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="19" class="table_cells">
|
||||
<img src="theme/bulb.gif" width="13" height="18">
|
||||
</td>
|
||||
<td width="58" class="table_cells">
|
||||
<b>Tip</b>
|
||||
</td>
|
||||
<td width="627" class="table_cells">
|
||||
A potentially useful and helpful piece of information.
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
<b>Support</b>
|
||||
</p>
|
||||
<p>
|
||||
Please direct all questions to Spirit's mailing list. You can subscribe
|
||||
to the mailing list <a href=
|
||||
"https://lists.sourceforge.net/lists/listinfo/spirit-general">here</a>.
|
||||
The mailing list has a searchable archive. A search link to this archive
|
||||
is provided in <a href="http://spirit.sf.net">Spirit's home page</a>. You
|
||||
may also read and post messages to the mailing list through an
|
||||
<a href="http://news.gmane.org/thread.php?group=gmane.comp.parsers.spirit.general">
|
||||
NNTP news portal</a> (thanks to <a href=
|
||||
"http://www.gmane.org">www.gmane.org</a>). The news group mirrors the
|
||||
mailing list. Here are two links to the archives: via <a href=
|
||||
"http://dir.gmane.org/gmane.comp.parsers.spirit.general">
|
||||
gmane</a>, via <a href=
|
||||
"http://sourceforge.net/mailarchive/forum.php?forum_id=1595">geocrawler</a>.
|
||||
</p>
|
||||
<table width="100%" border="0" align="center">
|
||||
<tr>
|
||||
<td>
|
||||
<div align="center">
|
||||
<i><b><font size="5">To my dear daughter Phoenix</font></b></i>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table width="100%" border="0">
|
||||
<tr>
|
||||
<td width="72%">
|
||||
|
||||
</td>
|
||||
<td width="28%">
|
||||
<div align="right">
|
||||
<p>
|
||||
<b>Joel de Guzman<br></b> September 2002
|
||||
</p>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30">
|
||||
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<img src="theme/l_arr_disabled.gif" width="20" height="19">
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="introduction.html"><img src="theme/r_arr.gif" border="0">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table><br>
|
||||
|
||||
<hr size="1">
|
||||
<p class="copyright">
|
||||
Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the
|
||||
Boost Software License, Version 1.0. (See accompanying file
|
||||
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)</font>
|
||||
</p>
|
||||
<p>
|
||||
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,250 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html><head>
|
||||
|
||||
<title>Primitives</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css"></head>
|
||||
<body>
|
||||
<table background="theme/bkd2.gif" border="0" cellspacing="2" width="100%">
|
||||
<tbody><tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font face="Verdana, Arial, Helvetica, sans-serif" size="6"><b>Primitives</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" align="right" border="0" height="48" width="112"></a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tbody><tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="organization.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="operators.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<p>The framework predefines some parser primitives. These are the most basic building
|
||||
blocks that the client uses to build more complex parsers. These primitive parsers
|
||||
are template classes, making them very flexible.</p>
|
||||
<p>These primitive parsers can be instantiated directly or through a templatized
|
||||
helper function. Generally, the helper function is far simpler to deal with
|
||||
as it involves less typing.</p>
|
||||
<p>We have seen the character literal parser before through the generator function
|
||||
<tt>ch_p</tt> which is not really a parser but, rather, a parser generator.
|
||||
Class <tt>chlit<CharT></tt> is the actual template class behind the character
|
||||
literal parser. To instantiate a <tt>chlit</tt> object, you must explicitly
|
||||
provide the character type, <tt>CharT</tt>, as a template parameter which determines
|
||||
the type of the character. This type typically corresponds to the input type,
|
||||
usually <tt>char</tt> or <tt>wchar_t</tt>. The following expression creates
|
||||
a temporary parser object which will recognize the single letter <span class="quotes">'X'</span>.</p>
|
||||
<pre><code><font color="#000000"><span class="identifier"> </span><span class="identifier">chlit</span><span class="special"><</span><span class="keyword">char</span><span class="special">>(</span><span class="literal">'X'</span><span class="special">);</span></font></code></pre>
|
||||
<p>Using <tt>chlit</tt>'s generator function <tt>ch_p</tt> simplifies the usage
|
||||
of the <tt>chlit<></tt> class (this is true of most Spirit parser classes
|
||||
since most have corresponding generator functions). It is convenient to call
|
||||
the function because the compiler will deduce the template type through argument
|
||||
deduction for us. The example above could be expressed less verbosely using
|
||||
the <tt>ch_p </tt>helper function. </p>
|
||||
<pre><code><font color="#000000"><span class="special"> </span><span class="identifier">ch_p</span><span class="special">(</span><span class="literal">'X'</span><span class="special">) </span><span class="comment">// equivalent to chlit<char>('X') object</span></font></code></pre>
|
||||
<table align="center" border="0" width="80%">
|
||||
<tbody><tr>
|
||||
<td class="note_box"><img src="theme/lens.gif" height="16" width="15"> <b>Parser
|
||||
generators</b><br>
|
||||
<br>
|
||||
Whenever you see an invocation of the parser generator function, it is equivalent
|
||||
to the parser itself. Therefore, we often call <tt>ch_p</tt> a character
|
||||
parser, even if, technically speaking, it is a function that generates a
|
||||
character parser.</td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<p>The following grammar snippet shows these forms in action:</p>
|
||||
<pre><code><span class="comment"> </span><span class="comment">// a rule can "store" a parser object. They're covered<br> </span><span class="comment">// later, but for now just consider a rule as an opaque type<br> </span><span class="identifier">rule</span><span class="special"><> </span><span class="identifier">r1</span><span class="special">, </span><span class="identifier">r2</span><span class="special">, </span><span class="identifier">r3</span><span class="special">;<br><br> </span><span class="identifier">chlit</span><span class="special"><</span><span class="keyword">char</span><span class="special">> </span><span class="identifier">x</span><span class="special">(</span><span class="literal">'X'</span><span class="special">); </span><span class="comment">// declare a parser named x<br><br> </span><span class="identifier">r1 </span><span class="special">= </span><span class="identifier">chlit</span><span class="special"><</span><span class="keyword">char</span><span class="special">>(</span><span class="literal">'X'</span><span class="special">); </span><span class="comment">// explicit declaration<br> </span><span class="identifier">r2 </span><span class="special">= </span><span class="identifier">x</span><span class="special">; </span><span class="comment">// using x<br> </span><span class="identifier">r3 </span><span class="special">= </span><span class="identifier">ch_p</span><span class="special">(</span><span class="literal">'X'</span><span class="special">) </span><span class="comment">// using the generator</span></code></pre>
|
||||
<h2> chlit and ch_p</h2>
|
||||
<p>Matches a single character literal. <tt>chlit</tt> has a single template type
|
||||
parameter which defaults to <tt>char</tt> (i.e. <tt>chlit<></tt> is equivalent
|
||||
to <tt>chlit<char></tt>). This type parameter is the character type that
|
||||
<tt>chlit</tt> will recognize when parsing. The function generator version deduces
|
||||
the template type parameters from the actual function arguments. The <tt>chlit</tt>
|
||||
class constructor accepts a single parameter: the character it will match the
|
||||
input against. Examples:</p>
|
||||
<pre><code><span class="comment"> </span><span class="identifier">r1 </span><span class="special">= </span><span class="identifier">chlit</span><span class="special"><>(</span><span class="literal">'X'</span><span class="special">);<br> </span><span class="identifier">r2 </span><span class="special">= </span><span class="identifier">chlit</span><span class="special"><</span><span class="keyword">wchar_t</span><span class="special">>(</span><span class="identifier">L</span><span class="literal">'X'</span><span class="special">);<br> </span><span class="identifier">r3 </span><span class="special">= </span><span class="identifier">ch_p</span><span class="special">(</span><span class="literal">'X'</span><span class="special">);</span></code></pre>
|
||||
<p>Going back to our original example:</p>
|
||||
<pre><code><span class="special"> </span><span class="identifier">group </span><span class="special">= </span><span class="literal">'(' </span><span class="special">>> </span><span class="identifier">expr </span><span class="special">>> </span><span class="literal">')'</span><span class="special">;<br> </span><span class="identifier">expr1 </span><span class="special">= </span><span class="identifier">integer </span><span class="special">| </span><span class="identifier">group</span><span class="special">;<br> </span><span class="identifier">expr2 </span><span class="special">= </span><span class="identifier">expr1 </span><span class="special">>> </span><span class="special">*((</span><span class="literal">'*' </span><span class="special">>> </span><span class="identifier">expr1</span><span class="special">) </span><span class="special">| </span><span class="special">(</span><span class="literal">'/' </span><span class="special">>> </span><span class="identifier">expr1</span><span class="special">));<br> </span><span class="identifier">expr </span><span class="special">= </span><span class="identifier">expr2 </span><span class="special">>> </span><span class="special">*((</span><span class="literal">'+' </span><span class="special">>> </span><span class="identifier">expr2</span><span class="special">) </span><span class="special">| </span><span class="special">(</span><span class="literal">'-' </span><span class="special">>> </span><span class="identifier">expr2</span><span class="special">));</span></code></pre>
|
||||
<p></p>
|
||||
<p>the character literals <tt class="quotes">'('</tt>, <tt class="quotes">')'</tt>,
|
||||
<tt class="quotes">'+'</tt>, <tt class="quotes">'-'</tt>, <tt class="quotes">'*'</tt>
|
||||
and <tt class="quotes">'/'</tt> in the grammar declaration are <tt>chlit</tt>
|
||||
objects that are implicitly created behind the scenes.</p>
|
||||
<table align="center" border="0" width="80%">
|
||||
<tbody><tr>
|
||||
<td class="note_box"><img src="theme/lens.gif" height="16" width="15"> <b>char
|
||||
operands</b> <br>
|
||||
<br>
|
||||
The reason this works is from two special templatized overloads of <tt>operator<span class="operators">>></span></tt>
|
||||
that take a (<tt>char</tt>, <tt> ParserT</tt>), or (<tt>ParserT</tt>, <tt>char</tt>).
|
||||
These functions convert the character into a <tt>chlit</tt> object.</td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<p> One may prefer to declare these explicitly as:</p>
|
||||
<pre><code><span class="special"> </span><span class="identifier">chlit</span><span class="special"><> </span><span class="identifier">plus</span><span class="special">(</span><span class="literal">'+'</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special"><> </span><span class="identifier">minus</span><span class="special">(</span><span class="literal">'-'</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special"><> </span><span class="identifier">times</span><span class="special">(</span><span class="literal">'*'</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special"><> </span><span class="identifier">divide</span><span class="special">(</span><span class="literal">'/'</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special"><> </span><span class="identifier">oppar</span><span class="special">(</span><span class="literal">'('</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special"><> </span><span class="identifier">clpar</span><span class="special">(</span><span class="literal">')'</span><span class="special">);</span></code></pre>
|
||||
<h2>range and range_p</h2>
|
||||
<p>A <tt>range</tt> of characters is created from a low/high character pair. Such
|
||||
a parser matches a single character that is in the <tt>range</tt>, including
|
||||
both endpoints. Like <tt>chlit</tt>, <tt>range</tt> has a single template type
|
||||
parameter which defaults to <tt>char</tt>. The <tt>range</tt> class constructor
|
||||
accepts two parameters: the character range (<i>from</i> and <i>to</i>, inclusive)
|
||||
it will match the input against. The function generator version is <tt>range_p</tt>.
|
||||
Examples:</p>
|
||||
<pre><code><span class="special"> </span><span class="identifier">range</span><span class="special"><>(</span><span class="literal">'A'</span><span class="special">,</span><span class="literal">'Z'</span><span class="special">) </span><span class="comment">// matches 'A'..'Z'<br> </span><span class="identifier">range_p</span><span class="special">(</span><span class="literal">'a'</span><span class="special">,</span><span class="literal">'z'</span><span class="special">) </span><span class="comment">// matches 'a'..'z'</span></code></pre>
|
||||
<p>Note, the first character must be "before" the second, according
|
||||
to the underlying character encoding. The range, like chlit, is a
|
||||
single character parser.</p>
|
||||
<table align="center" border="0" width="80%">
|
||||
<tbody><tr>
|
||||
<td class="note_box"><img src="theme/alert.gif" height="16" width="16"><b>
|
||||
Character mapping</b><br>
|
||||
<br>
|
||||
Character mapping is inherently platform dependent. It is not guaranteed
|
||||
in the standard, for example, that 'A' < 'Z'; however, on many occasions,
|
||||
we are well aware of the character set we are using such as ASCII, ISO-8859-1
|
||||
or Unicode. Take care though when porting to another platform.</td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<h2> strlit and str_p</h2>
|
||||
<p>This parser matches a string literal. <tt>strlit</tt> has a single template
|
||||
type parameter: an iterator type. Internally, <tt>strlit</tt> holds a begin/end
|
||||
iterator pair pointing to a string or a container of characters. The <tt>strlit</tt>
|
||||
attempts to match the current input stream with this string. The template type
|
||||
parameter defaults to <tt>char const<span class="operators">*</span></tt>. <tt>strlit</tt>
|
||||
has two constructors. The first accepts a null-terminated character pointer.
|
||||
This constructor may be used to build <tt>strlits</tt> from quoted string literals.
|
||||
The second constructor takes in a first/last iterator pair. The function generator
|
||||
version is <tt>str_p</tt>. Examples:</p>
|
||||
<pre><code><span class="comment"> </span><span class="identifier">strlit</span><span class="special"><>(</span><span class="string">"Hello World"</span><span class="special">)<br> </span><span class="identifier">str_p</span><span class="special">(</span><span class="string">"Hello World"</span><span class="special">)<br><br> </span><span class="identifier">std</span><span class="special">::</span><span class="identifier">string </span><span class="identifier">msg</span><span class="special">(</span><span class="string">"Hello World"</span><span class="special">);<br> </span><span class="identifier">strlit</span><span class="special"><</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">string</span><span class="special">::</span><span class="identifier">const_iterator</span><span class="special">>(</span><span class="identifier">msg</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(), </span><span class="identifier">msg</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span></code></pre>
|
||||
<table align="center" border="0" width="80%">
|
||||
<tbody><tr>
|
||||
<td class="note_box"><img src="theme/note.gif" height="16" width="16"> <b>Character
|
||||
and phrase level parsing</b><br>
|
||||
<br>
|
||||
Typical parsers regard the processing of characters (symbols that form words
|
||||
or lexemes) and phrases (words that form sentences) as separate domains.
|
||||
Entities such as reserved words, operators, literal strings, numerical constants,
|
||||
etc., which constitute the terminals of a grammar are usually extracted
|
||||
first in a separate lexical analysis stage.<br>
|
||||
<br>
|
||||
At this point, as evident in the examples we have so far, it is important
|
||||
to note that, contrary to standard practice, the Spirit framework handles
|
||||
parsing tasks at both the character level as well as the phrase level. One
|
||||
may consider that a lexical analyzer is seamlessly integrated in the Spirit
|
||||
framework.<br>
|
||||
<br>
|
||||
Although the Spirit parser library does not need a separate lexical analyzer,
|
||||
there is no reason why we cannot have one. One can always have as many parser
|
||||
layers as needed. In theory, one may create a preprocessor, a lexical analyzer
|
||||
and a parser proper, all using the same framework.</td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<h2>chseq and chseq_p</h2>
|
||||
<p>Matches a character sequence. <tt>chseq</tt> has the same template type parameters
|
||||
and constructor parameters as strlit. The function generator version is <tt>chseq_p</tt>.
|
||||
Examples:</p>
|
||||
<pre><code><span class="special"> </span><span class="identifier">chseq</span><span class="special"><>(</span><span class="string">"ABCDEFG"</span><span class="special">)<br> </span><span class="identifier">chseq_p</span><span class="special">(</span><span class="string">"ABCDEFG"</span><span class="special">)</span></code></pre>
|
||||
<p><tt>strlit</tt> is an implicit lexeme. That is, it works solely on the character
|
||||
level. <tt>chseq</tt>, <tt>strlit</tt>'s twin, on the other hand, can work on
|
||||
both the character and phrase levels. What this simply means is that it can
|
||||
ignore white spaces in between the string characters. For example:</p>
|
||||
<pre><code><span class="special"> </span><span class="identifier">chseq</span><span class="special"><>(</span><span class="string">"ABCDEFG"</span><span class="special">)</span></code></pre>
|
||||
<p>can parse:</p>
|
||||
<pre><span class="special"> </span><span class="identifier">ABCDEFG<br> </span><span class="identifier">A </span><span class="identifier">B </span><span class="identifier">C </span><span class="identifier">D </span><span class="identifier">E </span><span class="identifier">F </span><span class="identifier">G<br> </span><span class="identifier">AB </span><span class="identifier">CD </span><span class="identifier">EFG</span></pre>
|
||||
<h2>More character parsers</h2>
|
||||
<p>The framework also predefines the full repertoire of single character parsers:</p>
|
||||
<table align="center" border="0" width="90%">
|
||||
<tbody><tr>
|
||||
<td class="table_title" colspan="2">Single character parsers</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>anychar_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches any single character (including
|
||||
the null terminator: '\0')</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>alnum_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches alpha-numeric characters</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>alpha_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches alphabetic characters</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>blank_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches spaces or tabs</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>cntrl_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches control characters</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>digit_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches numeric digits</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>graph_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches non-space printing characters</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>lower_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches lower case letters</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>print_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches printable characters</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>punct_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches punctuation symbols</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>space_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches spaces, tabs, returns, and newlines</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>upper_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches upper case letters</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells" width="30%"><b>xdigit_p</b></td>
|
||||
<td class="table_cells" width="70%">Matches hexadecimal digits</td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<h2><a name="negation"></a>negation ~</h2>
|
||||
<p>Single character parsers such as the <tt>chlit</tt>, <tt>range</tt>, <tt>anychar_p</tt>,
|
||||
<tt>alnum_p</tt> etc. can be negated. For example:</p>
|
||||
<pre><code><span class="special"> ~</span><span class="identifier">ch_p</span><span class="special">(</span><span class="literal">'x'</span><span class="special">)</span></code></pre>
|
||||
<p>matches any character except <tt>'x'</tt>. Double negation of a character parser
|
||||
cancels out the negation. <tt>~~alpha_p</tt> is equivalent to <tt>alpha_p</tt>.</p>
|
||||
<h2>eol_p</h2>
|
||||
<p>Matches the end of line (CR/LF and combinations thereof).</p>
|
||||
<h2><b>nothing_p</b></h2>
|
||||
<p>Never matches anything and always fails.</p>
|
||||
<h2>end_p</h2>
|
||||
<p>Matches the end of input (returns a successful match with 0 length when the
|
||||
input is exhausted)</p><h2>eps_p</h2>
|
||||
<p>The <strong>Epsilon</strong> (<tt>epsilon_p</tt> and <tt>eps_p</tt>) is a multi-purpose
|
||||
parser that returns a zero length match. See <a href="epsilon.html">Epsilon</a> for details.</p><p></p>
|
||||
<table border="0">
|
||||
<tbody><tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="organization.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="operators.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
Copyright © 2003 Martin Wille<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
<p> </p>
|
||||
</body></html>
|
||||
@@ -1,462 +0,0 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<meta content=
|
||||
"HTML Tidy for Windows (vers 1st February 2003), see www.w3.org"
|
||||
name="generator">
|
||||
<title>
|
||||
Quick Start
|
||||
</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Quick
|
||||
Start</b></font>
|
||||
</td>
|
||||
<td width="112">
|
||||
<a href="http://spirit.sf.net"><img src="theme/spirit.gif"
|
||||
width="112" height="48" align="right" border="0"></a>
|
||||
</td>
|
||||
</tr>
|
||||
</table><br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30">
|
||||
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="introduction.html"><img src="theme/l_arr.gif" border="0">
|
||||
</a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="basic_concepts.html"><img src="theme/r_arr.gif" border="0">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>
|
||||
<b>Why would you want to use Spirit?</b>
|
||||
</h2>
|
||||
<p>
|
||||
Spirit is designed to be a practical parsing tool. At the very least, the
|
||||
ability to generate a fully-working parser from a formal EBNF
|
||||
specification inlined in C++ significantly reduces development time.
|
||||
While it may be practical to use a full-blown, stand-alone parser such as
|
||||
YACC or ANTLR when we want to develop a computer language such as C or
|
||||
Pascal, it is certainly overkill to bring in the big guns when we wish to
|
||||
write extremely small micro-parsers. At that end of the spectrum,
|
||||
programmers typically approach the job at hand not as a formal parsing
|
||||
task but through ad hoc hacks using primitive tools such as
|
||||
<tt>scanf</tt>. True, there are tools such as regular-expression
|
||||
libraries (such as <a href=
|
||||
"http://www.boost.org/libs/regex/index.html">boost regex</a>) or scanners
|
||||
(such as <a href="http://www.boost.org/libs/tokenizer/index.html">boost
|
||||
tokenizer</a>), but these tools do not scale well when we need to write
|
||||
more elaborate parsers. Attempting to write even a moderately-complex
|
||||
parser using these tools leads to code that is hard to understand and
|
||||
maintain.
|
||||
</p>
|
||||
<p>
|
||||
One prime objective is to make the tool easy to use. When one thinks of a
|
||||
parser generator, the usual reaction is "it must be big and complex with
|
||||
a steep learning curve." Not so. Spirit is designed to be fully scalable.
|
||||
The framework is structured in layers. This permits learning on an
|
||||
as-needed basis, after only learning the minimal core and basic concepts.
|
||||
</p>
|
||||
<p>
|
||||
For development simplicity and ease in deployment, the entire framework
|
||||
consists of only header files, with no libraries to link against or
|
||||
build. Just put the spirit distribution in your include path, compile and
|
||||
run. Code size? Very tight. In the quick start example that we shall
|
||||
present in a short while, the code size is dominated by the instantiation
|
||||
of the <tt>std::vector</tt> and <tt>std::iostream</tt>.
|
||||
</p>
|
||||
<h2>
|
||||
<b>Trivial Example #1</b></h2>
|
||||
<p>Create a parser that will parse
|
||||
a floating-point number.
|
||||
</p>
|
||||
<pre><code><font color="#000000"> </font></code><span class="identifier">real_p</span>
|
||||
</pre>
|
||||
<p>
|
||||
(You've got to admit, that's trivial!) The above code actually generates
|
||||
a Spirit <tt>real_parser</tt> (a built-in parser) which parses a floating
|
||||
point number. Take note that parsers that are meant to be used directly
|
||||
by the user end with "<tt>_p</tt>" in their names as a Spirit convention.
|
||||
Spirit has many pre-defined parsers and consistent naming conventions
|
||||
help you keep from going insane!
|
||||
</p>
|
||||
<h2>
|
||||
<b>Trivial Example #2</b></h2>
|
||||
<p>
|
||||
Create a parser that will accept a line consisting of two floating-point
|
||||
numbers.
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><span class=
|
||||
"identifier">real_p</span> <span class=
|
||||
"special">>></span> <span class="identifier">real_p</span></code>
|
||||
</pre>
|
||||
<p>
|
||||
Here you see the familiar floating-point numeric parser
|
||||
<code><tt>real_p</tt></code> used twice, once for each number. What's
|
||||
that <tt class="operators">>></tt> operator doing in there? Well,
|
||||
they had to be separated by something, and this was chosen as the
|
||||
"followed by" sequence operator. The above program creates a parser from
|
||||
two simpler parsers, glueing them together with the sequence operator.
|
||||
The result is a parser that is a composition of smaller parsers.
|
||||
Whitespace between numbers can implicitly be consumed depending on how
|
||||
the parser is invoked (see below).
|
||||
</p>
|
||||
<p>
|
||||
Note: when we combine parsers, we end up with a "bigger" parser, but it's
|
||||
still a parser. Parsers can get bigger and bigger, nesting more and more,
|
||||
but whenever you glue two parsers together, you end up with one bigger
|
||||
parser. This is an important concept.
|
||||
</p>
|
||||
<h2>
|
||||
<b>Trivial Example #3</b></h2>
|
||||
<p>
|
||||
Create a parser that will accept an arbitrary number of floating-point
|
||||
numbers. (Arbitrary means anything from zero to infinity.)
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><span class=
|
||||
"special">*</span><span class="identifier">real_p</span></code>
|
||||
</pre>
|
||||
<p>
|
||||
This is like a regular-expression Kleene Star, though the syntax might
|
||||
look a bit odd for a C++ programmer not used to seeing the <tt class=
|
||||
"operators">*</tt> operator overloaded like this. Actually, if you know
|
||||
regular expressions it may look odd too since the star is <b>before</b>
|
||||
the expression it modifies. C'est la vie. Blame it on the fact that we
|
||||
must work with the syntax rules of C++.
|
||||
</p>
|
||||
<p>
|
||||
Any expression that evaluates to a parser may be used with the Kleene
|
||||
Star. Keep in mind, though, that due to C++ operator precedence rules you
|
||||
may need to put the expression in parentheses for complex expressions.
|
||||
The Kleene Star is also known as a Kleene Closure, but we call it the
|
||||
Star in most places.
|
||||
</p>
|
||||
<h3>
|
||||
<b><a name="list_of_numbers"></a> Example #4 [ A Just Slightly Less Trivial Example ]</b>
|
||||
</h3>
|
||||
<p>
|
||||
This example will create a parser that accepts a comma-delimited list of numbers and put the numbers in a vector.
|
||||
</p>
|
||||
<h4><strong> Step 1. Create the parser</strong></h4>
|
||||
<pre><code><font color="#000000"> </font></code><code><span class=
|
||||
"identifier">real_p</span> <span class=
|
||||
"special">>></span> <span class="special">*(</span><span class=
|
||||
"identifier">ch_p</span><span class="special">(</span><span class=
|
||||
"literal">','</span><span class="special">)</span> <span class=
|
||||
"special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">)</span></code>
|
||||
</pre>
|
||||
<p>
|
||||
Notice <tt>ch_p(',')</tt>. It is a literal character parser that can
|
||||
recognize the comma <tt>','</tt>. In this case, the Kleene Star is
|
||||
modifying a more complex parser, namely, the one generated by the
|
||||
expression:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><span class=
|
||||
"special">(</span><span class="identifier">ch_p</span><span class=
|
||||
"special">(</span><span class="literal">','</span><span class=
|
||||
"special">)</span> <span class="special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">)</span></code>
|
||||
</pre>
|
||||
<p>
|
||||
Note that this is a case where the parentheses are necessary. The Kleene
|
||||
star encloses the complete expression above.
|
||||
</p>
|
||||
<h4>
|
||||
<b><strong>Step 2. </strong>Using a Parser (now that it's created)</b></h4>
|
||||
<p>
|
||||
Now that we have created a parser, how do we use it? Like the result of
|
||||
any C++ temporary object, we can either store it in a variable, or call
|
||||
functions directly on it.
|
||||
</p>
|
||||
<p>
|
||||
We'll gloss over some low-level C++ details and just get to the good
|
||||
stuff.
|
||||
</p>
|
||||
<p>
|
||||
If <b><tt>r</tt></b> is a rule (don't worry about what rules exactly are
|
||||
for now. This will be discussed later. Suffice it to say that the rule is
|
||||
a placeholder variable that can hold a parser), then we store the parser
|
||||
as a rule like this:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
|
||||
"identifier">r</span> <span class="special">=</span> <span class=
|
||||
"identifier">real_p</span> <span class=
|
||||
"special">>> *(</span><span class=
|
||||
"identifier">ch_p</span><span class="special">(</span><span class=
|
||||
"literal">','</span><span class="special">) >></span> <span class=
|
||||
"identifier">real_p</span><span class="special">);</span></font></code>
|
||||
</pre>
|
||||
<p>
|
||||
Not too exciting, just an assignment like any other C++ expression you've
|
||||
used for years. The cool thing about storing a parser in a rule is this:
|
||||
rules are parsers, and now you can refer to it <b>by name</b>. (In this
|
||||
case the name is <tt><b>r</b></tt>). Notice that this is now a full
|
||||
assignment expression, thus we terminate it with a semicolon,
|
||||
"<tt>;</tt>".
|
||||
</p>
|
||||
<p>
|
||||
That's it. We're done with defining the parser. So the next step is now
|
||||
invoking this parser to do its work. There are a couple of ways to do
|
||||
this. For now, we shall use the free <tt>parse</tt> function that takes
|
||||
in a <tt>char const*</tt>. The function accepts three arguments:
|
||||
</p>
|
||||
<blockquote>
|
||||
<p>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> The null-terminated
|
||||
<tt>const char*</tt> input<br>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> The parser
|
||||
object<br>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> Another parser
|
||||
called the <b>skip parser</b>
|
||||
</p>
|
||||
</blockquote>
|
||||
<p>
|
||||
In our example, we wish to skip spaces and tabs. Another parser named
|
||||
<tt>space_p</tt> is included in Spirit's repertoire of predefined
|
||||
parsers. It is a very simple parser that simply recognizes whitespace. We
|
||||
shall use <tt>space_p</tt> as our skip parser. The skip parser is the one
|
||||
responsible for skipping characters in between parser elements such as
|
||||
the <tt>real_p</tt> and the <tt>ch_p</tt>.
|
||||
</p>
|
||||
<p>
|
||||
Ok, so now let's parse!
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
|
||||
"identifier">r</span> <span class="special">=</span> <span class=
|
||||
"identifier">real_p</span> <span class=
|
||||
"special">>></span> <span class="special">*(</span><span class=
|
||||
"identifier">ch_p</span><span class="special">(</span><span class=
|
||||
"literal">','</span><span class="special">)</span> <span class=
|
||||
"special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">);
|
||||
</span> <span class="identifier"> parse</span><span class=
|
||||
"special">(</span><span class="identifier">str</span><span class=
|
||||
"special">,</span> <span class="identifier">r</span><span class=
|
||||
"special">,</span> <span class="identifier">space_p</span><span class=
|
||||
"special">)</span> <span class=
|
||||
"comment">// Not a full statement yet, patience...</span></font></code>
|
||||
</pre>
|
||||
<p>
|
||||
The parse function returns an object (called <tt>parse_info</tt>) that
|
||||
holds, among other things, the result of the parse. In this example, we
|
||||
need to know:
|
||||
</p>
|
||||
<blockquote>
|
||||
<p>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> Did the parser
|
||||
successfully recognize the input <tt>str</tt>?<br>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> Did the parser
|
||||
<b>fully</b> parse and consume the input up to its end?
|
||||
</p>
|
||||
</blockquote>
|
||||
<p>
|
||||
To get a complete picture of what we have so far, let us also wrap this
|
||||
parser inside a function:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
|
||||
"keyword">bool
|
||||
</span> <span class="identifier"> parse_numbers</span><span class=
|
||||
"special">(</span><span class="keyword">char</span> <span class=
|
||||
"keyword">const</span><span class="special">*</span> <span class=
|
||||
"identifier">str</span><span class="special">)
|
||||
{
|
||||
</span> <span class="keyword"> return</span> <span class=
|
||||
"identifier">parse</span><span class="special">(</span><span class=
|
||||
"identifier">str</span><span class="special">,</span> <span class=
|
||||
"identifier">real_p</span> <span class=
|
||||
"special">>></span> <span class="special">*(</span><span class=
|
||||
"literal">','</span> <span class="special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">),</span> <span class=
|
||||
"identifier">space_p</span><span class="special">).</span><span class=
|
||||
"identifier">full</span><span class="special">;
|
||||
}</span></font></code>
|
||||
</pre>
|
||||
<p>
|
||||
Note in this case we dropped the named rule and inlined the parser
|
||||
directly in the call to parse. Upon calling parse, the expression
|
||||
evaluates into a temporary, unnamed parser which is passed into the
|
||||
parse() function, used, and then destroyed.
|
||||
</p>
|
||||
<table border="0" width="80%" align="center">
|
||||
<tr>
|
||||
<td class="note_box">
|
||||
<img src="theme/note.gif" width="16" height="16"><b>char and wchar_t
|
||||
operands</b><br>
|
||||
<br>
|
||||
The careful reader may notice that the parser expression has
|
||||
<tt class="quotes">','</tt> instead of <tt>ch_p(',')</tt> as the
|
||||
previous examples did. This is ok due to C++ syntax rules of
|
||||
conversion. There are <tt>>></tt> operators that are overloaded
|
||||
to accept a <tt>char</tt> or <tt>wchar_t</tt> argument on the left or
|
||||
right (but not both). An operator may be overloaded if at least one
|
||||
of its parameters is a user-defined type. In this case, the
|
||||
<tt>real_p</tt> is the 2nd argument to <tt>operator<span class=
|
||||
"operators">>></span></tt>, and so the proper overload of
|
||||
<tt class="operators">>></tt> is used, converting
|
||||
<tt class="quotes">','</tt> into a character literal parser.<br>
|
||||
<br>
|
||||
The problem with omitting the <tt>ch_p</tt> call should be obvious:
|
||||
<tt>'a' >> 'b'</tt> is <b>not</b> a Spirit parser, it is a
|
||||
numeric expression, right-shifting the ASCII (or another encoding)
|
||||
value of <tt class="quotes">'a'</tt> by the ASCII value of
|
||||
<tt class="quotes">'b'</tt>. However, both <tt>ch_p('a') >>
|
||||
'b'</tt> and <tt>'a' >> ch_p('b')</tt> are Spirit sequence
|
||||
parsers for the letter <tt class="quotes">'a'</tt> followed by
|
||||
<tt class="quotes">'b'</tt>. You'll get used to it, sooner or
|
||||
later.
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>
|
||||
Take note that the object returned from the parse function has a member
|
||||
called <tt>full</tt> which is true if both of our requirements above
|
||||
are met (i.e. the parser fully parsed the input).
|
||||
</p>
|
||||
<h4>
|
||||
<b> Step 3. Semantic Actions</b></h4>
|
||||
<p>
|
||||
Our parser above is really nothing but a recognizer. It answers the
|
||||
question <i class="quotes">"did the input match our grammar?"</i>, but it
|
||||
does not remember any data, nor does it perform any side effects.
|
||||
Remember: we want to put the parsed numbers into a vector. This is done
|
||||
in an <b>action</b> that is linked to a particular parser. For example,
|
||||
whenever we parse a real number, we wish to store the parsed number after
|
||||
a successful match. We now wish to extract information from the parser.
|
||||
Semantic actions do this. Semantic actions may be attached to any point
|
||||
in the grammar specification. These actions are C++ functions or functors
|
||||
that are called whenever a part of the parser successfully recognizes a
|
||||
portion of the input. Say you have a parser <b>P</b>, and a C++ function
|
||||
<b>F</b>, you can make the parser call <b>F</b> whenever it matches an
|
||||
input by attaching <b>F</b>:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
|
||||
"identifier">P</span><span class="special">[&</span><span class=
|
||||
"identifier">F</span><span class="special">]</span></font></code>
|
||||
</pre>
|
||||
<p>
|
||||
Or if <b>F</b> is a function object (a functor):
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
|
||||
"identifier">P</span><span class="special">[</span><span class=
|
||||
"identifier">F</span><span class="special">]</span></font></code>
|
||||
</pre>
|
||||
<p>
|
||||
The function/functor signature depends on the type of the parser to which
|
||||
it is attached. The parser <tt>real_p</tt> passes a single argument: the
|
||||
parsed number. Thus, if we were to attach a function <b>F</b> to
|
||||
<tt>real_p</tt>, we need <b>F</b> to be declared as:
|
||||
</p>
|
||||
|
||||
<pre><code> </code><code><span class=
|
||||
"keyword">void</span> <span class="identifier">F</span><span class=
|
||||
"special">(</span><span class="keyword">double</span> <span class=
|
||||
"identifier">n</span><span class="special">);</span></code></pre>
|
||||
<p>
|
||||
For our example however, again, we can take advantage of some predefined
|
||||
semantic functors and functor generators (<img src="theme/lens.gif"
|
||||
width="15" height="16"> A functor generator is a function that returns
|
||||
a functor). For our purpose, Spirit has a functor generator
|
||||
<tt>push_back_a(c)</tt>. In brief, this semantic action, when called,
|
||||
<b>appends</b> the parsed value it receives from the parser it is
|
||||
attached to, to the container <tt>c</tt>.
|
||||
</p>
|
||||
<p>
|
||||
Finally, here is our complete comma-separated list parser:
|
||||
</p>
|
||||
|
||||
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
|
||||
"keyword">bool
|
||||
</span> <span class="identifier">parse_numbers</span><span class=
|
||||
"special">(</span><span class="keyword">char</span> <span class=
|
||||
"keyword">const</span><span class="special">*</span> <span class=
|
||||
"identifier">str</span><span class="special">,</span> <span class=
|
||||
"identifier">vector</span><span class="special"><</span><span class=
|
||||
"keyword">double</span><span class=
|
||||
"special">>&</span> <span class="identifier">v</span><span class=
|
||||
"special">)
|
||||
{
|
||||
</span> <span class="keyword">return</span> <span class=
|
||||
"identifier">parse</span><span class="special">(</span><span class=
|
||||
"identifier">str</span><span class="special">,
|
||||
|
||||
</span> <span class="comment"> // Begin grammar
|
||||
</span> <span class="special"> (
|
||||
</span> <span class="identifier">real_p</span><span class=
|
||||
"special">[</span><span class="identifier">push_back_a</span><span class=
|
||||
"special">(</span><span class="identifier">v</span><span class=
|
||||
"special">)]</span> <span class="special">>></span> <span class=
|
||||
"special">*(</span><span class="literal">','</span> <span class=
|
||||
"special">>></span> <span class=
|
||||
"identifier">real_p</span><span class="special">[</span><span class=
|
||||
"identifier">push_back_a</span><span class="special">(</span><span class=
|
||||
"identifier">v</span><span class="special">)])
|
||||
)
|
||||
</span> <span class="special"> ,
|
||||
</span> <span class="comment"> // End grammar
|
||||
|
||||
</span> <span class="identifier"> space_p</span><span class=
|
||||
"special">).</span><span class="identifier">full</span><span class="special">;
|
||||
}</span></font></code>
|
||||
</pre>
|
||||
<p>
|
||||
This is the same parser as above. This time with appropriate semantic
|
||||
actions attached to strategic places to extract the parsed numbers and
|
||||
stuff them in the vector <tt>v</tt>. The parse_numbers function returns
|
||||
true when successful.
|
||||
</p>
|
||||
<p>
|
||||
<img src="theme/lens.gif" width="15" height="16"> The full source code
|
||||
can be <a href="../example/fundamental/number_list.cpp">viewed here</a>.
|
||||
This is part of the Spirit distribution.
|
||||
</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30">
|
||||
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="introduction.html"><img src="theme/l_arr.gif" border="0">
|
||||
</a>
|
||||
</td>
|
||||
<td width="30">
|
||||
<a href="basic_concepts.html"><img src="theme/r_arr.gif" border="0">
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table><br>
|
||||
<hr size="1">
|
||||
<p class="copyright">
|
||||
Copyright © 1998-2003 Joel de Guzman<br>
|
||||
Copyright © 2002 Chris Uzdavinis<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the
|
||||
Boost Software License, Version 1.0. (See accompanying file
|
||||
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)</font>
|
||||
</p>
|
||||
<blockquote>
|
||||
|
||||
</blockquote>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,602 +0,0 @@
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<title>Quick Reference</title>
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10"> </td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Quick
|
||||
Reference </b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="error_handling.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="includes.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>This isn't intended to be a full, detailed reference; nor is it intended to
|
||||
be of any use to readers who aren't already familiar with Spirit. It's just
|
||||
a brief reminder of the syntax and behaviour of each component, with links to
|
||||
the full documentation. </p>
|
||||
<ul>
|
||||
<li><strong>Primitive parser generators</strong> <i>(action arguments are listed
|
||||
on the right)</i>
|
||||
<ul>
|
||||
<li><a href="quickref.html#null_parsers">Null parsers</a></li>
|
||||
<li><a href="quickref.html#character_parsers">Character parsers</a></li>
|
||||
<li><a href="quickref.html#number_parsers">Number parsers</a></li>
|
||||
<li><a href="quickref.html#otherlexeme_parsers">Other lexeme parsers</a></li>
|
||||
<li><a href="quickref.html#text_parsers">Text parsers</a><br>
|
||||
<br>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><strong>Other parser elements</strong>
|
||||
<ul>
|
||||
<li><a href="quickref.html#compound_parsers">Compound parsers</a></li>
|
||||
<li><a href="quickref.html#general_directives">General directives</a></li>
|
||||
<li><a href="quickref.html#tree_specific_directives">Tree-specific directives</a><br>
|
||||
<br>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><strong>Operators</strong>
|
||||
<ul>
|
||||
<li><a href="quickref.html#unary_operators">Unary operators</a></li>
|
||||
<li><a href="quickref.html#binary_operators">Binary operators</a> <i>(in order of precedence)<br>
|
||||
</i></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<table>
|
||||
|
||||
<tr>
|
||||
|
||||
<td valign="top">
|
||||
|
||||
<table>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="3"><a name="null_parsers"></a>Null parsers</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">end_p</a></code></td>
|
||||
<td class="table_cells">Matches EOF</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">eps_p</a><br />
|
||||
<a href="primitives.html">eps_p</a>(P)</code></td>
|
||||
<td class="table_cells">Matches without consuming text</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">epsilon_p</a><br />
|
||||
<a href="primitives.html">epsilon_p</a>(P)</code></td>
|
||||
<td class="table_cells">Synonym for <b>eps_p</b></td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">nothing_p</a></code></td>
|
||||
<td class="table_cells">Always fails</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="3"><a name="character_parsers"></a>Character parsers</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">alnum_p</a></code></td>
|
||||
<td class="table_cells">Matches any alphanumeric character</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">alpha_p</a></code></td>
|
||||
<td class="table_cells">Matches any letter</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">anychar_p</a></code></td>
|
||||
<td class="table_cells">Matches any character</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">blank_p</a></code></td>
|
||||
<td class="table_cells">Matches a space or tab</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="primitives.html">ch_p</a>(char)</code></td>
|
||||
<td class="table_cells">Matches a character</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="character_sets.html">chset_p</a>(charset)</code></td>
|
||||
<td class="table_cells">Matches a character in the set</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">cntrl_p</a></code></td>
|
||||
<td class="table_cells">Matches any control character</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">digit_p</a></code></td>
|
||||
<td class="table_cells">Matches any digit</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="parametric_parsers.html">f_ch_p</a>(func)</code></td>
|
||||
<td class="table_cells">Matches a character</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="parametric_parsers.html">f_range_p</a>(func1,
|
||||
func2)</code></td>
|
||||
<td class="table_cells">Matches any character in the inclusive range</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">graph_p</a></code></td>
|
||||
<td class="table_cells">Matches any non-space printable character</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">lower_p</a></code></td>
|
||||
<td class="table_cells">Matches any lower-case letter</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">print_p</a></code></td>
|
||||
<td class="table_cells">Matches any printable character</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">punct_p</a></code></td>
|
||||
<td class="table_cells">Matches any punctuation mark</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="primitives.html">range_p</a>(char1,
|
||||
char2)</code></td>
|
||||
<td class="table_cells">Matches any character in the inclusive range</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">sign_p</a></code></td>
|
||||
<td class="table_cells">Matches a plus or minus sign</td>
|
||||
<td class="table_cells"><i>bool</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">space_p</a></code></td>
|
||||
<td class="table_cells">Matches any whitespace character</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">upper_p</a></code></td>
|
||||
<td class="table_cells">Matches any upper-case letter</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">xdigit_p</a></code></td>
|
||||
<td class="table_cells">Matches any hexadecimal digit</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="3"><a name="number_parsers"></a>Number parsers</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">bin_p</a></code></td>
|
||||
<td class="table_cells">Matches an unsigned binary integer</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">hex_p</a></code></td>
|
||||
<td class="table_cells">Matches an unsigned hexadecimal integer</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">int_p</a></code></td>
|
||||
<td class="table_cells">Matches a signed decimal integer</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="numerics.html">int_parser</a>&lt;type,
|
||||
base, min, max&gt;</code></td>
|
||||
<td class="table_cells">Matches a signed integer with <b>min</b> to <b>max</b> digits</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">oct_p</a></code></td>
|
||||
<td class="table_cells">Matches an unsigned octal integer</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">real_p</a></code></td>
|
||||
<td class="table_cells">Matches a floating point number</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="numerics.html">real_parser</a>&lt;type,
|
||||
policy&gt;</code></td>
|
||||
<td class="table_cells">Matches a floating point number</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">strict_real_p</a></code></td>
|
||||
<td class="table_cells">Matches a floating point number (requires decimal point)</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">strict_ureal_p</a></code></td>
|
||||
<td class="table_cells">Matches an unsigned FP number (requires decimal point)</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">uint_p</a></code></td>
|
||||
<td class="table_cells">Matches an unsigned decimal integer</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="numerics.html">uint_parser</a>&lt;type,
|
||||
base, min, max&gt;</code></td>
|
||||
<td class="table_cells">Matches an unsigned integer with <b>min</b> to <b>max</b> digits</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="numerics.html">ureal_p</a></code></td>
|
||||
<td class="table_cells">Matches an unsigned FP number</td>
|
||||
<td class="table_cells"><i>numeric</i></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="3" id="other_lexeme_parsers"><a name="otherlexeme_parsers"></a>Other lexeme parsers</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="escape_char_parser.html">c_escape_ch_p</a></code></td>
|
||||
<td class="table_cells">Matches a C escape code</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="confix.html">comment_p</a>(string)<br />
|
||||
<a href="confix.html">comment_p</a> (string1, string2)</code></td>
|
||||
<td class="table_cells">Matches C++ or C-style comments</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="primitives.html">eol_p</a></code></td>
|
||||
<td class="table_cells">Matches CR, LF, or any combination</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="parametric_parsers.html">f_str_p</a>(func1,
|
||||
func2)</code></td>
|
||||
<td class="table_cells">Matches a string</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="escape_char_parser.html">lex_escape_ch_p</a></code></td>
|
||||
<td class="table_cells">Matches a C escape code or any backslash escape</td>
|
||||
<td class="table_cells"><i>char</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="regular_expression_parser.html">regex_p</a>(regex)</code></td>
|
||||
<td class="table_cells">Matches a regular expression</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="primitives.html">str_p</a>(string)<br />
|
||||
<a href="primitives.html">str_p</a>(iter1, iter2)</code></td>
|
||||
<td class="table_cells">Matches a string</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="3"><a name="text_parsers"></a>Text parsers</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="primitives.html">chseq_p</a>(string)<br />
|
||||
<a href="primitives.html">chseq_p</a>(iter1, iter2)</code></td>
|
||||
<td class="table_cells">Matches a string, possibly with embedded whitespace</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="parametric_parsers.html">f_chseq_p</a>(func1,
|
||||
func2)</code></td>
|
||||
<td class="table_cells">Matches a string, possibly with embedded whitespace</td>
|
||||
<td class="table_cells"><i>iter,iter</i></td>
|
||||
</tr>
|
||||
|
||||
</table>
|
||||
|
||||
</td>
|
||||
|
||||
<td width="10"> </td>
|
||||
|
||||
<td valign="top">
|
||||
|
||||
<table>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="2"><a name="compound_parsers"></a>Compound parsers</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="confix.html">confix_p</a>(open,
|
||||
exp, close)</code></td>
|
||||
<td class="table_cells">Matches <b>open >> (exp - close) >> close</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="dynamic_parsers.html">do_p</a>[P].<a href="dynamic_parsers.html">while_p</a>(cond)</code></td>
|
||||
<td class="table_cells">Matches while a condition is true (at least once)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="dynamic_parsers.html">for_p</a>(init,
|
||||
cond, step)[P]</code></td>
|
||||
<td class="table_cells">Matches in a loop</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="functor_parser.html">functor_parser</a>&lt;func&gt;</code></td>
|
||||
<td class="table_cells">Wraps an external parser</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="dynamic_parsers.html">if_p</a>(cond)[P]<br />
|
||||
<a href="dynamic_parsers.html">if_p</a>(cond)[P].<a href="dynamic_parsers.html">else_p</a>[P]</code></td>
|
||||
<td class="table_cells">Matches depending on a condition</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="the_lazy_parser.html">lazy_p</a>(P)</code></td>
|
||||
<td class="table_cells">Evaluates a parser at run time</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="list_parsers.html">list_p</a> <br />
|
||||
<a href="list_parsers.html">list_p</a>(del)<br />
|
||||
<a href="list_parsers.html">list_p</a>(item, del)<br />
|
||||
<a href="list_parsers.html">list_p</a>(item, del, end)</code></td>
|
||||
<td class="table_cells">Matches a delimited list</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="loops.html">repeat_p</a>(num)[P]<br />
|
||||
<a href="loops.html">repeat_p</a>(min, max)[P]<br />
|
||||
<a href="loops.html">repeat_p</a>(min, <a href="loops.html">more</a>)[P]</code></td>
|
||||
<td class="table_cells">Matches multiple times</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="dynamic_parsers.html">while_p</a> (cond) [P]</code></td>
|
||||
<td class="table_cells">Matches while a condition is true</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="2" id="general_directives"><a name="general_directives"></a>General directives</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="directives.html">as_lower_d</a>[P]</code></td>
|
||||
<td class="table_cells">Converts text to lower case before matching</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="refactoring.html">attach_action_d</a>[(P1
|
||||
op P2)[act]]</code></td>
|
||||
<td class="table_cells">Transforms to <b>P1 [act] op P2 [act]</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="directives.html">lexeme_d</a>[P]</code></td>
|
||||
<td class="table_cells">Turns off whitespace skipping</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="directives.html">limit_d</a>[P](min,
|
||||
max)</code></td>
|
||||
<td class="table_cells">Matches only if the value is within the range</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="directives.html">longest_d</a>[P]</code></td>
|
||||
<td class="table_cells">Matches the longest of alternatives</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="directives.html">max_limit_d</a>[P](max)</code></td>
|
||||
<td class="table_cells">Matches only if <b>value <= max</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="directives.html">min_limit_d</a>[P](min)</code></td>
|
||||
<td class="table_cells">Matches only if <b>value >= min</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="refactoring.html">refactor_action_d</a>[P1
|
||||
[act] op P2]</code></td>
|
||||
<td class="table_cells">Transforms to <b>(P1 op P2) [act]</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="refactoring.html">refactor_unary_d</a>[op1
|
||||
P1 op2 P2]</code></td>
|
||||
<td class="table_cells">Transforms to <b>op1 (P1 op2 P2)</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="scoped_lock.html">scoped_lock_d</a>[P](mutex)</code></td>
|
||||
<td class="table_cells">Locks a mutex while matching</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="directives.html">shortest_d</a>[P]</code></td>
|
||||
<td class="table_cells">Matches the shortest of alternatives</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="2"><a name="tree_specific_directives"></a>Tree-specific directives</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">access_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Passes node value to action</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">discard_first_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Discards first node</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">discard_last_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Discards last node</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">discard_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Discards the generated node</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">infix_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Discards even-position nodes</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">inner_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Discards first and last nodes</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">leaf_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Generates a single node with no children</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">no_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Does not generate a node</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">root_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Identifies root nodes for an AST</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="trees.html">token_node_d</a>[P]</code></td>
|
||||
<td class="table_cells">Synonym for <b>leaf_node_d</b></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="2"><a name="unary_operators"></a>Unary operators</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="operators.html">!P</a></code></td>
|
||||
<td class="table_cells">Matches <b>P</b> or an empty string</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="operators.html">*P</a></code></td>
|
||||
<td class="table_cells">Matches <b>P</b> zero or more times</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="operators.html">+P</a></code></td>
|
||||
<td class="table_cells">Matches <b>P</b> one or more times</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<td class="table_cells"><code><a href="primitives.html">~P</a></code></td>
|
||||
<td class="table_cells">Matches anything that does not match <b>P</b></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="table_title" colspan="2"><a name="binary_operators"></a>Binary operators</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="operators.html">P1 % P2</a></code></td>
|
||||
<td class="table_cells">Matches one or more <b>P1</b> separated by <b>P2</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="operators.html">P1 - P2</a></code></td>
|
||||
<td class="table_cells">Matches <b>P1</b> but not <b>P2</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="operators.html">P1 >> P2</a></code></td>
|
||||
<td class="table_cells">Matches <b>P1</b> followed by <b>P2</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="operators.html">P1 & P2</a></code></td>
|
||||
<td class="table_cells">Matches both <b>P1</b> and <b>P2</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="operators.html">P1 ^ P2</a></code></td>
|
||||
<td class="table_cells">Matches <b>P1</b> or <b>P2</b>, but not both</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="operators.html">P1 | P2</a></code></td>
|
||||
<td class="table_cells">Matches <b>P1</b> or <b>P2</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="operators.html">P1 && P2</a></code></td>
|
||||
<td class="table_cells">Synonym for <b>P1 >> P2</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells"><code><a href="operators.html">P1 || P2</a></code></td>
|
||||
<td class="table_cells">Matches <b>P1 | P2 | P1 >> P2</b></td>
|
||||
</tr>
|
||||
|
||||
</table>
|
||||
|
||||
</td>
|
||||
|
||||
</tr>
|
||||
|
||||
</table>
|
||||
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="error_handling.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="includes.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2003 Ross Smith<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p> </p>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
@@ -1,162 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Rationale</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Rationale</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="faq.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="acknowledgments.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> <strong>Virtual functions:
|
||||
From static to dynamic C++</strong></p>
|
||||
<p>Rules straddle the border between static and dynamic C++. In effect, a rule
|
||||
transforms compile-time polymorphism (using templates) into run-time polymorphism
|
||||
(using virtual functions). This is necessary due to C++'s inability to automatically
|
||||
declare a variable of a type deduced from an arbitrarily complex expression
|
||||
in the right-hand side (rhs) of an assignment. Basically, we want to do something
|
||||
like:</p>
|
||||
<pre><code><font color="#000000"> <span class=identifier>T </span><span class=identifier>rule </span><span class=special>= </span><span class=identifier>an_arbitrarily_complex_expression</span><span class=special>;</span></font></code></pre>
|
||||
<p>without having to know or care about the resulting type of the right-hand side
|
||||
(rhs) of the assignment expression. Apart from this, we also need a facility
|
||||
to forward declare an unknown type:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>T </span><span class=identifier>rule</span><span class=special>;
|
||||
</span><span class=special>...
|
||||
</span><span class=identifier>rule </span><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>;</span></font></code></pre>
|
||||
<p>These limitations lead us to this implementation of rules. This comes at the
|
||||
expense of the overhead of a virtual function call, once through each invocation
|
||||
of a rule.</p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> <strong>Multiple declaration
|
||||
</strong> </p>
|
||||
<p>Some BNF variants allow multiple declarations of a <tt>rule</tt>. The declarations
|
||||
are taken as alternatives. Example:</p>
|
||||
<pre>
|
||||
<span class=identifier><code>r </code></span><code><span class=special>= </span><span class=identifier>a</span><span class=special>; </span><span class=identifier>
|
||||
r </span><span class=special>= </span><span class=identifier>b</span><span class=special>;</span></code></pre>
|
||||
<p> is equivalent to: </p>
|
||||
<pre>
|
||||
<span class=identifier><code>r </code></span><code><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>;</span></code></pre>
|
||||
<p>Spirit v1.3 allowed this behavior. However, the current version of Spirit <b>no
|
||||
longer</b> allows this because experience shows that this behavior leads to
|
||||
unwanted gotchas (for instance, it does not allow rules to be held in containers).
|
||||
In the current release of Spirit, a second assignment to a rule will simply
|
||||
redefine it. The old definition is destructed. This follows more closely C++
|
||||
semantics and is more in line with how the user expects the rule to behave.</p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> <b>Sequencing Syntax</b>
|
||||
<br>
|
||||
<br>
|
||||
The comma operator as in a, b seems to be a better candidate, syntax-wise. But
|
||||
then the problem is with its precedence. It has the lowest precedence in C/C++,
|
||||
which makes it virtually useless. <br>
|
||||
<br>
|
||||
Bjarne Stroustrup, in his article <a href="references.html#generalized_overloading">"Generalizing
|
||||
Overloading for C++2000"</a> talks about overloading whitespace. Such a
|
||||
feature would allow juxtapositioning of parser objects exactly as we do in (E)BNF
|
||||
(e.g. a b | c instead of a >> b | c). Unfortunately, the article was dated
|
||||
April 1, 1998. Oh well.</p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> <b>Forward iterators</b><br>
|
||||
<br>
|
||||
In general, the scanner expects at least a standard conforming forward iterator.
|
||||
Forward iterators are needed for backtracking where the iterator needs to be
|
||||
saved and restored later. Generally speaking, Spirit is a backtracking parser.
|
||||
The implication of this is that at some point, the iterator position will have
|
||||
to be saved to allow the parser to backtrack to a previous point. Thus, for
|
||||
backtracking to work, the framework requires at least a forward iterator.<br>
|
||||
<br>
|
||||
Some parsers might require more specialized iterators (bi-directional or even
|
||||
random access). Perhaps in the future, deterministic parsers, when added to the
|
||||
framework, will perform no backtracking and will need just a single token lookahead,
|
||||
hence will require input iterators only.</p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"><b> Why are subrules important?</b><br>
|
||||
<br>
|
||||
Subrules open up the opportunity to do aggressive meta programming as well because
|
||||
they do not rely on virtual functions. The virtual function is the meta-programmer's
|
||||
hell. Not only does it slow down the program due to the virtual function indirect
|
||||
call, it is also an opaque wall where no metaprogram can get past. It kills
|
||||
all meta-information beyond the virtual function call. Worse, the virtual function
|
||||
cannot be templated. Which means that its arguments have to be tied to actual
|
||||
types. Many problems stem from this limitation. <br>
|
||||
<br>
|
||||
While Spirit is currently classified as a non-deterministic recursive-descent
|
||||
parser, Doug Gregor first noted that other parsing techniques apart from top-down
|
||||
recursive descent may be applied. For instance, apart from non-deterministic
|
||||
recursive descent, deterministic LL(1) and LR(1) can theoretically be implemented
|
||||
using the same expression template front end. Spirit rules use virtual functions
|
||||
to encode the RHS parser expression in an opaque abstract parser type. While
|
||||
it serves its purpose well, the rule's virtual functions are the stumbling blocks
|
||||
to more advanced metaprogramming. Subrules are free from virtual functions.</p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"><b> <a name="exhaustive_rd"></a>Exhaustive
|
||||
backtracking and greedy RD</b></p>
|
||||
<p>Spirit doesn't do exhaustive backtracking like regular expressions are expected
|
||||
to. For example:</p>
|
||||
<pre> <span class="special">*</span>chlit_p<span class="special">(</span><span class="quotes">'a'</span><span class="special">) >></span> chlit_p<span class="special">(</span><span class="quotes">'a'</span><span class="special">);</span></pre>
|
||||
<p>will always fail to match because Spirit's Kleene star does not back off when
|
||||
the rest of the rule fails to match. </p>
|
||||
<p>Actually, there's a solution to this greedy RD problem. Such a scheme is discussed
|
||||
in section 6.6.2 of <a
|
||||
href="http://www.cs.vu.nl/%7Edick/PTAPG.html">Parsing Techniques: A Practical
|
||||
Guide</a>. The trick involves passing a <em>tail</em> parser (in addition to
|
||||
the scanner) to each parser. The start parser will then simply be: <tt>start
|
||||
>> end_p;</tt> (end_p is the start's tail). </p>
|
||||
<p>Spirit is greedy --using straightforward, naive RD. It is certainly possible
|
||||
to implement the fully backtracking scheme presented above, but there will be
|
||||
certainly also a performance hit. The scheme will always try to match all
|
||||
possible parser paths (full parser hierarchy traversal) until it reaches a point
|
||||
of certainty, that the whole thing matches or fails to match. </p>
|
||||
<table border="0" width="80%" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><p><img src="theme/note.gif" width="16" height="16"><b>Backtracking
|
||||
and Greedy RD </b><br>
|
||||
<br>
|
||||
Spirit is quite consistent and intuitive about when it backtracks and
|
||||
to where, although it may not be obvious to those coming from different
|
||||
backgrounds. In general, any (sub)parser will, given the same input, always
|
||||
match the same portion of the input (or fail to match the input at all).
|
||||
This means that Spirit is inherently greedy. Spirit will only backtrack
|
||||
when a (sub)parser fails to match the input, and it will always backtrack
|
||||
to the next choice point upward (not backward) in the parser structure.
|
||||
In other words abb|ab will match "ab", as will a(bb|b), but
|
||||
(ab|a)b won't because the (ab|a) subparser will always match the 'b' after
|
||||
the 'a' if it is available.</p>
|
||||
<p>--Rainer Deyke</p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>There's a strong preference for the "simplicity with all the knobs when you
|
||||
need them" approach, right now. On the other hand, the flexibility of Spirit
|
||||
makes it possible to have different optional schemes available. It might be
|
||||
possible to implement an exhaustive backtracking RD scheme as an optional feature
|
||||
in the future. </p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="faq.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="acknowledgments.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,125 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Refactoring Parsers</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link href="theme/style.css" rel="stylesheet" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b> </b></font></td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Refactoring Parsers</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="functor_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="regular_expression_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><a name="refactoring_parsers"></a>There are three types of Refactoring Parsers
|
||||
implemented right now, which help to abstract common parser refactoring tasks.
|
||||
Parser refactoring means that a concrete parser construct is replaced (refactored)
|
||||
by another very similar parser construct. Two of the Refactoring Parsers described
|
||||
here (<tt>refactor_unary_parser</tt> and <tt>refactor_action_parser</tt>) are
|
||||
introduced to allow a simple and more expressive notation while using <a href="confix.html">Confix
|
||||
Parsers</a> and <a href="list_parsers.html">List Parsers</a>. The third Refactoring
|
||||
Parser (<tt>attach_action_parser</tt>) is implemented to abstract some functionality
|
||||
required for the Grouping Parser. Nevertheless
|
||||
these Refactoring Parsers may help in solving other complex parsing tasks too.</p>
|
||||
<h3>Refactoring unary parsers</h3>
|
||||
<p>The <tt>refactor_unary_d</tt> parser generator, which should be used to generate
|
||||
a unary refactoring parser, transforms a construct of the following type</p>
|
||||
<pre><code> <span class=identifier>refactor_unary_d</span><span class=special>[*</span><span class=identifier>some_parser </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>]</span></code></pre>
|
||||
<p>to </p>
|
||||
<pre><code> <span class=special>*(</span><span class=identifier>some_parser</span> <span class=special>- </span><span class=identifier>another_parser</span><span class=special>)</span></code></pre>
|
||||
<blockquote>
|
||||
<p>where <tt>refactor_unary_d</tt> is a predefined object of the parser generator
|
||||
struct <tt>refactor_unary_gen<></tt></p>
|
||||
</blockquote>
|
||||
<p>The <tt>refactor_unary_d</tt> parser generator generates a new parser as shown
|
||||
above, only if the original construct is an auxiliary binary parser (here the
|
||||
difference parser) and the left operand of this binary parser is an auxiliary
|
||||
unary parser (here the kleene star operator). If the original parser isn't a
|
||||
binary parser the compilation will fail. If the left operand isn't a unary
|
||||
parser, no refactoring will take place.</p>
|
||||
<h3>Refactoring action parsers</h3>
|
||||
<p>The <tt>refactor_action_d</tt> parser generator, which should be used to generate
|
||||
an action refactoring parser, transforms a construct of the following type</p>
|
||||
<pre><code> <span class=identifier>refactor_action_d</span><span class=special>[</span><span class=identifier>some_parser</span><span class=special>[</span><span class=identifier>some_actor</span><span class=special>] </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>]</span></code></pre>
|
||||
<p>to </p>
|
||||
<pre><code> <span class=special>(</span><span class=identifier>some_parser </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>)[</span><span class=identifier>some_actor</span><span class=special>]</span></code></pre>
|
||||
<blockquote>
|
||||
<p>where <tt>refactor_action_d</tt> is a predefined object of the parser generator
|
||||
struct <tt>refactor_action_gen<></tt></p>
|
||||
</blockquote>
|
||||
<p>The <tt>refactor_action_d</tt> parser generator generates a new parser as shown
|
||||
above, only if the original construct is an auxiliary binary parser (here the
|
||||
difference parser) and the left operand of this binary parser is an auxiliary
|
||||
parser generated by an attached semantic action. If the original parser isn't
|
||||
a binary parser the compilation will fail. If the left operand isn't an action
|
||||
parser, no refactoring will take place.</p>
|
||||
<h3>Attach action refactoring</h3>
|
||||
<p>The <tt>attach_action_d</tt> parser generator, which should be used to generate
|
||||
an attach action refactoring parser, transforms a construct of the following
|
||||
type</p>
|
||||
<pre><code> <span class=identifier>attach_action_d</span><span class=special>[</span><span class=identifier>(some_parser</span> <span class=special>>> </span><span class=identifier>another_parser</span>)<span class=special>[</span><span class=identifier>some_actor</span><span class=special>]</span><span class=special>]</span></code></pre>
|
||||
<p>to </p>
|
||||
<pre><code> <span class=identifier>some_parser</span><span class=special>[</span><span class=identifier>some_actor</span><span class=special>]</span><span class=identifier> </span><span class=special>>> </span><span class=identifier>another_parser</span><span class=special>[</span><span class=identifier>some_actor</span><span class=special>]</span></code></pre>
|
||||
<blockquote>
|
||||
<p>where <tt>attach_action_d</tt> is a predefined object of the parser generator
|
||||
struct <tt>attach_action_gen<></tt></p>
|
||||
</blockquote>
|
||||
|
||||
<p>The <tt>attach_action_d</tt> parser generator generates a new parser as shown
|
||||
above, only if the original construct is an auxiliary action parser and the
|
||||
parser to which this action is attached is an auxiliary binary parser (here the
|
||||
sequence parser). If the original parser isn't an action parser the compilation
|
||||
will fail. If the parser to which the action is attached isn't a binary parser,
|
||||
no refactoring will take place.</p>
|
||||
<h3>Nested refactoring</h3>
|
||||
<p>Sometimes it is required to nest different types of refactoring, i.e. to transform
|
||||
constructs like</p>
|
||||
<pre><code> <span class=special>(*</span><span class=identifier>some_parser</span><span class=special>)[</span><span class=identifier>some_actor</span><span class=special>] </span><span class=special>- </span><span class=identifier>another_parser</span></code></pre>
|
||||
<p>to </p>
|
||||
<pre><code> <span class=special>(*(</span><span class=identifier>some_parser </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>))[</span><span class=identifier>some_actor</span><span class=special>]</span></code></pre>
|
||||
<p>To simplify the construction of such nested refactoring parsers the <tt>refactor_unary_gen<></tt>
|
||||
and <tt>refactor_action_gen<></tt> both can take another refactoring parser
|
||||
generator type as their respective template parameter. For instance, to construct
|
||||
a refactoring parser generator for the mentioned nested transformation we should
|
||||
write:</p>
|
||||
<pre><span class=special> </span><span class=keyword>typedef </span><span class=identifier>refactor_action_gen</span><span class=special><</span><span class=identifier>refactor_unary_gen</span><span class=special><> </span><span class=special>> </span><span class=identifier>refactor_t</span><span class=special>;
|
||||
</span><span class=keyword>const </span><span class=identifier>refactor_t </span><span class=identifier>refactor_nested_d </span><span class=special>= </span><span class=identifier>refactor_t</span><span class=special>(</span><span class=identifier>refactor_unary_d</span><span class=special>);</span></pre>
|
||||
<p>Now we could use it as follows to get the required result:</p>
|
||||
<pre><code><font color="#0000FF"> </font><span class=identifier>refactor_nested_d</span><span class=special>[(*</span><span class=identifier>some_parser</span><span class=special>)[</span><span class=identifier>some_actor</span><span class=special>] </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>]</span></code></pre>
|
||||
<p>An empty template parameter means not to nest this particular refactoring parser.
|
||||
The default template parameter is <tt>non_nesting_refactoring</tt>, a predefined
|
||||
helper structure for inhibiting nesting. Sometimes it is required to nest a
|
||||
particular refactoring parser with itself. This is achieved by providing the
|
||||
predefined helper structure <tt>self_nested_refactoring</tt> as the template
|
||||
parameter to the corresponding refactoring parser generator template.</p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/refactoring.cpp">refactoring.cpp</a> for a compilable example. This is part of the Spirit distribution. </p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="functor_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="regular_expression_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2001-2003 Hartmut Kaiser<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,240 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>References</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>References</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="acknowledgments.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><img src="theme/r_arr_disabled.gif" width="20" height="19"></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td width="36" class="table_cells"> 1.</td>
|
||||
<td width="236" class="table_cells"> <a name="expression_templates"></a>Todd
|
||||
Veldhuizen</td>
|
||||
<td width="520" class="table_cells"> "<a
|
||||
href="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.43.248">Expression
|
||||
Templates</a>". <br>
|
||||
C++ Report, June 1995.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells"> 2.</td>
|
||||
<td width="236" class="table_cells"> <a name="bnf"></a>Peter Naur (ed.)</td>
|
||||
<td width="520" class="table_cells"> "<a href="http://www.masswerk.at/algol60/report.htm">Report
|
||||
on the Algorithmic Language ALGOL 60</a>". <br>
|
||||
CACM, May 1960.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells"> 3.</td>
|
||||
<td width="236" class="table_cells"> ISO/IEC</td>
|
||||
<td width="520" class="table_cells"> "<a
|
||||
href="http://www.cl.cam.ac.uk/%7Emgk25/iso-14977.pdf">ISO-EBNF</a>", <br>
|
||||
ISO/IEC 14977: 1996(E).</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells"> 4.</td>
|
||||
<td width="236" class="table_cells"> <a name="intersections"></a>Richard J.
|
||||
Botting, Ph.D. </td>
|
||||
<td width="520" class="table_cells"> "<a
|
||||
href="http://www.csci.csusb.edu/dick/maths/intro_ebnf.html">XBNF</a>" (citing
|
||||
Leu-Weiner, 1973). <br>
|
||||
California State University, San Bernardino, 1998. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells"> 5.</td>
|
||||
<td width="236" class="table_cells"> <a name="curious_recurring"></a>James
|
||||
Coplien. </td>
|
||||
<td width="520" class="table_cells"> "<b>Curiously Recurring Template Pattern</b>".
|
||||
<br>
|
||||
C++ Report, Feb. 1995.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells"> 6.</td>
|
||||
<td width="236" class="table_cells"> <a name="generic_patterns"></a>Thierry
|
||||
Géraud and <br>
|
||||
Alexandre Duret-Lutz</td>
|
||||
<td width="520" class="table_cells"> <a
|
||||
href="http://www.coldewey.com/europlop2000/papers/geraud%2Bduret.zip">Generic
|
||||
Programming Redesign of Patterns</a><br>
|
||||
Proceedings of the 5th European Conference on Pattern Languages of Programs
|
||||
<br>
|
||||
(EuroPLoP'2000) Irsee, Germany, July 2000. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">7.</td>
|
||||
<td width="236" class="table_cells">Geoffrey Furnish</td>
|
||||
<td width="520" height="53" class="table_cells"><a href="https://adtmag.com/articles/2000/04/25/disambiguated-glommable-expression-templates-reintroduced.aspx">"Disambiguated
|
||||
Glommable Expression Templates Reintroduced"</a><br>
|
||||
C++ Report, May 2000</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">8.</td>
|
||||
<td width="236" height="53" class="table_cells"> Erich Gamma, <br>
|
||||
Richard Helm, <br>
|
||||
Ralph Johnson, <br>
|
||||
and John Vlissides</td>
|
||||
<td width="520" height="53" class="table_cells"> <b>Design Patterns, Elements
|
||||
of Reusable Object-Oriented Software</b>. <br>
|
||||
Addison-Wesley, 1995.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">9. </td>
|
||||
<td width="236" class="table_cells">Alfred V. Aho<br>
|
||||
Ravi Sethi<br>
|
||||
Jeffrey D. Ullman</td>
|
||||
<td width="520" class="table_cells"><b>Compilers, Principles, Techniques and
|
||||
Tools</b><br>
|
||||
Addison-Wesley, June 1987. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">10. </td>
|
||||
<td width="236" class="table_cells"> Dick Grune and <br>
|
||||
Ceriel Jacobs</td>
|
||||
<td width="520" class="table_cells"> <a
|
||||
href="http://www.cs.vu.nl/%7Edick/PTAPG.html">Parsing Techniques: A Practical
|
||||
Guide</a>. <br>
|
||||
Ellis Horwood Ltd.: West Sussex, England, 1990. (electronic copy, 1998).</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">11. </td>
|
||||
<td width="236" class="table_cells"> T. J. Parr, H. G. Dietz, and<br>
|
||||
W. E. Cohen</td>
|
||||
<td width="520" class="table_cells"> <a
|
||||
href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.51.7097">PCCTS Reference Manual (Version
|
||||
1.00)</a>. <br>
|
||||
School of Electrical Engineering, Purdue University, West Lafayette, August
|
||||
1991.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">12. </td>
|
||||
<td width="236" class="table_cells"> Adrian Johnstone and Elizabeth Scott.</td>
|
||||
<td width="520" class="table_cells"> <a
|
||||
href="ftp://ftp.cs.rhul.ac.uk/pub/rdp">RDP, A Recursive Descent Compiler Compiler</a>.
|
||||
<br>
|
||||
Technical Report CSD TR 97 25, Dept. of Computer Science, Egham, Surrey,
|
||||
England, Dec. 20, 1997. </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">13. </td>
|
||||
<td width="236" class="table_cells"> <a name="back_tracking_parsers"></a>Adrian
|
||||
Johnstone</td>
|
||||
<td width="520" class="table_cells"> <a
|
||||
href="https://www.cs.rhul.ac.uk/research/languages/csle/lookahead_backtrack.html">Languages
|
||||
and Architectures, <br>
|
||||
Parser generators with backtrack or extended lookahead capability</a><br>
|
||||
Department of Computer Science, Royal Holloway, University of London, Egham,
|
||||
Surrey, England</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">14. </td>
|
||||
<td width="236" class="table_cells"> <a name="damian_conway"></a>Damian Conway</td>
|
||||
<td width="520" class="table_cells"><a href="http://www.csse.monash.edu.au/%7Edamian/papers/#Embedded_Input_Parsing_for_C">Parsing
|
||||
with C++ Classes.</a><br>
|
||||
ACM SIGPLAN Notices, 29:1, 1994.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">15. </td>
|
||||
<td width="236" class="table_cells"> Joel de Guzman</td>
|
||||
<td width="520" class="table_cells"><a href="http://spirit.sourceforge.net/index.php?doc=docs/v1_3/index.html">"Spirit
|
||||
Version 1.3"</a>. <br>
|
||||
http://spirit.sourceforge.net/, November 2001.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">16. </td>
|
||||
<td width="236" class="table_cells"> <a name="combinators"></a>S. Doaitse
|
||||
Swierstra and <br>
|
||||
Luc Duponcheel</td>
|
||||
<td width="520" class="table_cells"> <a
|
||||
href="http://www.cs.uu.nl/groups/ST/Publications/AFP2.pdf">Deterministic, Error-Correcting
|
||||
Combinator Parsers </a><br>
|
||||
Dept. of Computer Science, Utrecht University P.O.Box 80.089, 3508 TB Utrecht,
|
||||
The Netherlands</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">17.</td>
|
||||
<td width="236" class="table_cells"> <a name="generalized_overloading"></a>Bjarne
|
||||
Stroustrup</td>
|
||||
<td width="520" class="table_cells"> <a
|
||||
href="http://www.research.att.com/%7Ebs/whitespace98.pdf">Generalizing Overloading
|
||||
for C++2000</a><br>
|
||||
Overload, Issue 25. April 1, 1998.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="36" class="table_cells">18.</td>
|
||||
<td width="236" class="table_cells"><a name="regex_boost_doc"></a>Dr. John
|
||||
Maddock</td>
|
||||
<td width="520" class="table_cells"><a href="http://www.boost.org/libs/regex/index.html">Regex++
|
||||
Documentation</a><br>
|
||||
http://www.boost.org/libs/regex/index.html </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">19.</td>
|
||||
<td class="table_cells">Anonymous<br>
|
||||
Edited by Graham Hutton </td>
|
||||
<td class="table_cells"> Frequently Asked Questions for comp.lang.functional.
|
||||
<br>
|
||||
Edited by Graham Hutton, University of Nottingham. <a href="http://www.cs.nott.ac.uk/%7Egmh//faq.html">http://www.cs.nott.ac.uk/~gmh//faq.html</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">20.</td>
|
||||
<td class="table_cells">Hewlett-Packard</td>
|
||||
<td class="table_cells">Standard Template Library Programmer's Guide.<br>
|
||||
<a href="https://www.boost.org/sgi/stl/">https://www.boost.org/sgi/stl/</a>,
|
||||
Hewlett-Packard Company, 1994</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">21.</td>
|
||||
<td class="table_cells">boost.org</td>
|
||||
<td class="table_cells">Boost Libraries Documentation. <a href="http://www.boost.org/">http://www.boost.org/</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">22.</td>
|
||||
<td class="table_cells">Brian McNamara and Yannis Smaragdakis</td>
|
||||
<td class="table_cells"> FC++: Functional Programming in C++. <a href="https://people.cs.umass.edu/~yannis/fc++/">https://people.cs.umass.edu/~yannis/fc++/</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="table_cells">23.</td>
|
||||
<td class="table_cells">Todd Veldhuizen</td>
|
||||
<td class="table_cells"> <a href="ftp://ftp.cs.indiana.edu/pub/techreports/TR542.pdf">Techniques
|
||||
for Scientific C++.</a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="acknowledgments.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><img src="theme/r_arr_disabled.gif" width="20" height="19"></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,91 +0,0 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title>Regular Expression Parser</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link href="theme/style.css" rel="stylesheet" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10" height="49"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b> </b></font></td>
|
||||
<td width="85%" height="49"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Regular Expression Parser</b></font></td>
|
||||
<td width="112" height="49"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="refactoring.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
|
||||
<td width="30"><a href="scoped_lock.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><a name="regular_expression_parser"></a>Regular expressions are a form of pattern-matching
|
||||
that is often used in text processing. Many users will be familiar with the
|
||||
usage of regular expressions. Initially there were the Unix utilities grep,
|
||||
sed and awk, and the programming language Perl, each of which makes extensive
|
||||
use of regular expressions. Today the usage of such regular expressions is integrated
|
||||
in many more available systems.</p>
|
||||
<p>During parser construction it is often useful to have the power of regular
|
||||
expressions available. The Regular Expression Parser was introduced to make
|
||||
the use of regular expressions accessible for Spirit parser construction.</p>
|
||||
<p>The Regular Expression Parser <tt>rxstrlit</tt> has a single template type
|
||||
parameter: an iterator type. Internally, <tt>rxstrlit</tt> holds the Boost Regex
|
||||
object containing the provided regular expression. The <tt>rxstrlit</tt> attempts
|
||||
to match the current input stream with this regular expression. The template
|
||||
type parameter defaults to <tt>char const<span class="operators">*</span></tt>.
|
||||
<tt>rxstrlit</tt> has two constructors. The first accepts a null-terminated
|
||||
character pointer. This constructor may be used to build <tt>rxstrlit</tt>s
|
||||
from quoted regular expression literals. The second constructor takes in a first/last
|
||||
iterator pair. The function generator version is <tt>regex_p</tt>. </p>
|
||||
<p>Here are some examples:</p>
|
||||
<pre><code><span class=comment> </span><span class=identifier>rxstrlit</span><span class=special><>(</span><span class=string>"Hello[[:space:]]+[W|w]orld"</span><span class=special>)
|
||||
</span><span class=identifier>regex_p</span><span class=special>(</span><span class=string>"Hello[[:space:]]+[W|w]orld"</span><span class=special>)
|
||||
|
||||
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string </span><span class=identifier>msg</span><span class=special>(</span><span class=string>"Hello[[:space:]]+[W|w]orld"</span><span class=special>);
|
||||
rx</span><span class=identifier>strlit</span><span class=special><>(</span><span class=identifier>msg</span><span class=special>.</span><span class=identifier>begin</span><span class=special>(), </span><span class=identifier>msg</span><span class=special>.</span><span class=identifier>end</span><span class=special>());</span></code></pre>
|
||||
<p>The generated parser object acts at the character level, thus any
|
||||
given skip parser is not used during the attempt to match the regular expression
|
||||
(see <a href="faq.html#scanner_business">The Scanner Business</a>).</p>
|
||||
<p>The Regular Expression Parser is implemented with the help of the <a href="http://www.boost.org/libs/regex/index.html">Boost
|
||||
Regex++ library</a>, so you have to keep some limitations in mind. </p>
|
||||
<blockquote>
|
||||
<p><img src="theme/bullet.gif" width="12" height="12"> Boost libraries have
|
||||
to be installed on your computer and the Boost root directory has to be added
|
||||
to your compiler <tt>#include<...></tt> search path. You can download
|
||||
the current version at the <a href="http://www.boost.org/">Boost web site</a>.</p>
|
||||
<p><img src="theme/bullet.gif" width="12" height="12"> The Boost Regex library
|
||||
requires the usage of bi-directional iterators. So you have to ensure this
|
||||
during the usage of the Spirit parser, which contains a Regular Expression
|
||||
Parser.</p>
|
||||
<p><img src="theme/bullet.gif" width="12" height="12"> The Boost Regex library
|
||||
is not a header-only library, as Spirit is, though it provides the possibility
|
||||
to include all of the sources, if you are using it in one compilation unit
|
||||
only. Define the preprocessor constant <tt>BOOST_SPIRIT_NO_REGEX_LIB</tt> before
|
||||
including the Spirit Regular Expression Parser header, if you want to include
|
||||
all the Boost Regex sources into this compilation unit. If you are using the
|
||||
Regular Expression Parser in more than one compilation unit, you should not
|
||||
define this constant and must link your application against the regex library
|
||||
as described in the related documentation.</p>
|
||||
</blockquote>
|
||||
<p> <img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/regular_expression.cpp">regular_expression.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="refactoring.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
|
||||
<td width="30"><a href="scoped_lock.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2001-2002 Hartmut Kaiser<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,231 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>The Rule</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Rule</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="numerics.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="epsilon.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The <b>rule</b> is a polymorphic parser that acts as a named place-holder capturing
|
||||
the behavior of an EBNF expression assigned to it. Naming an EBNF expression
|
||||
allows it to be referenced later. The <tt>rule</tt> is a template class parameterized
|
||||
by the type of the scanner (<tt>ScannerT</tt>), the rule's <a href="indepth_the_parser_context.html">context</a>
|
||||
and its <a href="#tag">tag</a>. Default template parameters are provided to
|
||||
make it easy to use the rule.</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>ScannerT </span><span class=special>= </span><span class=identifier>scanner</span><span class=special><>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>ContextT </span><span class=special>= </span><span class=identifier>parser_context</span><span class=special><></span><span class=identifier>,
|
||||
</span><span class="keyword">typename</span><span class=identifier> TagT </span><span class="special">=</span><span class=identifier> parser_address_tag</span><span class=special>>
|
||||
</span><span class=keyword>class </span><span class=identifier>rule</span><span class=special>;</span></font></code></pre>
|
||||
<p>Default template parameters are supplied to handle the most common case. <tt>ScannerT</tt>
|
||||
defaults to <tt>scanner<></tt>, a plain vanilla scanner that acts on <tt>char
|
||||
const<span class="operators">*</span></tt> iterators and does nothing special
|
||||
at all other than iterate through all the chars in the null terminated input
|
||||
a character at a time. The rule tag, <tt>TagT</tt>, typically used with <a href="trees.html">ASTs</a>,
|
||||
is used to identify a rule; it is explained <a href="#tag">here</a>. In trivial
|
||||
cases, declaring a rule as <tt>rule<></tt> is enough. You need not be
|
||||
concerned at all with the <tt>ContextT</tt> template parameter unless you wish
|
||||
to tweak the low level behavior of the rule. Detailed information on the <tt>ContextT</tt>
|
||||
template parameter is provided <a href="indepth_the_parser_context.html">elsewhere</a>.
|
||||
</p>
|
||||
<h3><a name="order_of_parameters"></a>Order of parameters</h3>
|
||||
<p>As of v1.8.0, the <tt>ScannerT</tt>, <tt>ContextT</tt> and <tt>TagT</tt> can
|
||||
be specified in any order. If a template parameter is missing, it will assume
|
||||
the defaults. Examples:</p>
|
||||
<pre><span class=identifier> rule</span><span class=special><> </span><span class=identifier>rx1</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>scanner</span><span class=special><> </span><span class=special>> </span><span class=identifier>rx2</span><span class=special>;
|
||||
</span> <span class=identifier>rule</span><span class=special><</span><span class=identifier>parser_context<code><font color="#000000"><span class=special><></span></font></code> </span><span class=special>> </span><span class=identifier>rx3</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>parser_context<code><font color="#000000"><span class=special><></span></font></code></span><span class=special>, </span><span class=identifier>parser_address_tag</span><span class=special>> </span><span class=identifier>rx4</span><span class=special>;
|
||||
</span> <span class=identifier>rule</span><span class=special><</span><span class=identifier>parser_address_tag</span><span class=special>> </span><span class=identifier>rx5</span><span class=special>;
|
||||
</span> <span class=identifier>rule</span><span class=special><</span><span class=identifier>parser_address_tag</span><span class=special>, </span><span class=identifier>scanner</span><span class=special><>, </span><span class=identifier>parser_context<code><font color="#000000"><span class=special><></span></font></code> </span><span class=special>> </span><span class=identifier>rx6</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>parser_context<code><font color="#000000"><span class=special><></span></font></code></span><span class=special>, </span><span class=identifier>scanner</span><span class=special><>, </span><span class=identifier>parser_address_tag</span><span class=special>> </span><span class=identifier>rx7</span><span class=special>;</span></pre>
|
||||
<h3><a name="multiple_scanner_support" id="multiple_scanner_support"></a>Multiple scanners</h3>
|
||||
<p>As of v1.8.0, rules can use one or more scanner types. There are cases, for
|
||||
instance, where we need a rule that can work on the phrase and character levels.
|
||||
Rule/scanner mismatch has been a source of confusion and is the no. 1 <a href="faq.html#scanner_business">FAQ</a>.
|
||||
To address this issue, we now have multiple scanner support. Example:</p>
|
||||
<pre><span class=special> </span><span class=keyword>typedef </span><span class=identifier>scanner_list</span><span class=special><</span><span class=identifier>scanner</span><span class=special><>, </span><span class=identifier>phrase_scanner_t</span><span class=special>> </span><span class=identifier>scanners</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>scanners</span><span class=special>> </span><span class=identifier>r </span><span class=special>= </span><span class=special>+</span><span class=identifier>anychar_p</span><span class=special>;
|
||||
</span><span class=identifier>assert</span><span class=special>(</span><span class=identifier>parse</span><span class=special>(</span><span class=string>"abcdefghijk"</span><span class=special>, </span><span class=identifier>r</span><span class=special>).</span><span class=identifier>full</span><span class=special>);
|
||||
</span><span class=identifier>assert</span><span class=special>(</span><span class=identifier>parse</span><span class=special>(</span><span class=string>"a b c d e f g h i j k"</span><span class=special>, </span><span class=identifier>r</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>).</span><span class=identifier>full</span><span class=special>);</span></pre>
|
||||
<p>Notice how rule <tt>r</tt> is used in both the phrase and character levels.
|
||||
</p>
|
||||
<p>By default support for multiple scanners is disabled. The macro
|
||||
<tt>BOOST_SPIRIT_RULE_SCANNERTYPE_LIMIT</tt> must be defined to the
|
||||
maximum number of scanners allowed in a scanner_list. The value must
|
||||
be greater than 1 to enable multiple scanners. Given the
|
||||
example above, to define a limit of two scanners for the list, the
|
||||
following line must be inserted into the source file before the
|
||||
inclusion of Spirit headers:
|
||||
</p>
|
||||
<pre><span class=special> </span><span class=preprocessor>#define </span><span class=identifier>BOOST_SPIRIT_RULE_SCANNERTYPE_LIMIT</span> <span class=literal>2</span></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"> See
|
||||
the techniques section for an <a href="techniques.html#multiple_scanner_support">example</a>
|
||||
of a <a href="grammar.html">grammar</a> using a multiple scanner enabled
|
||||
rule, <a href="scanner.html#lexeme_scanner">lexeme_scanner</a> and <a href="scanner.html#as_lower_scanner">as_lower_scanner</a>.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3>Rule Declarations</h3>
|
||||
<p>The rule class models EBNF's production rule. Example:</p>
|
||||
<pre><code><font color="#000000"> <span class=identifier>rule</span><span class=special><> </span><span class=identifier>a_rule </span><span class=special>= </span><span class=special>*(</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>) </span><span class=special>& </span><span class=special>+(</span><span class=identifier>c </span><span class=special>| </span><span class=identifier>d </span><span class=special>| </span><span class=identifier>e</span><span class=special>);</span></font></code></pre>
|
||||
<p>The type and behavior of the right-hand (rhs) EBNF expression, which may be
|
||||
arbitrarily complex, is encoded in the rule named a_rule. a_rule may now be
|
||||
referenced elsewhere in the grammar:</p>
|
||||
<pre><code><font color="#000000"> <span class=identifier>rule</span><span class=special><> </span><span class=identifier>another_rule </span><span class=special>= </span><span class=identifier>f </span><span class=special>>> </span><span class=identifier>g </span><span class=special>>> </span><span class=identifier>h </span><span class=special>>> </span><span class=identifier>a_rule</span><span class=special>;</span></font></code></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> <b>Referencing
|
||||
rules <br>
|
||||
</b><br>
|
||||
When a rule is referenced anywhere in the right hand side of an EBNF expression,
|
||||
the rule is held by the expression by reference. It is the responsibility
|
||||
of the client to ensure that the referenced rule stays in scope and does
|
||||
not get destructed while it is being referenced. </td>
|
||||
</tr>
|
||||
</table>
|
||||
<pre><span class=special> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>int_p</span><span class=special>;
|
||||
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>a</span><span class=special>;
|
||||
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>int_p </span><span class=special>>> </span><span class=identifier>b</span><span class=special>;</span></pre>
|
||||
<h3>Copying Rules</h3>
|
||||
<p>The rule is a weird C++ citizen, unlike any other C++ object. It does not have
|
||||
the proper copy and assignment semantics and cannot be stored and passed around
|
||||
by value. If you need to copy a rule you have to explicitly call its member
|
||||
function <tt>copy()</tt>:</p>
|
||||
<pre><span class=special> </span><span class=identifier>r</span><span class="special">.</span><span class=identifier>copy()</span><span class=special>;</span></pre>
|
||||
<p>However, be warned that copying a rule will not deep copy other referenced
|
||||
rules of the source rule being copied. This might lead to dangling references.
|
||||
Again, it is the responsibility of the client to ensure that all referenced
|
||||
rules stay in scope and do not get destructed while they are being referenced.
|
||||
Caveat emptor.</p>
|
||||
<p>If you copy a rule, then you'll want to place it in storage somewhere. The
|
||||
problem is how? The storage can't be another rule:</p>
|
||||
<pre> <code><font color="#000000"><span class=identifier>rule</span><span class=special><></span></font></code> r2 <span class="special">=</span> <span class=identifier>r</span><span class="special">.</span><span class=identifier>copy()</span><span class=special>; </span><span class="comment">// BAD!</span></pre>
|
||||
<p>because rules are weird and do not have the expected C++ copy-constructor
|
||||
and assignment semantics! As a general rule: <strong>Don't put a copied rule
|
||||
into another rule! </strong>Instead, use the <a href="stored_rule.html">stored_rule</a>
|
||||
for that purpose.</p>
|
||||
<h3>Forward declarations</h3>
|
||||
<p>A <tt>rule</tt> may be declared before being defined to allow cyclic structures
|
||||
typically found in BNF declarations. Example:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>rule</span><span class=special><> </span><span class=identifier>a</span><span class=special>, </span><span class=identifier>b</span><span class=special>, </span><span class=identifier>c</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>a </span><span class=special>= </span><span class=identifier>b </span><span class=special>| </span><span class=identifier>a</span><span class=special>;
|
||||
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>c </span><span class=special>| </span><span class=identifier>a</span><span class=special>;</span></font></code></pre>
|
||||
<h3>Recursion</h3>
|
||||
<p>The right-hand side of a rule may reference other rules, including itself.
|
||||
The limitation is that direct or indirect left recursion is not allowed (this
|
||||
is an unchecked run-time error that results in an infinite loop). This is typical
|
||||
of top-down parsers. Example:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>; </span><span class=comment>// infinite loop!</span></font></code></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>What
|
||||
is left recursion?<br>
|
||||
</b><br>
|
||||
Left recursion happens when you have a rule that calls itself before anything
|
||||
else. A top-down parser will go into an infinite loop when this happens.
|
||||
See the <a href="faq.html#left_recursion">FAQ</a> for details on how to
|
||||
eliminate left recursion.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3>Undefined rules</h3>
|
||||
<p>An undefined rule matches nothing and is semantically equivalent to <tt>nothing_p</tt>.</p>
|
||||
<h3>Redeclarations</h3>
|
||||
<p>Like any other C++ assignment, a second assignment to a rule is destructive
|
||||
and will redefine it. The old definition is lost. Rules are dynamic. A rule
|
||||
can change its definition anytime:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> r </span><span class=special>= </span><span class=identifier>a_definition</span><span class=special>;
|
||||
</span><span class=identifier> r </span><span class=special>= </span><span class=identifier>another_definition</span><span class=special>;</span></font></code></pre>
|
||||
<p>Rule <tt>r</tt> loses the old definition when the second assignment is made.
|
||||
As mentioned, an undefined rule matches nothing and is semantically equivalent
|
||||
to <tt>nothing_p</tt>.</p>
|
||||
<h3>Dynamic Parsers</h3>
|
||||
<p>Hosting declarative EBNF in imperative C++ yields an interesting blend. We
|
||||
have the best of both worlds. We have the ability to conveniently modify the
|
||||
grammar at run time using imperative constructs such as <tt>if</tt>, <tt>else</tt>
|
||||
statements. Example:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>if </span><span class=special>(</span><span class=identifier>feature_is_available</span><span class=special>)
|
||||
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>add_this_feature</span><span class=special>;</span></font></code></pre>
|
||||
<p>Rules are essentially dynamic parsers. A dynamic parser is characterized by
|
||||
its ability to modify its behavior at run time. Initially, an undefined rule
|
||||
matches nothing. At any time, the rule may be defined and redefined, thus, dynamically
|
||||
altering its behavior.</p>
|
||||
<h3>No start rule</h3>
|
||||
<p>Typically, parsers have what is called a start symbol, chosen to be the root
|
||||
of the grammar where parsing starts. The Spirit parser framework has no notion
|
||||
of a start symbol. Any rule can be a start symbol. This feature promotes step-wise
|
||||
creation of parsers. We can build parsers from the bottom up while fully testing
|
||||
each level or module up until we get to the top-most level.</p>
|
||||
<h3><a name="tag"></a>Parser Tags</h3>
|
||||
<p>Rules may be tagged for identification purposes. This is necessary, especially
|
||||
when dealing with <a href="trees.html">parse trees and ASTs</a> to see which
|
||||
rule created a specific AST/parse tree node. Each rule has an ID of type <tt>parser_id</tt>.
|
||||
This ID can be obtained through the rule's <tt>id()</tt> member function:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> my_rule</span><span class=special>.</span><span class=identifier>id</span><span class=special>(); </span><span class=comment>// get my_rule's id</span></font></code></pre>
|
||||
<p>The <tt>parser_id</tt> class is declared as:</p>
|
||||
<pre> <span class="keyword">class</span> <span class="identifier">parser_id</span><br> <span class="special">{</span><br> <span class="keyword">public</span><span class="special">:</span><br> parser_id<span class="special">();</span><br> <span class="keyword">explicit</span> parser_id<span class="special">(</span><span class="keyword">void const</span><span class="special">*</span> p<span class="special">);</span><br> parser_id<span class="special">(</span><span class="keyword">std::size_t</span> l<span class="special">);</span>
|
||||
|
||||
<span class="keyword">bool</span> <span class="keyword">operator</span><span class="special">==(</span><span class="identifier">parser_id</span> <span class="keyword">const</span><span class="special">&</span> x<span class="special">)</span> const<span class="special">;</span><br> <span class="keyword">bool</span> <span class="keyword">operator</span><span class="special">!=(</span><span class="identifier">parser_id</span> <span class="keyword">const</span><span class="special">&</span> x<span class="special">)</span> const<span class="special">;</span>
|
||||
<span class="keyword">bool</span> <span class="keyword"> operator</span><span class="special"><(</span><span class="identifier">parser_id</span> <span class="keyword">const</span><span class="special">&</span> x<span class="special">)</span> const<span class="special">;</span>
|
||||
<span class="special"></span><span class="keyword">std::size_t</span><span class="identifier"> to_long</span><span class="special">()</span> <span class="keyword">const</span><span class="special">;
|
||||
};</span></pre>
|
||||
<h3>parser_address_tag</h3>
|
||||
<p>The rule's <tt>TagT</tt> template parameter supplies this ID. This defaults
|
||||
to <tt>parser_address_tag</tt>. The <tt>parser_address_tag</tt> uses the address
|
||||
of the rule as its ID. This is often not the most convenient, since it is not
|
||||
always possible to get the address of a rule to compare against. </p>
|
||||
<h3>parser_tag</h3>
|
||||
<p>It is possible to have specific constant integers to identify a rule. For this
|
||||
purpose, we can use the <tt>parser_tag&lt;N&gt;</tt>, where N is a constant
|
||||
integer:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> rule</span><span class=special><</span><span class=identifier>parser_tag</span><span class="special"><</span><span class=identifier>123</span><span class="special">> > </span><span class="identifier">my_rule</span><span class="special">; </span><span class="comment">// set my_rule's id to 123</span></font></code></pre>
|
||||
<h3>dynamic_parser_tag</h3>
|
||||
<p>The <tt>parser_tag&lt;N&gt;</tt> can only specify a <strong>static ID</strong>,
|
||||
which is defined at compile time. If you need the ID to be <strong>dynamic</strong>
|
||||
(changeable at runtime), you can use the <tt>dynamic_parser_tag</tt> class as
|
||||
the <tt>TagT</tt> template parameter. This template parameter enables the <tt>set_id()</tt>
|
||||
function, which may be used to set the required id at runtime:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> rule</span><span class=special><</span><span class=identifier>dynamic_parser_tag</span><span class="special">> </span><span class="identifier">my_dynrule</span><span class="special">;</span>
|
||||
my_dynrule.set_id(1234); <span class="comment">// set my_dynrule's id to 1234</span></font></code></pre>
|
||||
<p>If the <tt>set_id()</tt> function isn't called, the parser id defaults to the
|
||||
address of the rule as its ID, just like the <tt>parser_address_tag</tt> template
|
||||
parameter would do. </p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="numerics.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="epsilon.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,288 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>The Scanner and Parsing</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Scanner and Parsing</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="directives.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="grammar.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The <b>scanner</b>'s task is to feed the sequential input data stream to the
|
||||
parser. The scanner extracts data from the input, parceling, potentially modifying
|
||||
or filtering, and then finally relegating the result to individual parser elements
|
||||
on demand until the input is exhausted. The scanner is composed of two STL conforming
|
||||
forward iterators, first and last, where first is held by reference and last,
|
||||
by value. The first iterator is held by reference to allow it to be re-positioned.
|
||||
The following diagram illustrates what's happening:</p>
|
||||
<table width="62%" border="0" align="center">
|
||||
<tr>
|
||||
<td><img src="theme/scanner1.png"></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The scanner manages various aspects of the parsing process through a set of
|
||||
policies. There are three sets of policies that govern:</p>
|
||||
<blockquote>
|
||||
<p><img src="theme/bullet.gif" width="12" height="12"> Iteration and filtering<br>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> Recognition and matching<br>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> Handling semantic actions</p>
|
||||
</blockquote>
|
||||
<p>These policies are mostly hidden from view and users generally need not know
|
||||
about them. Advanced users might however provide their own policies that override
|
||||
the ones that are already in place to fine tune the parsing process
|
||||
to fit their own needs. We shall see how this can be done. This will be covered
|
||||
in further detail later.</p>
|
||||
<p>The <tt>scanner</tt> is a template class expecting two parameters: <tt>IteratorT</tt>,
|
||||
the iterator type and <tt>PoliciesT</tt>, its set of policies. <tt>IteratorT</tt>
|
||||
defaults to <tt>char const*</tt> while <tt>PoliciesT</tt> defaults to <tt>scanner_policies<></tt>,
|
||||
a predefined set of scanner policies that we can use straight out of the box.</p>
|
||||
<pre><code><font color="#000000"><span class=keyword> template</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>IteratorT </span><span class=special>= </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*,
|
||||
</span><span class=keyword>typename </span><span class=identifier>PoliciesT </span><span class=special>= </span><span class=identifier>scanner_policies</span><span class=special><> </span><span class=special>>
|
||||
</span><span class=keyword>class </span><span class=identifier>scanner</span><span class=special>;</span></font></code></pre>
|
||||
<p>Spirit uses the same iterator concepts and interface formally defined by the
|
||||
C++ Standard Template Library (STL). We can use iterators supplied by STL's
|
||||
containers (e.g. <tt>list</tt>, <tt>vector</tt>, <tt>string</tt>, etc.) as is,
|
||||
or perhaps write our own. Iterators can be as simple as a pointer (e.g. <tt>char
|
||||
const<span class="operators">*</span></tt>). At the other end of the spectrum,
|
||||
iterators can be quite complex; for instance, an iterator adapter that wraps
|
||||
a lexer such as LEX.</p>
|
||||
<h2>The Free Parse Functions</h2>
|
||||
<p>The framework provides a couple of free functions to make parsing a snap. These
|
||||
parser functions have two forms. The first form works on the <b>character level</b>.
|
||||
The second works on the <b>phrase level</b> and asks for a <b>skip parser</b>.</p>
|
||||
<p>The <b>skip parser</b> is just about any parser primitive or composite. Its
|
||||
purpose is to move the scanner's <tt>first</tt> iterator to valid tokens by
|
||||
skipping white spaces. In C for instance, the tab <tt class="quotes">'\t'</tt>,
|
||||
the newline <tt class="quotes">'\n'</tt>, return <tt><span class="quotes">'\r'</span></tt>,
|
||||
space <tt class="quotes">' '</tt> and characters inside comments <tt class="quotes">/*...*/</tt>
|
||||
are considered as white spaces.</p>
|
||||
<p><b>Character level parsing</b></p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>>
|
||||
</span><span class=identifier>parse_info</span><span class=special><</span><span class=identifier>IteratorT</span><span class=special>>
|
||||
</span><span class=identifier>parse
|
||||
</span><span class=special>(
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>first</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>last</span><span class=special>,
|
||||
</span><span class=identifier>parser</span><span class=special><</span><span class=identifier>DerivedT</span><span class=special>> </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p
|
||||
</span><span class=special>);</span></font></code></pre>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>>
|
||||
</span><span class=identifier>parse_info</span><span class=special><</span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>*>
|
||||
</span><span class=identifier>parse
|
||||
</span><span class=special>(
|
||||
</span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>str</span><span class=special>,
|
||||
</span><span class=identifier>parser</span><span class=special><</span><span class=identifier>DerivedT</span><span class=special>> </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p
|
||||
</span><span class=special>);</span></font></code></pre>
|
||||
<p>There are two variants. The first variant accepts a <tt>first</tt>, <tt>last</tt>
|
||||
iterator pair like you do with STL algorithms. The second variant accepts a null
|
||||
terminated string. The last argument is a parser <tt>p</tt> which will be used
|
||||
to parse the input.</p>
|
||||
<p><b>Phrase level parsing</b></p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>SkipT</span><span class=special>>
|
||||
</span><span class=identifier>parse_info</span><span class=special><</span><span class=identifier>IteratorT</span><span class=special>>
|
||||
</span><span class=identifier>parse
|
||||
</span><span class=special>(
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>first</span><span class=special>,
|
||||
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>last</span><span class=special>,
|
||||
</span><span class=identifier>parser</span><span class=special><</span><span class=identifier>ParserT</span><span class=special>> </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p</span><span class=special>,
|
||||
</span><span class=identifier>parser</span><span class=special><</span><span class=identifier>SkipT</span><span class=special>> </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>skip
|
||||
</span><span class=special>);</span></font></code></pre>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>SkipT</span><span class=special>>
|
||||
</span><span class=identifier>parse_info</span><span class=special><</span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>*>
|
||||
</span><span class=identifier>parse
|
||||
</span><span class=special>(
|
||||
</span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>str</span><span class=special>,
|
||||
</span><span class=identifier>parser</span><span class=special><</span><span class=identifier>ParserT</span><span class=special>> </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p</span><span class=special>,
|
||||
</span><span class=identifier>parser</span><span class=special><</span><span class=identifier>SkipT</span><span class=special>> </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>skip
|
||||
</span><span class=special>);</span></font></code></pre>
|
||||
<p>Like above, there are two variants. The first variant accepts a <tt>first</tt>,
|
||||
<tt>last</tt> iterator pair like you do with STL algorithms. The second variant accepts
|
||||
a null terminated string. The argument <tt>p</tt> is the parser which will be
|
||||
used to parse the input. The last argument <tt>skip</tt> is the skip parser.</p>
|
||||
<p><b>The parse_info structure</b></p>
|
||||
<p>The functions above return a <tt>parse_info</tt> structure parameterized by
|
||||
the iterator type passed in. The parse_info struct has these members:</p>
|
||||
<table width="90%" border="0" align="center">
|
||||
<tr>
|
||||
<td colspan="2" class="table_title"><b>parse_info</b></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="14%" class="table_cells"><b>stop</b></td>
|
||||
<td width="86%" class="table_cells">Points to the final parse position (i.e.
|
||||
the parser recognized and processed the input up to this point)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="14%" class="table_cells"><b>hit</b></td>
|
||||
<td width="86%" class="table_cells">True if parsing is successful. This may
|
||||
be full: the parser consumed all the input, or partial: the parser consumed
|
||||
only a portion of the input.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="14%" class="table_cells"><b>full</b></td>
|
||||
<td width="86%" class="table_cells">True when we have a full match (i.e. the
|
||||
parser consumed all the input).</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td width="14%" class="table_cells"><b>length</b></td>
|
||||
<td width="86%" class="table_cells">The number of characters consumed by the
|
||||
parser. This is valid only if we have a successful match (either partial
|
||||
or full). </td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2><a name="phrase_scanner_t" id="phrase_scanner_t"></a><img src="theme/lens.gif" width="15" height="16">
|
||||
The phrase_scanner_t and wide_phrase_scanner_t</h2>
|
||||
<p>For convenience, Spirit declares these typedefs:</p>
|
||||
<pre>
|
||||
<span class="keyword">typedef</span> scanner<span class="special"><</span><span class="keyword">char const</span><span class="special">*,</span> unspecified<span class="special">></span> phrase_scanner_t<span class="special">;</span>
|
||||
<span class="keyword">typedef</span> scanner<span class="special"><</span><span class="keyword">wchar_t const</span><span class="special">*,</span> <span class="identifier">unspecified</span><span class="special">></span> wide_phrase_scanner_t<span class="special">;</span>
|
||||
</pre>
|
||||
<p>These are the exact scanner types used by Spirit on calls to the parse function
|
||||
passing in a <tt>char const*</tt> (C string) or a <tt>wchar_t const*</tt> (wide
|
||||
string) as the first parameter and a <tt>space_p</tt> as skip-parser (the third
|
||||
parameter). For instance, we can use these typedefs to declare some rules. Example:</p>
|
||||
<pre> rule<span class="special"><</span>phrase_scanner_t<span class="special">> </span><span class="identifier">my_rule</span><span class="special">;
|
||||
</span><span class="identifier">parse</span><span class="special">(</span><span class="string">"abrakadabra"</span><span class="special">, </span><span class="identifier">my_rule</span><span class="special">,</span> <span class="identifier">space_p</span><span class="special">);</span></pre>
|
||||
<h2><img src="theme/lens.gif" width="15" height="16"> Direct parsing with Iterators</h2>
|
||||
<p>The free parse functions make it easy for us. By using them, we need not bother
|
||||
with the scanner intricacies. The free parse functions hide the dirty details.
|
||||
However, sometime in the future, we will need to get under the hood. It's nice
|
||||
that we know what we are dealing with when that need comes. We will need to
|
||||
go low-level and call the parser's parse member function directly. </p>
|
||||
<p>If we wish to work on the <b>character level</b>, the procedure is quite simple:</p>
|
||||
<pre><span class=identifier> </span><span class=identifier>scanner</span><span class=special><</span><span class=identifier>IteratorT</span><span class=special>> </span><span class=identifier>scan</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>);
|
||||
|
||||
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>))
|
||||
</span><span class=special>{
|
||||
</span><span class=comment>// Parsed successfully. If first == last, then we have
|
||||
// a full parse, the parser recognized the input in whole.
|
||||
</span><span class=special>}
|
||||
</span><span class=keyword>else
|
||||
</span><span class=special>{
|
||||
</span><span class=comment>// Parsing failure. The parser failed to recognize the input
|
||||
</span><span class=special>}</span></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> <strong>The
|
||||
scanner position on an unsuccessful match</strong><br> <br>
|
||||
On a successful match, the input is advanced accordingly. But what happens
|
||||
on an unsuccessful match? Be warned. It might be intuitive to think that
|
||||
the scanner position is reset to its initial position prior to parsing.
|
||||
No, the position is not reset. On an unsuccessful match, the position of
|
||||
the scanner is <strong>undefined</strong>! Usually, it is positioned at
|
||||
the farthest point where the error was found somewhere down the recursive
|
||||
descent. If this behavior is not desired, you may need to position the scanner
|
||||
yourself. The <a href="numerics.html#scanner_save">example in the numerics
|
||||
chapter</a> illustrates how the scanner position can be saved and later
|
||||
restored.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Where <tt>p</tt> is the parser we want to use, and <tt>first</tt>/<tt>last</tt>
|
||||
are the iterator pairs referring to the input. We just create a scanner given
|
||||
the iterators. The scanner type we will use here uses the default <tt>scanner_policies<></tt>.</p>
|
||||
<p>The situation is a bit more complex when we wish to work on the <b>phrase level</b>:</p>
|
||||
<pre><span class=special> </span><span class=keyword>typedef </span><span class=identifier>skip_parser_iteration_policy</span><span class=special><</span><span class=identifier>SkipT</span><span class=special>> </span><span class=identifier>iter_policy_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>scanner_policies</span><span class=special><</span><span class=identifier>iter_policy_t</span><span class=special>> </span><span class=identifier>scanner_policies_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>scanner</span><span class=special><</span><span class=identifier>IteratorT</span><span class=special>, </span><span class=identifier>scanner_policies_t</span><span class=special>> </span><span class=identifier>scanner_t</span><span class=special>;
|
||||
|
||||
</span><span class=special> </span><span class=identifier>iter_policy_t </span><span class=identifier>iter_policy</span><span class=special>(</span><span class=identifier>skip</span><span class=special>);
|
||||
</span><span class=identifier>scanner_policies_t </span><span class=identifier>policies</span><span class=special>(</span><span class=identifier>iter_policy</span><span class=special>);
|
||||
</span><span class=identifier>scanner_t </span><span class=identifier>scan</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>policies</span><span class=special>);
|
||||
</span>
|
||||
<span class=keyword>if </span><span class=special>(</span><span class=identifier>p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>))
|
||||
</span><span class=special>{
|
||||
</span><span class=comment>// Parsed successfully. If first == last, then we have
|
||||
// a full parse, the parser recognized the input in whole.
|
||||
</span><span class=special>}
|
||||
</span><span class=keyword>else
|
||||
</span><span class=special>{
|
||||
</span><span class=comment>// Parsing failure. The parser failed to recognize the input
|
||||
</span><span class=special>}</span></pre>
|
||||
<p>Where <tt>SkipT</tt> is the type of the skip-parser, <tt>skip</tt>. Again,
|
||||
<tt>p</tt> is the parser we want to use, and <tt>first</tt>/<tt>last</tt> are
|
||||
the iterator pairs referring to the input. Given a skip-parser type <tt>SkipT</tt>,
|
||||
<span class=identifier><tt>skip_parser_iteration_policy</tt></span> creates
|
||||
a scanner iteration policy that skips over portions that are recognized by the
|
||||
skip-parser. This may then be used to create a scanner. The <tt>scanner_policies</tt>
|
||||
class wraps all scanner related policies including the iteration policies.</p>
|
||||
<h2><a name="lexeme_scanner"></a>lexeme_scanner</h2>
|
||||
<p>When switching from phrase level to character level parsing, the <tt>lexeme_d</tt>
|
||||
(see <a href="directives.html">directives.html</a>) does its magic by disabling
|
||||
the skipping of white spaces. This is done by tweaking the <a href="scanner.html">scanner</a>.
|
||||
However, when we do this, all parsers inside the lexeme get a transformed scanner
|
||||
type. This should not be a problem in most cases. However, when rules are called
|
||||
inside the <tt>lexeme_d</tt>, the compiler will choke if the rule does not have
|
||||
the proper scanner type. If a rule must be used inside a <tt>lexeme_d</tt>,
|
||||
the rule's type must be:</p>
|
||||
<pre> <span class=identifier>rule</span><span class=special><</span><span class=identifier>lexeme_scanner</span><span class="special"><</span><span class=identifier>ScannerT</span><span class=special>>::</span><span class="identifier">type</span><span class=special>> </span>r<span class=special>;</span></pre>
|
||||
<p>where <span class=identifier><tt>ScannerT</tt></span> is the actual type of
|
||||
the scanner used. Take note that <tt>lexeme_scanner</tt> will only work for phrase level scanners. </p>
|
||||
<h2><a name="as_lower_scanner"></a>as_lower_scanner</h2>
|
||||
<p>Similarly, the <tt>as_lower_d</tt> does its work by filtering and converting
|
||||
all characters received from the scanner to lower case. This is also done by
|
||||
tweaking the <a href="scanner.html">scanner</a>. Then again, all parsers inside
|
||||
the <tt>as_lower_d</tt> get a transformed scanner type. If a rule must be used
|
||||
inside an <tt>as_lower_d</tt>, the rule's type must be:</p>
|
||||
<pre> <span class=identifier>rule</span><span class=special><</span><span class=identifier>as_lower_scanner</span><span class="special"><</span><span class=identifier>ScannerT</span><span class=special>>::</span><span class="identifier">type</span><span class=special>> </span>r<span class=special>;</span></pre>
|
||||
<p>where <span class=identifier><tt>ScannerT</tt></span> is the actual type of
|
||||
the scanner used. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"> See
|
||||
the techniques section for an <a href="techniques.html#multiple_scanner_support">example</a>
|
||||
of a <a href="grammar.html">grammar</a> using a <a href="rule.html#multiple_scanner_support">multiple
|
||||
scanner enabled rule</a>, <a href="scanner.html#lexeme_scanner">lexeme_scanner</a>
|
||||
and <a href="scanner.html#as_lower_scanner">as_lower_scanner.</a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3><a name="no_actions_scanner"></a>no_actions_scanner</h3>
|
||||
<p>Again, the <tt>no_actions_d</tt> directive tweaks the scanner to disable firing
|
||||
semantic actions. Like before, all parsers inside the <tt>no_actions_d</tt>
|
||||
get a transformed scanner type. If a rule must be used inside a <tt>no_actions_d</tt>,
|
||||
the rule's type must be:</p>
|
||||
<pre> <span class=identifier>rule</span><span class=special><</span>no_actions_scanner<span class="special"><</span><span class=identifier>ScannerT</span><span class=special>>::</span><span class="identifier">type</span><span class=special>> </span>r<span class=special>;</span></pre>
|
||||
<p>where <tt>ScannerT</tt> is the actual type of the scanner used. <span class=special></span></p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> Be
|
||||
sure to add "<tt>typename</tt>" before <tt>lexeme_scanner</tt>,
|
||||
<tt>as_lower_scanner</tt> and <tt>no_actions_scanner</tt> when
|
||||
these are used inside a template class or function.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/no_actions.cpp">no_actions.cpp</a>. This is part of the Spirit distribution.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="directives.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="grammar.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,54 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Scoped Lock</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Scoped
|
||||
Lock</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="regular_expression_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="distinct.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<h2>scoped_lock_d</h2>
|
||||
<p>The <tt>scoped_lock_d</tt> directive constructs a parser that locks a mutex
|
||||
during the attempt to match the contained parser.</p>
|
||||
<p>Syntax:</p>
|
||||
<pre> <span class="identifier">scoped_lock_d</span><span class="special">(</span>mutex<span class="special">&)[</span>body-parser<span class="special">]</span></pre>
|
||||
<p>Note that nesting <tt>scoped_lock_d</tt> directives bears the risk of deadlocks
|
||||
since the order of locking depends on the grammar used and may even depend on
|
||||
the input being parsed. Locking order has to be consistent within an application
|
||||
to ensure deadlock free operation.</p>
|
||||
<p></p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="regular_expression_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="distinct.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2003 Martin Wille<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,96 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>The Select Parser</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
<style type="text/css">
|
||||
<!--
|
||||
.style1 {font-family: "Courier New", Courier, mono}
|
||||
.style2 {font-family: "Courier New", Courier, mono; font-style: italic; }
|
||||
.style3 {font-family: "Courier New", Courier, mono; color: #FF0000; }
|
||||
-->
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10"> </td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Select Parser </b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="the_lazy_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="switch_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Select parsers may be used to identify a single parser from a given list
|
||||
of parsers, which successfully recognizes the current input sequence. Example:</p>
|
||||
<pre> rule<span class="special"><></span> rule_select <span class="special">=</span>
|
||||
select_p<span class="special">
|
||||
(</span>
|
||||
parser_a<span class="special">
|
||||
,</span> parser_b<span class="special">
|
||||
<span class="comment">/* ... */</span>
|
||||
,</span> parser_n
|
||||
<span class="special">)</span><span class="special">;</span></pre>
|
||||
<p>The parsers (parser_a, parser_b etc.) are tried sequentially from left to right until a parser matches the current input sequence.
|
||||
If there is a matching parser found, the <tt>select_p</tt> parser returns
|
||||
the parser's position (zero based index). For instance, in the example above, <tt>1</tt> is returned if parser_b
|
||||
matches.</p>
|
||||
<p>There are two predefined parsers of the select parser family: <tt>select_p</tt>
|
||||
and <tt>select_fail_p</tt>. These parsers differ in the way the no match
|
||||
case is handled (when none of the parsers match the current input sequence).
|
||||
While the <tt>select_p</tt> parser will return <tt>-1</tt>
|
||||
if no matching parser is found, the <tt>select_fail_p</tt> parser will not match
|
||||
at all.</p>
|
||||
<p>The following sample shows how the select parser may be used very conveniently
|
||||
in conjunction with a <a href="switch_parser.html">switch parser</a>:</p>
|
||||
<pre> <span class="keyword">int</span> choice <span class="special">=</span> <span class="literal">-1</span><span class="special">;</span>
|
||||
rule<span class="special"><></span> rule_select <span class="special">=</span>
|
||||
select_fail_p<span class="special">(</span><span class="literal">'a'</span><span class="special">,</span> <span class="literal">'b'</span><span class="special">,</span> <span class="literal">'c'</span><span class="special">,</span> <span class="literal">'d'</span><span class="special">)[</span>assign_a<span class="special">(</span>choice<span class="special">)]</span>
|
||||
>> switch_p(var<span class="special">(</span>choice)) <span class="special">
|
||||
[</span><br> case_p<span class="special"><</span><span class="literal">0</span><span class="special">>(</span>int_p<span class="special">),</span><br> case_p<span class="special"><</span><span class="literal">1</span><span class="special">>(</span>ch_p<span class="special">(</span><span class="literal">','</span><span class="special">)),</span><br> case_p<span class="special"><</span><span class="literal">2</span><span class="special">>(</span>str_p<span class="special">(</span><span class="string">"bcd"</span><span class="special">)),</span><br> default_p<br> <span class="special">]</span><br><span class="special"> ;</span></pre>
|
||||
<p>This example shows a rule, which matches:</p>
|
||||
<ul>
|
||||
<li><span class="literal"> 'a' </span>followed
|
||||
by an integer</li>
|
||||
<li><span class="literal">'b' </span>followed by a<span class="literal">
|
||||
','</span></li>
|
||||
<li><span class="literal">'c'</span> followed by <span class="string">"bcd"</span></li>
|
||||
<li>a single <span class="literal">'d'</span>. </li>
|
||||
</ul>
|
||||
<p>For other input sequences the
|
||||
given rule does not match at all.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><p><img src="theme/alert.gif" width="16" height="16"> <tt>BOOST_SPIRIT_SELECT_LIMIT</tt><br>
|
||||
<br>
|
||||
The number of possible entries inside the <tt>select_p</tt> parser is limited by the Spirit compile time constant <tt>BOOST_SPIRIT_SELECT_LIMIT</tt>, which defaults to 3. This value should not be greater than the compile time constant given by <tt>PHOENIX_LIMIT</tt> (see <a href="../phoenix/index.html">phoenix</a>). Example:</p>
|
||||
<p class="style1"><span class="comment">// Define these before including anything else <br>
|
||||
</span><span class="style3">#define</span> PHOENIX_LIMIT 10<br>
|
||||
<span class="preprocessor">#define</span> BOOST_SPIRIT_SELECT_LIMIT 10 </p></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="the_lazy_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="switch_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2003-2004 Hartmut Kaiser <br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,258 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Semantic Actions</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Semantic Actions</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="subrules.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Semantic actions have the form: <b>expression[action]</b></p>
|
||||
<p>Ultimately, after having defined our grammar and having generated a corresponding
|
||||
parser, we will need to produce some output and do some work besides syntax
|
||||
analysis; unless, of course, what we want is merely to check for the conformance
|
||||
of an input with our grammar, which is very seldom the case. Semantic actions
|
||||
may be attached to any expression at any level within the parser hierarchy.
|
||||
An action is a C/C++ function or function object that will be called if a match
|
||||
is found in the particular context where it is attached. The action function
|
||||
serves as a hook into the parser and may be used to, for example:</p>
|
||||
<blockquote>
|
||||
<p><img src="theme/bullet.gif" width="13" height="13"> Generate output from
|
||||
the parser (ASTs, for example)<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> Report warnings or errors<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> Manage symbol tables</p>
|
||||
</blockquote>
|
||||
<h2>Generic Semantic Actions (Transduction Interface)</h2>
|
||||
<p>A generic semantic action can be any free function or function object that
|
||||
is compatible with the interface:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier></span><span class=keyword> void </span><span class=identifier>f</span><span class=special>(</span><span class=identifier>IteratorT </span><span class=identifier>first</span><span class=special>, </span><span class=identifier>IteratorT </span><span class=identifier>last</span><span class=special>);</span></font></code></pre>
|
||||
<p>where <tt>IteratorT</tt> is the type of iterator used, <tt>first</tt> points
|
||||
to the current input and <tt>last</tt> points to one after the end of the input
|
||||
(identical to STL iterator ranges). A function object (functor) should have
|
||||
a member <tt>operator()</tt> with the same signature as above:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_functor
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>IteratorT </span><span class=identifier>first</span><span class=special>, </span><span class=identifier>IteratorT </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<p>Iterators pointing to the matching portion of the input are passed into the
|
||||
function/functor.</p>
|
||||
<p>In general, semantic actions accept the first-last iterator pair. This is the
|
||||
transduction interface. The action functions or functors receive the unprocessed
|
||||
data representing the matching production directly from the input. In many cases,
|
||||
this is sufficient. Examples are source to source translation, pre-processing,
|
||||
etc. </p>
|
||||
<h3>Example:</h3>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>void
|
||||
</span><span class=identifier>my_action</span><span class=special>(</span><span class=keyword>char const</span><span class=special>* </span><span class=identifier>first</span><span class=special>, </span><span class=keyword>char const</span><span class=special>* </span><span class=identifier>last</span><span class=special>)
|
||||
{
|
||||
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special> </span><span class="identifier">str</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>);
|
||||
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>cout </span><span class=special><< </span><span class=identifier>str </span><span class=special><< </span><span class=identifier>std</span><span class=special>::</span><span class=identifier>endl</span><span class=special>;
|
||||
}
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><> </span><span class=identifier>myrule </span><span class=special>= (</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b </span><span class=special>| *(</span><span class=identifier>c </span><span class=special>>> </span><span class=identifier>d</span><span class=special>))[&</span><span class=identifier>my_action</span><span class=special>];</span></font></code></pre>
|
||||
<p>The function <tt>my_action</tt> will be called whenever the expression <tt>(a
|
||||
| b | *(c >> d))</tt> matches a portion of the input stream while parsing.
|
||||
Two iterators, <tt>first</tt> and <tt>last</tt>, are passed into the function.
|
||||
These iterators point to the start and end, respectively, of the portion of
|
||||
input stream where the match is found.</p>
|
||||
<h3>Const-ness:</h3>
|
||||
<p>With functors, take note that the <tt>operator()</tt> should be <tt>const</tt>.
|
||||
This implies that functors are immutable. One may wish to have some member variables
|
||||
that are modified when the action gets called. This is not a good idea. First
|
||||
of all, functors are preferably lightweight. Functors are passed around a lot
|
||||
and it would incur a lot of overhead if the functors are heavily laden. Second,
|
||||
functors are passed by value. Thus, the actual functor object that finally attaches
|
||||
to the parser, will surely not be the original instance supplied by the client.
|
||||
What this means is that changes to a functor's state will not affect the original
|
||||
functor that the client passed in since they are distinct copies. If a functor
|
||||
needs to update some state variables, which is often the case, it is better
|
||||
to use references to external data. The following example shows how this can
|
||||
be done:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_functor
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>my_functor</span><span class=special>(</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>& </span><span class=identifier>str_</span><span class=special>)
|
||||
</span><span class=special>: </span><span class=identifier>str</span><span class=special>(</span><span class=identifier>str_</span><span class=special>) </span><span class=special>{}
|
||||
|
||||
</span><span class=keyword>void
|
||||
</span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>IteratorT </span><span class=identifier>first</span><span class=special>, </span><span class=identifier>IteratorT </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>str</span><span class=special>.</span><span class=identifier>assign</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>);
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>& </span><span class=identifier>str</span><span class=special>;
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<h3>Full Example:</h3>
|
||||
<p>Here now is our calculator enhanced with semantic actions:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>namespace
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>void </span><span class=identifier>do_int</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>str</span><span class=special>, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>end</span><span class=special>)
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>string </span><span class=identifier>s</span><span class=special>(</span><span class=identifier>str</span><span class=special>, </span><span class=identifier>end</span><span class=special>);
|
||||
</span><span class=identifier>cout </span><span class=special><< </span><span class=string>"PUSH(" </span><span class=special><< </span><span class=identifier>s </span><span class=special><< </span><span class=literal>')' </span><span class=special><< </span><span class=identifier>endl</span><span class=special>;
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=keyword>void </span><span class=identifier>do_add</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special><< </span><span class=string>"ADD\n"</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=keyword>void </span><span class=identifier>do_subt</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special><< </span><span class=string>"SUBTRACT\n"</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=keyword>void </span><span class=identifier>do_mult</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special><< </span><span class=string>"MULTIPLY\n"</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=keyword>void </span><span class=identifier>do_div</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special><< </span><span class=string>"DIVIDE\n"</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=keyword>void </span><span class=identifier>do_neg</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special><< </span><span class=string>"NEGATE\n"</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=special>}</span></font></code></pre>
|
||||
<p>We augment our grammar with semantic actions:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>calculator</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>expression
|
||||
</span><span class=special>= </span><span class=identifier>term
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=special>(</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>)[&</span><span class=identifier>do_add</span><span class=special>]
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>)[&</span><span class=identifier>do_subt</span><span class=special>]
|
||||
</span><span class=special>)
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>term </span><span class=special>=
|
||||
</span><span class=identifier>factor
|
||||
</span><span class=special>>> </span><span class=special>*( </span><span class=special>(</span><span class=literal>'*' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>)[&</span><span class=identifier>do_mult</span><span class=special>]
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>)[&</span><span class=identifier>do_div</span><span class=special>]
|
||||
</span><span class=special>)
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>factor
|
||||
</span><span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[(+</span><span class=identifier>digit_p</span><span class=special>)[&</span><span class=identifier>do_int</span><span class=special>]]
|
||||
</span><span class=special>| </span><span class=literal>'(' </span><span class=special>>> </span><span class=identifier>expression </span><span class=special>>> </span><span class=literal>')'
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>)[&</span><span class=identifier>do_neg</span><span class=special>]
|
||||
</span><span class=special>| </span><span class=special>(</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>)
|
||||
</span><span class=special>;
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=keyword>const</span><span class=special>&
|
||||
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>expression</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=special>};
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<p>Feeding in the expression <tt>(-1 + 2) * (3 + -4)</tt>, for example, to the
|
||||
rule <tt>expression</tt> will produce the expected output:</p>
|
||||
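<pre><code><span class=identifier>PUSH</span><span class=special>(</span><span class=number>1</span><span class=special>)
|
||||
</span><span class=identifier>NEGATE
|
||||
</span><span class=identifier>PUSH</span><span class=special>(</span><span class=number>2</span><span class=special>)
|
||||
</span><span class=identifier>ADD
|
||||
</span><span class=identifier>PUSH</span><span class=special>(</span><span class=number>3</span><span class=special>)
|
||||
</span><span class=identifier>PUSH</span><span class=special>(</span><span class=number>4</span><span class=special>)
|
||||
</span><span class=identifier>NEGATE
|
||||
</span><span class=identifier>ADD
|
||||
</span><span class=identifier>MULTIPLY</span></code></pre>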
|
||||
<p>which, by the way, is the Reverse Polish Notation (RPN) of the given expression,
|
||||
reminiscent of some primitive calculators and the language Forth.</p>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> <a href="../example/fundamental/calc_plain.cpp">View
|
||||
the complete source code here</a>. This is part of the Spirit distribution.
|
||||
</p>
|
||||
<h2><a name="specialized_actions"></a>Specialized Actions</h2>
|
||||
<p>In general, semantic actions accept the first-last iterator pair. There are
|
||||
situations though where we might want to pass data in its processed form. A
|
||||
concrete example is the numeric parser. It is unwise to pass unprocessed data
|
||||
to a semantic action attached to a numeric parser and just throw away what has
|
||||
been parsed by the parser. We want to pass the actual parsed number.</p>
|
||||
<p>The function and functor signature of a semantic action varies depending on
|
||||
the parser it is attached to. The following table lists the parsers that
|
||||
accept unique signatures.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> Unless
|
||||
explicitly stated in the documentation of a specific parser type, parsers
|
||||
not included in the list by default expect the generic signature as explained
|
||||
above.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3>Numeric Actions</h3>
|
||||
<p><b>Applies to:</b></p>
|
||||
<blockquote>
|
||||
<p><img src="theme/bullet.gif" width="13" height="13"> uint_p<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> int_p<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> ureal_p<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> real_p</p>
|
||||
</blockquote>
|
||||
<p><b>Signature for functions:</b></p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>void </span><span class=identifier>func</span><span class=special>(</span><span class=identifier>NumT </span><span class=identifier>val</span><span class=special>);</span></font></code></pre>
|
||||
<p><b>Signature for functors:</b> </p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>ftor
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>NumT </span><span class=identifier>val</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<p>Where <tt>NumT</tt> is any primitive numeric type such as <tt>int</tt>, <tt>long</tt>,
|
||||
<tt>float</tt>, <tt>double</tt>, etc., or a user defined numeric type such as
|
||||
big_int. <tt>NumT</tt> is the same type used as template parameter to <tt>uint_p</tt>,
|
||||
<tt>int_p</tt>, <tt>ureal_p</tt> or <tt>real_p</tt>. The parsed number is passed
|
||||
into the function/functor.</p>
|
||||
<h3>Character Actions</h3>
|
||||
<p><b>Applies to:</b></p>
|
||||
<blockquote>
|
||||
<p><img src="theme/bullet.gif" width="13" height="13"> chlit, ch_p<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> range, range_p<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> anychar<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> alnum, alpha<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> cntrl, digit<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> graph, lower<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> print, punct<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> space, upper<br>
|
||||
<img src="theme/bullet.gif" width="13" height="13"> xdigit</p>
|
||||
</blockquote>
|
||||
<p><b>Signature for functions:</b></p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>void </span><span class=identifier>func</span><span class=special>(</span><span class=identifier>CharT </span><span class=identifier>ch</span><span class=special>);</span></font></code></pre>
|
||||
<p><b>Signature for functors:</b></p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>ftor
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>CharT </span><span class=identifier>ch</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<p>Where <tt>CharT</tt> is the value_type of the iterator used in parsing. A <tt>char
|
||||
const*</tt> iterator for example has a <tt>value_type</tt> of <tt>char</tt>.
|
||||
The matching character is passed into the function/functor.</p>
|
||||
<h2>Cascading Actions</h2>
|
||||
<p>Actions can be cascaded. Cascaded actions also inherit the function/functor
|
||||
interface of the original. For example:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>uint_p</span><span class=special>[</span><span class=identifier>fa</span><span class=special>][</span><span class=identifier>fb</span><span class=special>][</span><span class=identifier>fc</span><span class=special>]</span></font></code></pre>
|
||||
<p>Here, the functors <tt>fa</tt>, <tt>fb</tt> and <tt>fc</tt> all expect the
|
||||
signature <tt>void operator()(unsigned n) const</tt>.</p>
|
||||
<h2>Directives and Actions</h2>
|
||||
<p>Directives inherit the function/functor interface of the subject they are
|
||||
enclosing. Example:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'x'</span><span class=special>)][</span><span class=identifier>f</span><span class=special>]</span></font></code></pre>
|
||||
<p>Here, the functor <tt>f</tt> expects the signature <tt>void operator()(char
|
||||
ch) const</tt>, assuming that the iterator used is a <tt>char const*</tt>.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="subrules.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="indepth_the_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p> </p>
|
||||
<p> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,99 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Storable Rules</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Storable
|
||||
Rules</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="stored_rule.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="the_lazy_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The rule is a weird C++ citizen, unlike any other C++ object. It does not have
|
||||
the proper copy and assignment semantics and cannot be stored and passed around
|
||||
by value. You cannot store rules in STL containers (vector, stack, etc) for
|
||||
later use and you cannot pass and return rules to and from functions by value.</p>
|
||||
<p>EBNF is primarily declarative. Like in functional programming, an EBNF grammar
|
||||
is a static recipe and there's no notion of do this then that. However, in Spirit,
|
||||
we managed to coax imperative C++ to take in declarative EBNF. Hah! Fun!...
|
||||
We did that by masquerading the C++ assignment operator to mimic EBNF's <tt>::=</tt>.
|
||||
To do that, we gave the rule class' assignment operator and copy constructor
|
||||
a different meaning and semantics. The downside is that doing so made the rule
|
||||
unlike any other C++ object. You can't copy it. You can't assign it. </p>
|
||||
<p>We want to use the dynamic nature of C++ to our advantage. We've seen dynamic
|
||||
Spirit in action here and there. There are indeed some interesting applications
|
||||
of dynamic parsers using Spirit. Yet, we will not fully utilize the power of
|
||||
dynamic parsing, unless we have a rule that behaves like any other good C++
|
||||
object. With such a beast, we can write full parsers that are defined at run time,
|
||||
as opposed to compile time.</p>
|
||||
<p>We now have dynamic rules: <tt>stored_rule</tt>s. Basically they are rules
|
||||
with perfect C++ assignment/copy-constructor semantics. This means that <tt>stored_rule</tt>s
|
||||
can be stored in containers and/or dynamically created at run-time.</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>ScannerT </span><span class=special>= </span><span class=identifier>scanner</span><span class=special><>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>ContextT </span><span class=special>= </span><span class=identifier>parser_context</span><span class=special><></span><span class=identifier>,
|
||||
</span><span class="keyword">typename</span><span class=identifier> TagT </span><span class="special">=</span><span class=identifier> parser_address_tag</span><span class=special>>
|
||||
</span><span class=keyword>class </span><span class=identifier>stored_rule</span><span class=special>;</span></font></code></pre>
|
||||
<p>The interface is exactly the same as with the rule class (see the <a href="rule.html">section
|
||||
on rules</a> for more information regarding the API). The only difference is
|
||||
with the copy and assignment semantics. Now, with <tt>stored_rule</tt>s, we
|
||||
can dynamically and algorithmically define our rules. Here are some samples...
|
||||
</p>
|
||||
<p>Say I want to dynamically create a rule for:</p>
|
||||
<pre>
|
||||
<span class=identifier> start </span><span class=special>= </span><span class=special>*(</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b </span><span class=special>| </span><span class=identifier>c</span><span class=special>);</span></pre>
|
||||
<p> I can write it dynamically step-by-step:</p>
|
||||
<pre> <span class=identifier> stored_rule</span><span class=special><> </span><span class=identifier>start</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>a</span><span class=special>;
|
||||
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>() </span><span class=special>| </span><span class=identifier>b</span><span class=special>;
|
||||
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>() </span><span class=special>| </span><span class=identifier>c</span><span class=special>;
|
||||
</span><span class=identifier>start </span><span class=special>= </span><span class=special>*(</span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>());</span></pre>
|
||||
<p>Later, I changed my mind and want to redefine it (again dynamically) as:</p>
|
||||
<pre><span class=identifier> start </span><span class=special>= </span><span class=special>(</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>) </span><span class=special>>> </span><span class=special>(</span><span class=identifier>start </span><span class=special>| </span><span class=identifier>b</span><span class=special>);</span>
|
||||
</pre>
|
||||
<p>I write:</p>
|
||||
<pre> <span class=special> </span><span class=identifier>start </span><span class=special>= </span><span class=identifier>b</span><span class=special>;
|
||||
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>();
|
||||
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>() </span><span class=special>>> </span><span class=special>(</span><span class=identifier>start </span><span class=special>| </span><span class=identifier>b</span><span class=special>);</span></pre>
|
||||
<p>Notice the statement:</p>
|
||||
<pre> <span class=special> </span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>() </span><span class=special>| </span><span class=identifier>b</span><span class=special>;</span></pre>
|
||||
<p>Why is start.copy() required? Well, because like rules, stored rules are still
|
||||
embedded by reference when found in the RHS (one reason is to avoid cyclic-shared-pointers).
|
||||
If we write:</p>
|
||||
<pre> <span class=special> </span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start </span><span class=special>| </span><span class=identifier>b</span><span class=special>;</span></pre>
|
||||
<p>We have <strong>left-recursion</strong>! Making a copy of start avoids self
|
||||
referencing. What we are doing is making a copy of start, ORing it with b, then
|
||||
destructively assigning the result back to start.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="stored_rule.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="the_lazy_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,124 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Style Guide</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Style
|
||||
Guide </b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="portability.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="techniques.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p> At some point, especially when there are lots of semantic actions attached
|
||||
to various points, the grammar tends to be quite difficult to follow. In order
|
||||
to keep an easy-to-read, consistent and aesthetically pleasing look to the Spirit
|
||||
code, the following coding style guide is advised. </p>
|
||||
<p>This coding style is adapted and extended from the ANTLR/PCCTS style (Terence
|
||||
Parr) and <a href="http://groups.yahoo.com/group/boost/files/coding_guidelines.html">Boost
|
||||
coding guidelines</a> (David Abrahams and Nathan Myers) and is the combined
|
||||
work of Joel de Guzman, Chris Uzdavinis and Hartmut Kaiser.</p>
|
||||
<ul>
|
||||
<li> Rule names use std C++ (Boost) convention. The rule name may be very long.</li>
|
||||
<li>The '=' is neatly indented 4 spaces below. Like Boost, use spaces instead
|
||||
of tabs. </li>
|
||||
<li>Breaking the operands into separate lines puts the semantic actions neatly
|
||||
to the right. </li>
|
||||
<li>Semicolon at the last line terminates the rule. </li>
|
||||
<li>The adjacent parts of a sequence should be indented so that everything
|
||||
that belongs to one level sits at the same indentation level.</li>
|
||||
</ul>
|
||||
<pre><span class=identifier> program
|
||||
</span><span class=special>= </span><span class=identifier>program_heading </span><span class=special>[</span><span class=identifier>heading_action</span><span class=special>]
|
||||
</span><span class=identifier> </span><span class=special> >> </span><span class=identifier>block </span><span class=special>[</span><span class=identifier>block_action</span><span class=special>]
|
||||
</span><span class=identifier> </span><span class=special> >> </span><span class=literal>'.'
|
||||
</span><span class=identifier> </span><span class=special>| </span><span class=identifier>another_sequence
|
||||
</span><span class=special>>> </span><span class=identifier>etc
|
||||
</span><span class=identifier> </span><span class=special>;</span></pre>
|
||||
<ul>
|
||||
<li>Prefer literals in the grammar instead of identifiers. e.g. <tt>"program"</tt>
|
||||
instead of <tt>PROGRAM</tt>, <tt>'>='</tt> instead of <tt>GTE</tt> and
|
||||
<tt>'.' </tt>instead of <tt>DOT</tt>. This makes it much easier to read. If
|
||||
this isn't possible (for instance where the used tokens must be identified
|
||||
through integers) capitalized identifiers should be used instead. </li>
|
||||
<li> Breaking the operands may not be needed for short expressions. e.g. <tt>*(','
|
||||
>> file_identifier)</tt> as long as the line does not exceed 80 characters.
|
||||
</li>
|
||||
<li> If a sequence fits on one line, put spaces inside the parentheses to clearly
|
||||
separate them from the rules. </li>
|
||||
</ul>
|
||||
<pre> <span class=identifier>program_heading
|
||||
</span><span class=special>= </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=string>"program"</span><span class=special>]
|
||||
>> </span><span class=identifier>identifier
|
||||
</span><span class=special>>> </span><span class=literal>'('
|
||||
</span><span class=special>>> </span><span class=identifier>file_identifier
|
||||
</span><span class=special>>> *( </span><span class=literal>',' </span><span class=special>>> </span><span class=identifier>file_identifier </span><span class=special>)
|
||||
>> </span><span class=literal>')'
|
||||
</span><span class=special>>> </span><span class=literal>';'
|
||||
</span><span class=special>;</span></pre>
|
||||
<ul>
|
||||
<li> Nesting directives: If a rule does not fit on one line (80 characters)
|
||||
it should be continued on the next line indented by one level. </li>
|
||||
<li>The brackets of directives, semantic expressions (using Phoenix or LL lambda
|
||||
expressions) or parsers should be placed as follows. </li>
|
||||
</ul>
|
||||
<pre> <span class=identifier>identifier
|
||||
</span><span class=special>= </span><span class=identifier>nocase
|
||||
</span><span class=special>[
|
||||
</span><span class=identifier>lexeme
|
||||
</span><span class=special>[
|
||||
</span><span class=identifier>alpha </span><span class=special>>> *(</span><span class=identifier>alnum </span><span class=special>| </span><span class=literal>'_'</span><span class=special>) [</span><span class=identifier>id_action</span><span class=special>]
|
||||
]
|
||||
]
|
||||
;</span></pre>
|
||||
<ul>
|
||||
<li> Nesting unary operators (e.g. Kleene star) </li>
|
||||
<li>Unary rule operators (Kleene star, <tt>'!'</tt>, <tt>'+'</tt> etc.) should
|
||||
be moved out one space before the corresponding indentation level, if this
|
||||
rule has a body or a sequence after it, which does not fit on one line. This
|
||||
makes the formatting more consistent and moves the rule 'body' at the same
|
||||
indentation level as the rule itself, highlighting the unary operator.</li>
|
||||
</ul>
|
||||
<pre><span class=special> </span><span class=identifier>block
|
||||
</span><span class=special>= *( </span><span class=identifier>label_declaration_part
|
||||
</span><span class=special>| </span><span class=identifier>constant_definition_part
|
||||
</span><span class=special>| </span><span class=identifier>type_definition_part
|
||||
</span><span class=special>| </span><span class=identifier>variable_declaration_part
|
||||
</span><span class=special>| </span><span class=identifier>procedure_and_function_declaration_part
|
||||
</span><span class=special>)
|
||||
>> </span><span class=identifier>statement_part
|
||||
</span><span class=special>;</span></pre>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="portability.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="techniques.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2001-2003 Joel de Guzman<br>
|
||||
Copyright © 2001-2002 Hartmut Kaiser<br>
|
||||
Copyright © 2001-2002 Chris Uzdavinis<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,289 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Subrules</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Subrules</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="grammar.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="semantic_actions.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Spirit is implemented using expression templates. This is a very powerful technique.
|
||||
Along with its power come some complications. We almost take for granted that
|
||||
when we write <tt>i | j >> k</tt> where <tt>i</tt>, <tt>j</tt> and <tt>k</tt>
|
||||
are all integers the result is still an integer. Yet, with expression templates,
|
||||
the same expression <tt>i | j >> k</tt> where <tt>i</tt>, <tt>j</tt> and
|
||||
<tt>k</tt> are of type <tt>T</tt>, the result is a complex composite type [see
|
||||
<a href="basic_concepts.html">Basic Concepts</a>]. Spirit expressions, which
|
||||
are combinations of primitives and composites yield an infinite set of new types.
|
||||
One problem is that C++ offers no easy facility to deduce the type of an arbitrarily
|
||||
complex expression that yields a complex type. Thus, while it is easy to write:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>int </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>i </span><span class=special>| </span><span class=identifier>j </span><span class=special>>> </span><span class=identifier>k</span><span class=special>; </span><span class=comment>// where i, j, and k are ints</span></font></code></pre>
|
||||
<p>Expression templates yield an endless supply of types. Without the <a href="rule.html">rule</a>,
|
||||
there is no easy way to do this in C++ if <tt>i</tt>, <tt>j</tt> and <tt>k</tt>
|
||||
are Spirit parsers:</p>
|
||||
<pre><code><font color="#000000"><span class=comment> </span><span class=special><</span><span class=identifier>what_type???</span><span class=special>> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>i </span><span class=special>| </span><span class=identifier>j </span><span class=special>>> </span><span class=identifier>k</span><span class=special>; </span><span class=comment>// where i, j, and k are Spirit parsers</span></font></code></pre>
|
||||
<p>If <tt>i</tt>, <tt>j</tt> and <tt>k</tt> are all <tt>chlit<></tt> objects,
|
||||
the type that we want is:</p>
|
||||
<pre><code><font color="#000000"><span class=comment> </span><span class=keyword>typedef
|
||||
</span><span class=identifier>alternative</span><span class=special><
|
||||
</span><span class=identifier>chlit</span><span class=special><></span><span class=comment> // i
|
||||
</span><span class=special>,</span> <span class=identifier>sequence</span><span class=special><
|
||||
</span><span class=identifier>chlit</span><span class=special><> </span><span class=comment>// j
|
||||
</span><span class=special> ,</span><span class=comment> </span><span class=identifier>chlit</span><span class=special><> </span><span class=comment>// k
|
||||
</span><span class=special>>
|
||||
>
|
||||
</span><span class=identifier>rule_t</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule_t r </span><span class=special>= </span><span class=identifier>i </span><span class=special>| </span><span class=identifier>j </span><span class=special>>> </span><span class=identifier>k</span><span class=special>; </span><span class=comment>// where i, j, and k are chlit<> objects</span></font></code></pre>
|
||||
<p>We deliberately formatted the type declaration nicely to make it understandable.
|
||||
Try that with a more complex expression. While it can be done, explicitly spelling
|
||||
out the type of a Spirit expression template is tedious and error prone. The
|
||||
right hand side (rhs) has to mirror the type of the left hand side (lhs). (<img src="theme/lens.gif" width="15" height="16">
|
||||
Yet, if you still wish to do it, see this <a href="techniques.html#no_rules">link</a>
|
||||
for a technique). </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><p><img src="theme/lens.gif" width="15" height="16"><b>
|
||||
typeof and auto</b> <br>
|
||||
<br>
|
||||
Some compilers already support the <tt>typeof</tt> keyword. This can be
|
||||
used to free us from having to explicitly type the type (pun intentional).
|
||||
Using the <tt>typeof</tt>, we can rewrite the Spirit expression above
|
||||
as:<br>
|
||||
<br>
|
||||
<span class="keyword"><code>typeof</code><code></code></span><code><span class=special>(</span><span class=identifier>i
|
||||
</span><span class=special>| </span><span class=identifier>j </span><span class=special>>>
|
||||
</span><span class=identifier>k</span><span class=special>) </span><span class=identifier>r
|
||||
</span><span class=special>= </span><span class=identifier>i </span><span class=special>|
|
||||
</span><span class=identifier>j </span><span class=special>>> </span><span class=identifier>k</span><span class=special>;</span></code><br>
|
||||
<br>
|
||||
While this is better than having to explicitly declare a complex type,
|
||||
it is redundant, error prone and still an eye sore. The expression is
|
||||
typed twice. The only way to simplify this is to introduce a macro (See
|
||||
this <a href="techniques.html#typeof">link</a> for more information).<br>
|
||||
<br>
|
||||
<a href="http://www.boost-consulting.com">David Abrahams</a> proposed
|
||||
in comp.std.c++ to reuse the <tt>auto</tt> keyword for type deduced variables.
|
||||
This has been extensively discussed in <a href="http://www.boost.org">boost.org</a>. Example:
|
||||
<br>
|
||||
<br>
|
||||
<span class=keyword><code>auto </code></span><code><span class=identifier>r
|
||||
</span><span class=special>= </span><span class=identifier>i </span><span class=special>|
|
||||
</span><span class=identifier>j </span><span class=special>>> </span><span class=identifier>k</span><span class=special>;</span></code><br>
|
||||
<br>
|
||||
Once such a C++ extension is accepted into the standard, this would be
|
||||
a neat solution and a nice fit for our purpose. It's not a complete solution
|
||||
though since there are still situations where we do not know the rhs beforehand;
|
||||
for instance when pre-declaring cyclic dependent rules.</p>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Fortunately, rules come to the rescue. Rules can capture the type of the expression
|
||||
assigned to it. Thus:</p>
|
||||
<pre><code><font color="#000000"> <span class=identifier>rule</span><span class=special><> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>i </span><span class=special>| </span><span class=identifier>j </span><span class=special>>> </span><span class=identifier>k</span><span class=special>; </span><span class=comment>// where i, j, and k are chlit<> objects</span></font></code></pre>
|
||||
<p>It might not be apparent but behind the scenes, plain rules are actually implemented
|
||||
using a pointer to a runtime polymorphic abstract class that holds the dynamic
|
||||
type of the parser assigned to it. When a Spirit expression is assigned to a
|
||||
rule, its type is encapsulated in a concrete subclass of the abstract class.
|
||||
A virtual parse function delegates the parsing to the encapsulated object.</p>
|
||||
<p>Rules have drawbacks though:</p>
|
||||
<p><img src="theme/bullet.gif" width="12" height="12"> It is coupled to a specific
|
||||
scanner type. The rule is tied to a specific scanner [see <a href="faq.html#scanner_business">The
|
||||
Scanner Business</a>].<br>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> The rule's parse member
|
||||
function has a virtual function call overhead that cannot be inlined.</p>
|
||||
<h2>Static rules: subrules</h2>
|
||||
<p>The subrule is a fully static version of the rule. The subrule does not have
|
||||
the drawbacks listed above. </p>
|
||||
<p><img src="theme/bullet.gif" width="12" height="12"> The subrule is not tied
|
||||
to a specific scanner so just about any scanner type may be used<br>
|
||||
<img src="theme/bullet.gif" width="12" height="12"> The subrule also allows
|
||||
aggressive inlining since there are no virtual function calls</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template</span><span class=special><</span><span class=keyword>int </span></font><span class="identifier">ID</span><font color="#000000"><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ContextT </span><span class=special>= </span><span class=identifier>parser_context</span><span class=special><></span> <span class=special>>
|
||||
</span><span class=keyword>class </span><span class=identifier>subrule</span><span class=special>;</span></font></code></pre>
|
||||
<p>The first template parameter gives the subrule an identification tag. Like
|
||||
the <a href="rule.html">rule</a>, there is a ContextT template parameter that
|
||||
defaults to <code><tt>parser_context</tt></code>. You need not be concerned
|
||||
at all with the <tt>ContextT</tt> template parameter unless you wish to tweak
|
||||
the low level behavior of the subrule. Detailed information on the <tt>ContextT</tt>
|
||||
template parameter is provided <a href="indepth_the_parser_context.html">elsewhere</a>.
|
||||
</p>
|
||||
<p>Presented above is the public API. There may actually be more template parameters
|
||||
after <tt>ContextT</tt>. Everything after the <tt>ContextT</tt> parameter should
|
||||
not be of concern to the client and are strictly for internal use only.</p>
|
||||
<p>Apart from a few minor differences, the subrule follows the usage and syntax
|
||||
of the rule closely. Here's the calculator grammar using subrules:</p>
|
||||
<pre><code><font color="#000000"><span class=comment> </span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>calculator</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>first </span><span class=special>=
|
||||
</span><span class=special>(
|
||||
</span><span class=identifier>expression </span><span class=special>= </span><span class=identifier>term </span><span class=special>>> </span><span class=special>*((</span><span class=literal>'+' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>>> </span><span class=identifier>term</span><span class=special>)),
|
||||
</span><span class=identifier>term </span><span class=special>= </span><span class=identifier>factor </span><span class=special>>> </span><span class=special>*((</span><span class=literal>'*' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>>> </span><span class=identifier>factor</span><span class=special>)),
|
||||
</span><span class=identifier>factor </span><span class=special>= </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>group</span><span class=special>,
|
||||
</span><span class=identifier>group </span><span class=special>= </span><span class=literal>'(' </span><span class=special>>> </span><span class=identifier>expression </span><span class=special>>> </span><span class=literal>')'
|
||||
</span><span class=special>);
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>0</span><span class=special>> </span><span class=identifier>expression</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>1</span><span class=special>> </span><span class=identifier>term</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>2</span><span class=special>> </span><span class=identifier>factor</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>3</span><span class=special>> </span><span class=identifier>group</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=identifier>first</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=keyword>const</span><span class=special>&
|
||||
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>first</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=special>};
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<p><img src="theme/lens.gif" width="15" height="16"> A fully working example with
|
||||
<a href="semantic_actions.html">semantic actions</a> can be <a href="../example/fundamental/subrule_calc.cpp">viewed
|
||||
here</a>. This is part of the Spirit distribution. </p>
|
||||
<table border="0" align="left">
|
||||
<tr>
|
||||
<td width="199"><img src="theme/subrule1.png" width="234" height="224"></td>
|
||||
<td width="2"></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The subrule is an efficient version of the rule. Compiler optimizations such
|
||||
as aggressive inlining help reduce the code size and increase performance significantly.
|
||||
</p>
|
||||
<p>The subrule is not a panacea however. Subrules push the C++ compiler hard to
|
||||
its knees. For example, current compilers have a limit on recursion depth that
|
||||
may not be exceeded. Don't even think about writing a full pascal grammar using
|
||||
subrules alone. A grammar using subrules is a single C++ expression. Current
|
||||
C++ compilers cannot handle very complex expressions very well. Finally, a plain
|
||||
rule is still needed to act as place holder for subrules.</p>
|
||||
<p>The code above is a good example of the recommended way to use subrules. Notice
|
||||
the hierarchy. We have a grammar that encapsulates the whole calculator. The
|
||||
start rule is a plain rule that holds the set of subrules. The subrules in turn
|
||||
define the actual details of the grammar.</p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"><b>
|
||||
Template instantiation depth</b> <br> <br>
|
||||
Spirit pushes the C++ compiler hard. Current C++ compilers cannot handle
|
||||
very complex heavily nested expressions very well. One restricting factor
|
||||
is the typical compiler's limit on template recursion depth. Some, but not
|
||||
all, compilers allow this limit to be configured.<br>
|
||||
<br>
|
||||
g++'s maximum can be set using a compiler flag: -ftemplate-depth. Set this
|
||||
appropriately if you have a relatively complex grammar.<br>
|
||||
<br>
|
||||
Microsoft Visual C++ can take greater than 1000 for both template class
|
||||
and function instantiation depths. However, the linker chokes with deep
|
||||
template function instantiation unless inline recursion depth is set using
|
||||
these pragmas:<br>
|
||||
<br>
|
||||
<span class="preprocessor">#pragma</span> inline_depth<span class="special">(</span>255<span class="special">)</span><br>
|
||||
<span class="preprocessor">#pragma</span> inline_recursion<span class="special">(</span>on<span class="special">)<br>
|
||||
<br>
|
||||
</span>Perhaps these limitations no longer apply to more current versions
|
||||
of these compilers. Be sure to check your compiler documentation.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>This setup gives a good balance. The subrules do all the work. Each grammar
|
||||
will have only one rule: <tt>first</tt>. The rule is used just to hold the subrules
|
||||
and make them visible to the grammar. </p>
|
||||
<h3>The subrule definition</h3>
|
||||
<p>Like the rule, the expression after assignment operator <tt>=</tt> defines
|
||||
the subrule:</p>
|
||||
<pre> <span class=identifier>identifier </span><span class=special>= </span><span class=identifier>expression</span></pre>
|
||||
<p>Unlike rules, subrules may be defined only once. Redefining a subrule is illegal
|
||||
and will result in a compile time assertion.</p>
|
||||
<h3>Separators [ , ]</h3>
|
||||
<p>While rules are terminated by the semicolon <tt>';'</tt>, subrules are not
|
||||
terminated but are separated by the comma: <tt>','</tt>. Like Pascal statements,
|
||||
the last subrule in a group may not have a trailing comma.</p>
|
||||
<pre><span class=identifier> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'a'</span><span class=special>),
|
||||
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'b'</span><span class=special>),
|
||||
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'c'</span><span class=special>), </span><span class=comment>// BAD, trailing comma</span><code><font color="#000000"><font color="#800000"><i></i></font></font></code><code><font color="#000000"><font color="#800000"><i></i></font></font><i></i></code></pre>
|
||||
<p>
|
||||
<pre><code><span class=comment> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'a'</span><span class=special>),
|
||||
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'b'</span><span class=special>),
|
||||
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'c'</span><span class=special>) </span><span class=comment>// OK</span></code></pre>
|
||||
<h3> The start subrule</h3>
|
||||
<p>Unlike rules, parsing proceeds from the start subrule. The first (topmost)
|
||||
subrule in a group of subrules is called the <b>start subrule</b>. In our example
|
||||
above, <tt>expression</tt> is the start subrule. When a group of subrules is
|
||||
called forth, the start subrule <tt>expression</tt> is called first.</p>
|
||||
<h3>IDs</h3>
|
||||
<p>Each subrule has a corresponding ID; an integral constant that uniquely specifies
|
||||
the subrule. Our example above has four subrules. They are declared as:</p>
|
||||
<pre><code><span class=comment> </span><span class=identifier>subrule</span><span class=special><</span><span class=number>0</span><span class=special>> </span><span class=identifier>expression</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>1</span><span class=special>> </span><span class=identifier>term</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>2</span><span class=special>> </span><span class=identifier>factor</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>3</span><span class=special>> </span><span class=identifier>group</span><span class=special>;</span></code></pre>
|
||||
<h3> Aliases</h3>
|
||||
<p>It is possible to have subrules with similar IDs. A subrule with a similar
|
||||
ID to another will be an alias of the other. Both subrules may be used interchangeably.</p>
|
||||
<pre><code><span class=special> </span><span class=identifier>subrule</span><span class=special><</span><span class=number>0</span><span class=special>> </span><span class=identifier>a</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>0</span><span class=special>> </span><span class=identifier>alias</span><span class=special>; </span><span class=comment>// alias of a</span></code></pre>
|
||||
<h3>Groups: scope and nesting</h3>
|
||||
<p>The scope of a subrule and its definition is the enclosing group, typically
|
||||
(and by convention) enclosed inside the parentheses. IDs outside a scope are
|
||||
not directly visible. Inner subrule groups can be nested by enclosing each sub-group
|
||||
inside another set of parentheses. Each group is unique and acts independently.
|
||||
Consequently, while it may not be advisable to do so, a subrule in a group may
|
||||
share the same ID as a subrule in another group since both groups are independent
|
||||
of each other.</p>
|
||||
<pre><code><span class=comment> </span><span class=identifier>subrule</span><span class=special><</span><span class=number>0</span><span class=special>> </span><span class=identifier>a</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>1</span><span class=special>> </span><span class=identifier>b</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>0</span><span class=special>> </span><span class=identifier>c</span><span class=special>;
|
||||
</span><span class=identifier>subrule</span><span class=special><</span><span class=number>1</span><span class=special>> </span><span class=identifier>d</span><span class=special>;
|
||||
|
||||
</span><span class=special>( </span><span class=comment>// outer subrule group, scope of a and b
|
||||
</span><span class=identifier>a </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'a'</span><span class=special>),
|
||||
</span><span class=identifier>b </span><span class=special>=
|
||||
</span><span class=special>( </span><span class=comment>// inner subrule group, scope of c and d
|
||||
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'c'</span><span class=special>),
|
||||
</span><span class=identifier>d </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'d'</span><span class=special>)
|
||||
</span><span class=special>)
|
||||
</span><span class=special>)</span></code></pre>
|
||||
<p>Subrule IDs need to be unique only within a group. A grammar is an implicit
|
||||
group. Furthermore, even subrules in a grammar may have the same IDs without
|
||||
clashing if they are inside a group. Subrules may be explicitly grouped using
|
||||
the parentheses. Parenthesized groups have unique scopes. In the code above,
|
||||
the outer subrule group defines the subrules <tt>a</tt> and <tt>b</tt> while
|
||||
the inner subrule group defines the subrules <tt>c</tt> and <tt>d</tt>. Notice
|
||||
that the definition of <tt>b</tt> is the inner subrule.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="grammar.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="semantic_actions.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p> </p>
|
||||
<p><code><font color="#000000"><font color="#0000ff"></font></font></code></p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,115 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>The Switch Parser</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
<style type="text/css">
|
||||
<!--
|
||||
.style1 {font-family: "Courier New", Courier, mono}
|
||||
.style3 {font-family: "Courier New", Courier, mono; color: #FF0000; }
|
||||
-->
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10"> </td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Switch Parser </b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="select_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="escape_char_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Switch parsers may be used to simplify certain alternation constructs. Consider the following code:</p>
|
||||
<pre> rule<span class="special"><></span> rule_overall <span class="special">=</span>
|
||||
ch_p<span class="special">(</span><span class="literal">'a'</span><span class="special">)</span> <span class="special">>></span> parser_a
|
||||
<span class="special">|</span> ch_p<span class="special">(</span><span class="literal">'b'</span><span class="special">)</span> <span class="special">>></span> parser_b
|
||||
<span class="comment">// ...</span>
|
||||
<span class="special">|</span> ch_p<span class="special">(</span><span class="literal">'n'</span><span class="special">)</span> <span class="special">>></span> parser_n
|
||||
<span class="special">;</span></pre>
|
||||
<p>Each of the alternatives is evaluated normally in a sequential manner. This tends to be inefficient, especially for a large number of alternatives. To avoid this inefficiency and to make it possible to write such constructs in a more readable form, Spirit contains the <tt>switch_p</tt> family of parsers. The switch_p parser allows us to rewrite the previous construct as:</p>
|
||||
<pre> rule<span class="special"><></span> rule_overall <span class="special">=</span>
|
||||
switch_p
|
||||
<span class="special">[</span>
|
||||
case_p<span class="special"><</span><span class="literal">'a'</span><span class="special">>(</span>parser_a<span class="special">),</span>
|
||||
case_p<span class="special"><</span><span class="literal">'b'</span><span class="special">>(</span>parser_b<span class="special">),</span>
|
||||
<span class="comment"> // ...</span>
|
||||
case_p<span class="special"><</span><span class="literal">'n'</span><span class="special">>(</span>parser_n<span class="special">)</span>
|
||||
]
|
||||
;</pre>
|
||||
<p>This <tt>switch_p</tt> parser takes the next character (or token) from the input stream and tries to match it against the given integral compile time constants supplied as the template parameters to the <tt>case_p</tt> parsers. If this character matches one of the <tt>case_p</tt> branches, the associated parser is executed (i.e. if 'a' is matched, <tt>parser_a</tt> is executed, if 'b' is matched, <tt>parser_b</tt> is executed and so on) . If no <tt>case_p</tt> branch matches the next input character, the overall construct does not match at all. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><div align="justify"><img src="theme/bulb.gif" width="13" height="18"><strong> Nabialek trick </strong><br>
|
||||
<br>
|
||||
The <strong><em><a href="techniques.html#nabialek_trick">"Nabialek trick" </a></em></strong>(from the name of its inventor, Sam Nabialek), can also improve the rule dispatch from linear non-deterministic to deterministic. This is similar to the <tt>switch_p</tt> parser, yet, can handle grammars where a keyword (operator, etc), instead of a single character or token, precedes a production.</div></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Sometimes it is desirable to add handling of the default case (none of the <tt>case_p</tt> branches matched). This may be achieved with the help of a <tt>default_p</tt> branch:</p>
|
||||
<pre> rule<span class="special"><></span> rule_overall <span class="special">=</span>
|
||||
switch_p
|
||||
<span class="special">[</span>
|
||||
case_p<span class="special"><</span><span class="literal">'a'</span><span class="special">>(</span>parser_a<span class="special">),</span>
|
||||
case_p<span class="special"><</span><span class="literal">'b'</span><span class="special">>(</span>parser_b<span class="special">),</span>
|
||||
<span class="comment"> // ...</span>
|
||||
case_p<span class="special"><</span><span class="literal">'n'</span><span class="special">>(</span>parser_n<span class="special">),</span>
|
||||
default_p<span class="special">(</span>parser_default<span class="special">)</span>
|
||||
<span class="special">]
|
||||
;</span></pre>
|
||||
<p>This form chooses the <tt>parser_default</tt> parser if none of the cases matches the next character from the input stream. Please note that, obviously, only one <tt>default_p</tt> branch may be added to the <tt>switch_p</tt> parser construct. </p>
|
||||
<p>Moreover, it is possible to omit the parentheses and body from the <tt>default_p</tt> construct, in which case, no additional parser is executed and the overall <tt>switch_p</tt> construct simply returns a match on any character of the input stream, which does not match any of the <tt>case_p</tt> branches:</p>
|
||||
<pre> rule<span class="special"><></span> rule_overall <span class="special">=</span>
|
||||
switch_p
|
||||
<span class="special">[</span>
|
||||
case_p<span class="special"><</span><span class="literal">'a'</span><span class="special">>(</span>parser_a<span class="special">),</span>
|
||||
case_p<span class="special"><</span><span class="literal">'b'</span><span class="special">>(</span>parser_b<span class="special">),</span>
|
||||
<span class="comment">// ...</span>
|
||||
case_p<span class="special"><</span><span class="literal">'n'</span><span class="special">>(</span>parser_n<span class="special">),</span>
|
||||
default_p
|
||||
<span class="special">]</span>
|
||||
;</pre>
|
||||
<p>There is another form of the switch_p construct. This form allows us to explicitly specify the value to be used for matching against the <tt>case_p</tt> branches: </p>
|
||||
<pre> rule<span class="special"><></span> rule_overall <span class="special">=</span>
|
||||
switch_p<span class="special">(</span>cond<span class="special">)</span>
|
||||
<span class="special">[</span>
|
||||
case_p<span class="special"><</span><span class="literal">'a'</span><span class="special">>(</span>parser_a<span class="special">),</span>
|
||||
case_p<span class="special"><</span><span class="literal">'b'</span><span class="special">>(</span>parser_b<span class="special">),</span>
|
||||
<span class="comment"> // ...</span>
|
||||
case_p<span class="special"><</span><span class="literal">'n'</span><span class="special">>(</span>parser_n<span class="special">)</span>
|
||||
<span class="special">]</span>
|
||||
;</pre>
|
||||
<p>where <tt>cond</tt> is a parser or a nullary function or function object (functor). If it is a parser, then it is tried and its return value is used to match against the <tt>case_p</tt> branches. If it is a nullary function or functor, then its return value will be used. </p>
|
||||
<p>Please note that during its compilation, the <tt>switch_p</tt> construct is transformed into a real C++ <tt>switch</tt> statement. This makes the runtime execution very efficient. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><p><img src="theme/alert.gif" width="16" height="16"> <tt>BOOST_SPIRIT_SWITCH_CASE_LIMIT</tt><br>
|
||||
<br>
|
||||
The number of possible <tt>case_p</tt>/<tt>default_p</tt> branches is limited by the Spirit compile time constant <tt>BOOST_SPIRIT_SWITCH_CASE_LIMIT</tt>, which defaults to 3. There is no theoretical upper limit for this constant, but most compilers won't allow you to specify a very large number.</p>
|
||||
<p>Example:</p>
|
||||
<p class="style1"><span class="comment">// Define these before including switch.hpp <br>
|
||||
</span><span class="preprocessor">#define</span> BOOST_SPIRIT_SWITCH_CASE_LIMIT 10 </p></td>
|
||||
</tr>
|
||||
</table><br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="select_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="escape_char_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2003-2004 Hartmut Kaiser <br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,204 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Symbols</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%">
|
||||
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Symbols</b></font>
|
||||
</td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="distinct.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="trees.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>The class <tt>symbols</tt> implements a symbol table. The symbol table holds a dictionary
|
||||
of symbols where each symbol is a sequence of CharTs (a <tt>char</tt>, <tt>wchar_t</tt>,
|
||||
<tt>int</tt>, enumeration etc.). The template class, parameterized by the character
|
||||
type (CharT), can work efficiently with 8, 16, 32 and even 64 bit characters.
|
||||
Mutable data of type T is associated with each symbol.<br>
|
||||
</p>
|
||||
<p>Traditionally, symbol table management is maintained separately outside the
|
||||
BNF grammar through semantic actions. Contrary to standard practice, the Spirit
|
||||
symbol table class <tt>symbols</tt> is-a parser, an instance of which may be
|
||||
used anywhere in the EBNF grammar specification. It is an example of a dynamic
|
||||
parser. A dynamic parser is characterized by its ability to modify its behavior
|
||||
at run time. Initially, an empty symbols object matches nothing. At any time,
|
||||
symbols may be added, thus, dynamically altering its behavior.</p>
|
||||
<p>Each entry in a symbol table has an associated mutable data slot. In this regard,
|
||||
one can view the symbol table as an associative container (or map) of key-value
|
||||
pairs where the keys are strings. </p>
|
||||
<p>The symbols class expects two template parameters (actually there is a third,
|
||||
see detail box). The first parameter <tt>T</tt> specifies the data type associated
|
||||
with each symbol (defaults to <tt>int</tt>) and the second parameter <tt>CharT</tt>
|
||||
specifies the character type of the symbols (defaults to <tt>char</tt>). </p>
|
||||
<pre><span class=identifier> </span><span class=keyword>template
|
||||
</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>T </span><span class=special>= </span><span class=keyword>int</span><span class=special>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>CharT </span><span class=special>= </span><span class=keyword>char</span><span class=special>,
|
||||
</span><span class=keyword>typename </span><span class=identifier>SetT </span><span class=special>= </span><span class=identifier>impl</span><span class=special>::</span><span class=identifier>tst</span><span class=special><</span><span class=identifier>T</span><span class=special>, </span><span class=identifier>CharT</span><span class=special>>
|
||||
</span><span class=special>>
|
||||
</span><span class=keyword>class </span><span class=identifier>symbols</span><span class=special>;</span></pre>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>Ternary
|
||||
Search Trees</b><br>
|
||||
<br>
|
||||
The actual set implementation is supplied by the SetT template parameter
|
||||
(3rd template parameter of the symbols class). By default, this uses the
|
||||
tst class which is an implementation of the Ternary Search Tree. <br>
|
||||
<br>
|
||||
Ternary Search Trees are faster than hashing for many typical search problems
|
||||
especially when the search interface is iterator based. Searching for a
|
||||
string of length k in a ternary search tree with n strings will require
|
||||
at most O(log n+k) character comparisons. TSTs are many times faster than
|
||||
hash tables for unsuccessful searches since mismatches are discovered earlier
|
||||
after examining only a few characters. Hash tables always examine an entire
|
||||
key when searching.<br>
|
||||
<br>
|
||||
For details see <a href="http://www.cs.princeton.edu/%7Ers/strings/">http://www.cs.princeton.edu/~rs/strings/</a>.</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Here are some sample declarations:</p>
|
||||
<pre><span class=identifier> </span><span class=identifier>symbols</span><span class=special><> </span><span class=identifier>sym</span><span class=special>;
|
||||
</span><span class=identifier>symbols</span><span class=special><</span><span class=keyword>short</span><span class=special>, </span><span class=keyword>wchar_t</span><span class=special>> </span><span class=identifier>sym2</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>struct </span><span class=identifier>my_info
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>int </span><span class=identifier>id</span><span class=special>;
|
||||
</span><span class=keyword>double </span><span class=identifier>value</span><span class=special>;
|
||||
</span><span class=special>};
|
||||
|
||||
</span><span class=identifier>symbols</span><span class=special><</span><span class=identifier>my_info</span><span class=special>> </span><span class=identifier>sym3</span><span class=special>;</span></pre>
|
||||
<p>After having declared our symbol tables, symbols may be added statically using
|
||||
the construct:</p>
|
||||
<pre><span class=identifier> sym </span><span class=special>= </span><span class=identifier>a</span><span class=special>, </span><span class=identifier>b</span><span class=special>, </span><span class=identifier>c</span><span class=special>, </span><span class=identifier>d </span><span class=special>...;</span></pre>
|
||||
<p>where <tt>sym</tt> is a symbol table and <tt>a..d</tt> etc. are strings. <img src="theme/note.gif" width="16" height="16"> Note
|
||||
that the comma operator is separating the items being added to the symbol table,
|
||||
through an assignment. Due to operator overloading this is possible and correct
|
||||
(though it may take a little getting used to) and is a concise way to initialize
|
||||
the symbol table with many symbols. Also, it is perfectly valid to make multiple
|
||||
assignments to a symbol table to iteratively add symbols (or groups of symbols)
|
||||
at different times.</p>
|
||||
<p>Simple example:<br>
|
||||
</p>
|
||||
<pre><span class=identifier> sym </span><span class=special>= </span><span class=string>"pineapple"</span><span class=special>, </span><span class=string>"orange"</span><span class=special>, </span><span class=string>"banana"</span><span class=special>, </span><span class=string>"apple"</span><span class=special>, </span><span class=string>"mango"</span><span class=special>;</span></pre>
|
||||
<p>Note that it is invalid to add the same symbol multiple times to a symbol table,
|
||||
though you may modify the value associated with a symbol arbitrarily many times.</p>
|
||||
<p>Now, we may use sym in the grammar. Example:</p>
|
||||
<pre><span class=identifier> fruits </span><span class=special>= </span><span class=identifier>sym </span><span class=special>>> </span><span class=special>*(</span><span class=literal>',' </span><span class=special>>> </span><span class=identifier>sym</span><span class=special>);</span></pre>
|
||||
<p>Alternatively, symbols may be added dynamically through the member functor
|
||||
<tt>add</tt> (see <tt><a href="#symbol_inserter">symbol_inserter</a></tt> below).
|
||||
The member functor <tt>add</tt> may be attached to a parser as a semantic action
|
||||
taking in a begin/end pair:</p>
|
||||
<pre><span class=identifier> p</span><span class=special>[</span><span class=identifier>sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>]</span></pre>
|
||||
<p>where p is a parser (and sym is a symbol table). On success, the matching portion
|
||||
of the input is added to the symbol table.</p>
|
||||
<p><tt>add</tt> may also be used to directly initialize data. Examples:</p>
|
||||
<pre><span class=identifier> sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>(</span><span class=string>"hello"</span><span class=special>, </span><span class=number>1</span><span class=special>)(</span><span class=string>"crazy"</span><span class=special>, </span><span class=number>2</span><span class=special>)(</span><span class=string>"world"</span><span class=special>, </span><span class=number>3</span><span class=special>);</span></pre>
|
||||
<p>Assuming of course that the data slot associated with <tt>sym</tt> is an integer.</p>
|
||||
<p>The data associated with each symbol may be modified any time. The most obvious
|
||||
way of course is through <a href="semantic_actions.html">semantic actions</a>.
|
||||
A function or functor, as usual, may be attached to the symbol table. The symbol
|
||||
table expects a function or functor compatible with the signature:</p>
|
||||
<p><b>Signature for functions:</b></p>
|
||||
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>void </span><span class=identifier>func</span><span class=special>(</span><span class=identifier>T</span><span class="special">&</span><span class=identifier> data</span><span class=special>);</span></font></code></pre>
|
||||
<p><b>Signature for functors:</b><br>
|
||||
</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>ftor
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>T</span><span class="special">&</span><span class=identifier> data</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<p>Where <tt>T</tt> is the data type of the symbol table (the <tt>T</tt> in its
|
||||
template parameter list). When the symbol table successfully matches something
|
||||
from the input, the data associated with the matching entry in the symbol table
|
||||
is reported to the semantic action.</p>
|
||||
<h2>Symbol table utilities</h2>
|
||||
<p>Sometimes, one may wish to deal with the symbol table directly. Provided are
|
||||
some symbol table utilities.</p>
|
||||
<p><b>add</b></p>
|
||||
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>SetT</span><span class=special>>
|
||||
</span><span class=identifier>T</span><span class=special>* </span><span class=identifier>add</span><span class=special>(</span><span class=identifier>symbols</span><span class=special><</span><span class=identifier>T</span><span class=special>, </span><span class=identifier>CharT</span><span class=special>, </span><span class=identifier>SetT</span><span class=special>>& </span><span class=identifier>table</span><span class=special>, </span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>sym</span><span class=special>, </span><span class=identifier>T </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>data </span><span class=special>= </span><span class=identifier>T</span><span class=special>());</span></pre>
|
||||
<p>adds a symbol <tt>sym</tt> (C string) to a symbol table <tt>table</tt> plus
|
||||
an optional data <tt>data</tt> associated with the symbol. Returns a pointer
|
||||
to the data associated with the symbol or <tt>NULL</tt> if add failed (e.g.
|
||||
when the symbol has already been added).<br>
|
||||
<br>
|
||||
<b>find</b></p>
|
||||
<pre><span class=special> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>SetT</span><span class=special>>
|
||||
</span><span class=identifier>T</span><span class=special>* </span><span class=identifier>find</span><span class=special>(</span><span class=identifier>symbols</span><span class=special><</span><span class=identifier>T</span><span class=special>, </span><span class=identifier>CharT</span><span class=special>, </span><span class=identifier>SetT</span><span class=special>> </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>table</span><span class=special>, </span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>sym</span><span class=special>);</span></pre>
|
||||
<p>finds a symbol <tt>sym</tt> (C string) from a symbol table <tt>table</tt>.
|
||||
Returns a pointer to the data associated with the symbol or <tt>NULL</tt> if
|
||||
not found.</p>
|
||||
<h2><a name="symbol_inserter"></a>symbol_inserter</h2>
|
||||
<p>The symbols class holds an instance of this class named <tt>add</tt>. This
|
||||
can be called directly just like a member function, passing in a first/last
|
||||
iterator and optional data:<br>
|
||||
<br>
|
||||
</p>
|
||||
<pre><span class=identifier> sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>data</span><span class=special>);</span></pre>
|
||||
<p>Or, passing in a C string and optional data:<br>
|
||||
</p>
|
||||
<pre><span class=identifier> sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>(</span><span class=identifier>c_string</span><span class=special>, </span><span class=identifier>data</span><span class=special>);</span></pre>
|
||||
<p>where <tt>sym</tt> is a symbol table. The <tt>data</tt> argument is optional.
|
||||
The nice thing about this scheme is that it can be cascaded. We've seen this
|
||||
applied above. Here's a snippet from the roman numerals parser:</p>
|
||||
<pre> <span class=comment>// Parse roman numerals (1..9) using the symbol table.
|
||||
|
||||
</span> <span class=keyword>struct </span><span class=identifier>ones </span><span class=special>: </span><span class=identifier>symbols</span><span class=special><</span><span class=keyword>unsigned</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>ones</span><span class=special>()
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>add
|
||||
</span><span class=special>(</span><span class=string>"I" </span><span class=special>, </span><span class=number>1</span><span class=special>)
|
||||
</span><span class=special>(</span><span class=string>"II" </span><span class=special>, </span><span class=number>2</span><span class=special>)
|
||||
</span><span class=special>(</span><span class=string>"III" </span><span class=special>, </span><span class=number>3</span><span class=special>)
|
||||
</span><span class=special>(</span><span class=string>"IV" </span><span class=special>, </span><span class=number>4</span><span class=special>)
|
||||
</span><span class=special>(</span><span class=string>"V" </span><span class=special>, </span><span class=number>5</span><span class=special>)
|
||||
</span><span class=special>(</span><span class=string>"VI" </span><span class=special>, </span><span class=number>6</span><span class=special>)
|
||||
</span><span class=special>(</span><span class=string>"VII" </span><span class=special>, </span><span class=number>7</span><span class=special>)
|
||||
</span><span class=special>(</span><span class=string>"VIII" </span><span class=special>, </span><span class=number>8</span><span class=special>)
|
||||
</span><span class=special>(</span><span class=string>"IX" </span><span class=special>, </span><span class=number>9</span><span class=special>)
|
||||
</span><span class=special>;
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=special>} </span><span class=identifier>ones_p</span><span class=special>;</span></pre>
|
||||
<p>Notice that a user defined struct <tt>ones</tt> is subclassed from <tt>symbols</tt>.
|
||||
Then at construction time, we added all the symbols using the <tt>add</tt> symbol_inserter.</p>
|
||||
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/roman_numerals.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
|
||||
<p>Again, <tt>add</tt> may also be used as a semantic action since it conforms
|
||||
to the action interface (see semantic actions):<br>
|
||||
</p>
|
||||
<pre><span class=special></span><span class=identifier> p</span><span class=special>[</span><span class=identifier>sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>]</span></pre>
|
||||
<p>where p is a parser of course.<span class=special><br>
|
||||
</span></p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="distinct.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="trees.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,373 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Techniques</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Techniques</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="style_guide.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="faq.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<ul>
|
||||
<li><a href="#templatized_functors">Templatized Functors</a></li>
|
||||
<li><a href="#multiple_scanner_support">Rule With Multiple Scanners</a></li>
|
||||
<li><a href="#no_rules">Look Ma' No Rules!</a></li>
|
||||
<li><a href="#typeof">typeof</a></li>
|
||||
<li><a href="#nabialek_trick">Nabialek trick</a></li>
|
||||
</ul>
|
||||
<h3><a name="templatized_functors"></a> Templatized Functors</h3>
|
||||
<p>For the sake of genericity, it is often better to make the functor's member
|
||||
<tt>operator()</tt> a template. That way, we do not have to concern ourselves
|
||||
with the type of the argument to expect as long as the behavior is appropriate.
|
||||
For instance, rather than hard-coding <tt>char const*</tt> as the argument of
|
||||
a generic semantic action, it is better to make it a template member function.
|
||||
That way, it can accept any type of iterator:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_functor
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>>
|
||||
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>IteratorT </span><span class=identifier>first</span><span class=special>, </span><span class=identifier>IteratorT </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
|
||||
</span><span class=special>};</span></font></code></pre>
|
||||
<p>Take note that this is only possible with functors. It is not possible to pass
|
||||
in template functions as semantic actions unless you cast it to the correct
|
||||
function signature; in which case, you <em>monomorphize</em> the function. This
|
||||
clearly shows that functors are superior to plain functions.</p>
|
||||
<h3><b><a name="multiple_scanner_support" id="multiple_scanner_support"></a> Rule
|
||||
With Multiple Scanners</b></h3>
|
||||
<p>As of v1.8.0, rules can use one or more scanner types. There are cases, for
|
||||
instance, where we need a rule that can work on the phrase and character levels.
|
||||
Rule/scanner mismatch has been a source of confusion and is the no. 1 <a href="faq.html#scanner_business">FAQ</a>.
|
||||
To address this issue, we now have <a href="rule.html#multiple_scanner_support">multiple
|
||||
scanner support</a>. </p>
|
||||
<p>Here is an example of a grammar with a rule <tt>r</tt> that can be called with
|
||||
3 types of scanners (phrase-level, lexeme, and lower-case). See the <a href="rule.html">rule</a>,
|
||||
<a href="grammar.html">grammar</a>, <a href="scanner.html#lexeme_scanner">lexeme_scanner</a>
|
||||
and <a href="scanner.html#as_lower_scanner">as_lower_scanner </a>for more information.
|
||||
</p>
|
||||
<p>Here's the grammar (see <a href="../example/techniques/multiple_scanners.cpp">multiple_scanners.cpp</a>):
|
||||
</p>
|
||||
<pre><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_grammar </span><span class=special>: </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>my_grammar</span><span class=special>>
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>my_grammar </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>lower_p</span><span class=special>;
|
||||
</span><span class=identifier>rr </span><span class=special>= </span><span class=special>+(</span><span class=identifier>lexeme_d</span><span class=special>[</span><span class=identifier>r</span><span class=special>] </span><span class=special>>> </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=identifier>r</span><span class=special>] </span><span class=special>>> </span><span class=identifier>r</span><span class=special>);
|
||||
</span><span class=special>}
|
||||
|
||||
</span><span class=keyword>typedef </span><span class=identifier>scanner_list</span><span class=special><
|
||||
</span><span class=identifier>ScannerT
|
||||
</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>lexeme_scanner</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>as_lower_scanner</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
</span><span class=special>> </span><span class=identifier>scanners</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>scanners</span><span class=special>> </span><span class=identifier>r</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=identifier>rr</span><span class=special>;
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>rr</span><span class=special>; </span><span class=special>}
|
||||
</span><span class=special>};
|
||||
</span><span class=special>};</span></pre>
|
||||
<p>By default support for multiple scanners is disabled. The macro
|
||||
<tt>BOOST_SPIRIT_RULE_SCANNERTYPE_LIMIT</tt> must be defined to the
|
||||
maximum number of scanners allowed in a scanner_list. The value must
|
||||
be greater than 1 to enable multiple scanners. Given the
|
||||
example above, to define a limit of three scanners for the list, the
|
||||
following line must be inserted into the source file before the
|
||||
inclusion of Spirit headers:
|
||||
</p>
|
||||
<pre><span class=special> </span><span class=preprocessor>#define </span><span class=identifier>BOOST_SPIRIT_RULE_SCANNERTYPE_LIMIT</span> <span class=literal>3</span></pre>
|
||||
<h3><span class=special></span><b> <a name="no_rules" id="no_rules"></a> Look
|
||||
Ma' No Rules!</b></h3>
|
||||
<p>You use grammars and you use lots of 'em? Want a fly-weight, no-cholesterol,
|
||||
super-optimized grammar? Read on...</p>
|
||||
<p>I have a love-hate relationship with rules. I guess you know the reasons why.
|
||||
A lot of problems stem from the limitation of rules. Dynamic polymorphism and
|
||||
static polymorphism in C++ do not mix well. There is no notion of virtual template
|
||||
functions in C++; at least not just yet. Thus, the <strong>rule is tied to a
|
||||
specific scanner type</strong>. This results in problems such as the <a href="faq.html#scanner_business">scanner
|
||||
business</a>, our no. 1 FAQ. Apart from that, the virtual functions in rules
|
||||
  slow down parsing, kill all meta-information, and kill inlining, hence bloating
|
||||
the generated code, especially for very tiny rules such as:</p>
|
||||
<pre> r <span class="special">=</span> ch_p<span class="special">(</span><span class="quotes">'x'</span><span class="special">) >></span> uint_p<span class="special">;</span></pre>
|
||||
<p> The rule's limitation is the main reason why the grammar is designed the way
|
||||
it is now, with a nested template definition class. The rule's limitation is
|
||||
  also the reason why subrules exist. But do we really need rules? Of course!
|
||||
Before C++ adopts some sort of auto-type deduction, such as that proposed by
|
||||
David Abrahams in clc++m:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>auto </span><span class=identifier>r </span><span class=special>= ...</span><span class=identifier>definition </span><span class=special>...</span></code></pre>
|
||||
<p> we are tied to the rule as RHS placeholders. However... on some occasions
|
||||
we can get by without rules! For instance, rather than writing:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>rule</span><span class=special><> </span><span class=identifier>x </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'x'</span><span class=special>);</span></code></pre>
|
||||
<p> It's better to write:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>chlit</span><span class=special><> </span><span class=identifier>x </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'x'</span><span class=special>);</span></code></pre>
|
||||
<p> That's trivial. But what if the rule is rather complicated? Ok, let's proceed
|
||||
stepwise... I'll investigate a simple skip_parser based on the C grammar from
|
||||
Hartmut Kaiser. Basically, the grammar is written as (see <a href="../example/techniques/no_rules/no_rule1.cpp">no_rule1.cpp</a>):</p>
|
||||
<pre><code> <span class=keyword>struct </span><span class=identifier>skip_grammar </span><span class=special>: </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>skip_grammar</span><span class=special>>
|
||||
{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>skip_grammar </span><span class=keyword>const</span><span class=special>& /*</span><span class=identifier>self</span><span class=special>*/)
|
||||
{
|
||||
</span><span class=identifier>skip
|
||||
</span><span class=special>= </span><span class=identifier>space_p
|
||||
</span><span class=special>| </span><span class=string>"//" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) >> </span><span class=literal>'\n'
|
||||
</span><span class=special>| </span><span class=string>"/*" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>"*/"</span><span class=special>) >> </span><span class=string>"*/"
|
||||
</span><span class=special>;
|
||||
}
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=identifier>skip</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>rule</span><span class=special><</span><span class=identifier>ScannerT</span><span class=special>> </span><span class=keyword>const</span><span class=special>&
|
||||
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>skip</span><span class=special>; }
|
||||
};
|
||||
};</span></code></pre>
|
||||
<p> Ok, so far so good. Can we do better? Well... since there are no recursive
|
||||
rules there (in fact there's only one rule), you can expand the type of rule's
|
||||
RHS as the rule type (see <a href="../example/techniques/no_rules/no_rule2.cpp">no_rule2.cpp</a>):</p>
|
||||
<pre><code><span class=special> </span><span class=keyword>struct </span><span class=identifier>skip_grammar </span><span class=special>: </span><span class=identifier>grammar</span><span class=special><</span><span class=identifier>skip_grammar</span><span class=special>>
|
||||
{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>definition
|
||||
</span><span class=special>{
|
||||
</span> <span class=identifier>definition</span><span class=special>(</span><span class=identifier>skip_grammar </span><span class=keyword>const</span><span class=special>& /*</span><span class=identifier>self</span><span class=special>*/)
|
||||
: </span><span class=identifier>skip</span><span class=special>
|
||||
( </span><span class=identifier>space_p
|
||||
</span><span class=special>| </span><span class=string>"//" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) >> </span><span class=literal>'\n'
|
||||
</span><span class=special>| </span><span class=string>"/*" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>"*/"</span><span class=special>) >> </span><span class=string>"*/"
|
||||
</span><span class=special>)
|
||||
{
|
||||
}
|
||||
|
||||
</span><span class=keyword>typedef
|
||||
</span><span class=identifier>alternative</span><span class=special><</span><span class=identifier>alternative</span><span class=special><</span><span class=identifier>space_parser</span><span class=special>, </span><span class=identifier>sequence</span><span class=special><</span><span class=identifier>sequence</span><span class=special><
|
||||
</span><span class=identifier>strlit</span><span class=special><</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*>, </span><span class=identifier>kleene_star</span><span class=special><</span><span class=identifier>difference</span><span class=special><</span><span class=identifier>anychar_parser</span><span class=special>,
|
||||
</span><span class=identifier>chlit</span><span class=special><</span><span class=keyword>char</span><span class=special>> > > >, </span><span class=identifier>chlit</span><span class=special><</span><span class=keyword>char</span><span class=special>> > >, </span><span class=identifier>sequence</span><span class=special><</span><span class=identifier>sequence</span><span class=special><
|
||||
</span><span class=identifier>strlit</span><span class=special><</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*>, </span><span class=identifier>kleene_star</span><span class=special><</span><span class=identifier>difference</span><span class=special><</span><span class=identifier>anychar_parser</span><span class=special>,
|
||||
</span><span class=identifier>strlit</span><span class=special><</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*> > > >, </span><span class=identifier>strlit</span><span class=special><</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*> > >
|
||||
</span><span class=identifier>skip_t</span><span class=special>;
|
||||
</span><span class=special> </span><span class=identifier>skip_t </span><span class=identifier>skip</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>skip_t </span><span class=keyword>const</span><span class=special>&
|
||||
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>skip</span><span class=special>; }
|
||||
};
|
||||
};</span></code></pre>
|
||||
<p> Ughhh! How did I do that? How was I able to get at the complex typedef? Am
|
||||
I insane? Well, not really... there's a trick! What you do is define the typedef
|
||||
<tt>skip_t</tt> first as int:</p>
|
||||
<pre>
|
||||
<code><span class=keyword>typedef </span><span class=keyword>int </span><span class=identifier>skip_t</span><span class=special>;</span></code></pre>
|
||||
<p> Try to compile. Then, the compiler will generate an obnoxious error message
|
||||
such as:</p>
|
||||
<pre>
|
||||
<code><span class=string>"cannot convert boost::spirit::alternative<... blah blah...to int"</span><span class=special>.</span></code></pre>
|
||||
<p> <strong>THERE YOU GO!</strong> You got its type! I just copy and paste the
|
||||
correct type (removing explicit qualifications, if preferred).</p>
|
||||
<p> Can we still go further? Yes. Remember that the grammar was designed for rules.
|
||||
The nested template definition class is needed to get around the rule's limitations.
|
||||
Without rules, I propose a new class called <tt>sub_grammar</tt>, the grammar's
|
||||
low-fat counterpart:</p>
|
||||
<pre><code><span class=special> </span><span class=keyword>namespace </span><span class=identifier>boost </span><span class=special>{ </span><span class=keyword>namespace </span><span class=identifier>spirit
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>sub_grammar </span><span class=special>: </span><span class=identifier>parser</span><span class=special><</span><span class=identifier>DerivedT</span><span class=special>>
|
||||
{
|
||||
</span><span class=keyword>typedef </span><span class=identifier>sub_grammar </span><span class=identifier>self_t</span><span class=special>;
|
||||
</span><span class=keyword>typedef </span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>embed_t</span><span class=special>;
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>result
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>typedef </span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special><
|
||||
</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>::</span><span class=identifier>start_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
</span><span class=identifier>type</span><span class=special>;
|
||||
};
|
||||
|
||||
</span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>derived</span><span class=special>() </span><span class=keyword>const
|
||||
</span><span class=special>{ </span><span class=keyword>return </span><span class=special>*</span><span class=keyword>static_cast</span><span class=special><</span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>*>(</span><span class=keyword>this</span><span class=special>); }
|
||||
|
||||
</span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>>
|
||||
</span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special><</span><span class=identifier>self_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>>::</span><span class=identifier>type
|
||||
</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const
|
||||
</span><span class=special>{
|
||||
</span><span class=keyword>return </span><span class=identifier>derived</span><span class=special>().</span><span class=identifier>start</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>);
|
||||
}
|
||||
};
|
||||
}}</span></code></pre>
|
||||
<p>With the <tt>sub_grammar</tt> class, we can define our skipper grammar this
|
||||
way (see <a href="../example/techniques/no_rules/no_rule3.cpp">no_rule3.cpp</a>):</p>
|
||||
<pre><code><span class=special> </span><span class=keyword>struct </span><span class=identifier>skip_grammar </span><span class=special>: </span><span class=identifier>sub_grammar</span><span class=special><</span><span class=identifier>skip_grammar</span><span class=special>>
|
||||
{
|
||||
</span><span class=keyword>typedef
|
||||
</span><span class=identifier>alternative</span><span class=special><</span><span class=identifier>alternative</span><span class=special><</span><span class=identifier>space_parser</span><span class=special>, </span><span class=identifier>sequence</span><span class=special><</span><span class=identifier>sequence</span><span class=special><
|
||||
</span><span class=identifier>strlit</span><span class=special><</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*>, </span><span class=identifier>kleene_star</span><span class=special><</span><span class=identifier>difference</span><span class=special><</span><span class=identifier>anychar_parser</span><span class=special>,
|
||||
</span><span class=identifier>chlit</span><span class=special><</span><span class=keyword>char</span><span class=special>> > > >, </span><span class=identifier>chlit</span><span class=special><</span><span class=keyword>char</span><span class=special>> > >, </span><span class=identifier>sequence</span><span class=special><</span><span class=identifier>sequence</span><span class=special><
|
||||
</span><span class=identifier>strlit</span><span class=special><</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*>, </span><span class=identifier>kleene_star</span><span class=special><</span><span class=identifier>difference</span><span class=special><</span><span class=identifier>anychar_parser</span><span class=special>,
|
||||
</span><span class=identifier>strlit</span><span class=special><</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*> > > >, </span><span class=identifier>strlit</span><span class=special><</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*> > >
|
||||
</span><span class=identifier>start_t</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>skip_grammar</span><span class=special>()
|
||||
: </span><span class=identifier>start
|
||||
</span><span class=special>(
|
||||
</span><span class=identifier>space_p
|
||||
</span><span class=special>| </span><span class=string>"//" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) >> </span><span class=literal>'\n'
|
||||
</span><span class=special>| </span><span class=string>"/*" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>"*/"</span><span class=special>) >> </span><span class=string>"*/"
|
||||
</span><span class=special>)
|
||||
{}
|
||||
|
||||
</span><span class=identifier>start_t </span><span class=identifier>start</span><span class=special>;
|
||||
};</span></code></pre>
|
||||
<p>But what for, you ask? You can simply use the <tt>start_t</tt> type above as-is.
|
||||
It's already a parser! We can just type:</p>
|
||||
<pre>
|
||||
<code><span class=identifier>skipper_t </span><span class=identifier>skipper </span><span class=special>=
|
||||
</span><span class=identifier>space_p
|
||||
</span><span class=special>| </span><span class=string>"//" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) >> </span><span class=literal>'\n' </span><br> <span class=special>| </span><span class=string>"/*" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>"*/"</span><span class=special>) >> </span><span class=string>"*/"</span>
|
||||
<span class=special> ;</span></code></pre>
|
||||
<p> and use <tt>skipper</tt> just as we would any parser? Well, a subtle difference
|
||||
is that <tt>skipper</tt>, used this way will be embedded <strong>by value </strong>when<strong>
|
||||
</strong>you compose more complex parsers using it. That is, if we use <tt>skipper</tt>
|
||||
inside another production, the whole thing will be stored in the composite.
|
||||
Heavy!</p>
|
||||
<p> The proposed <tt>sub_grammar</tt> OTOH will be held by reference. Note:</p>
|
||||
<pre><code> <span class=keyword>typedef </span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>embed_t</span><span class=special>;</span></code></pre>
|
||||
<p>The proposed <tt>sub_grammar</tt> does not have the inherent limitations of
|
||||
  rules, is very lightweight, and should be blazingly fast (can be fully inlined
|
||||
and does not use virtual functions). Perhaps this class will be part of a future
|
||||
spirit release. </p>
|
||||
<table width="80%" border="0" align="center">
|
||||
<tr>
|
||||
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> <strong>The
|
||||
no-rules result</strong><br> <br>
|
||||
      So, how much did we save? On MSVC 7.1, the original code: <a href="../example/techniques/no_rules/no_rule1.cpp">no_rule1.cpp</a>
|
||||
compiles to <strong>28k</strong>. Eliding rules, <a href="../example/techniques/no_rules/no_rule2.cpp">no_rule2.cpp</a>,
|
||||
we got <strong>24k</strong>. Not bad, we shaved off 4k amounting to a 14%
|
||||
reduction. But you'll be in for a surprise. The last version, using the
|
||||
sub-grammar: <a href="../example/techniques/no_rules/no_rule3.cpp">no_rule3.cpp</a>,
|
||||
compiles to <strong>5.5k</strong>! That's a whopping 80% reduction.<br>
|
||||
<br>
|
||||
<table width="100%" border="1">
|
||||
<tr>
|
||||
<td><a href="../example/techniques/no_rules/no_rule1.cpp">no_rule1.cpp</a></td>
|
||||
<td><strong>28k</strong></td>
|
||||
<td>standard rule and grammar</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="../example/techniques/no_rules/no_rule2.cpp">no_rule2.cpp</a></td>
|
||||
<td><strong>24k</strong></td>
|
||||
<td>standard grammar, no rule</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="../example/techniques/no_rules/no_rule3.cpp">no_rule3.cpp</a></td>
|
||||
<td><strong>5.5k</strong></td>
|
||||
<td>sub_grammar, no rule, no grammar</td>
|
||||
</tr>
|
||||
</table> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<h3><b> <a name="typeof" id="typeof"></a> typeof</b></h3>
|
||||
<p>Some compilers already support the <tt>typeof</tt> keyword. Examples are g++
|
||||
and Metrowerks CodeWarrior. Someday, <tt>typeof</tt> will become commonplace.
|
||||
It is worth noting that we can use <tt>typeof</tt> to define non-recursive rules
|
||||
without using the rule class. To give an example, we'll use the skipper example
|
||||
above; this time using <tt>typeof</tt>. First, to avoid redundancy, we'll introduce
|
||||
a macro <tt>RULE</tt>: </p>
|
||||
<pre><code> <span class=preprocessor>#define </span><span class=identifier>RULE</span><span class=special>(</span><span class=identifier>name</span><span class=special>, </span><span class=identifier>definition</span><span class=special>) </span><span class="keyword">typeof</span><span class=special>(</span><span class=identifier>definition</span><span class=special>) </span><span class=identifier>name </span><span class=special>= </span><span class=identifier>definition</span></code></pre>
|
||||
<p>Then, simply:</p>
|
||||
<pre><code><span class=identifier> </span><span class=identifier>RULE</span><span class=special>(
|
||||
</span><span class=identifier>skipper</span><span class=special>,
|
||||
( </span><span class=identifier>space_p
|
||||
</span><span class=special>| </span><span class=string>"//" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) >> </span><span class=literal>'\n'
|
||||
</span><span class=special>| </span><span class=string>"/*" </span><span class=special>>> *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>"*/"</span><span class=special>) >> </span><span class=string>"*/"
|
||||
</span><span class=special>)
|
||||
);</span></code></pre>
|
||||
<p>(see <a href="../example/techniques/typeof.cpp">typeof.cpp</a>)</p>
|
||||
<p>That's it! Now you can use skipper just as you would any parser. Be reminded,
|
||||
however, that <tt>skipper</tt> above will be embedded by value when<strong>
|
||||
</strong>you compose more complex parsers using it (see <tt>sub_grammar</tt> rationale above). You can use the <tt>sub_grammar</tt> class to avoid this problem.</p>
|
||||
<h3><a name="nabialek_trick"></a> Nabialek trick</h3>
|
||||
<p>This technique, which I'll call the <strong><em>&quot;Nabialek trick&quot; </em></strong>(from the name of its inventor, Sam Nabialek), can improve the rule dispatch from linear non-deterministic to deterministic. The trick applies to grammars where a keyword (operator, etc.) precedes a production. There are lots of grammars similar to this:</p>
|
||||
<pre> <span class=identifier>r </span><span class=special>=
|
||||
</span><span class=identifier>keyword1 </span><span class=special>>> </span><span class=identifier>production1
|
||||
</span><span class=special>| </span><span class=identifier>keyword2 </span><span class=special>>> </span><span class=identifier>production2
|
||||
</span><span class=special>| </span><span class=identifier>keyword3 </span><span class=special>>> </span><span class=identifier>production3
|
||||
</span><span class=special>| </span><span class=identifier>keyword4 </span><span class=special>>> </span><span class=identifier>production4
|
||||
</span><span class=special>| </span><span class=identifier>keyword5 </span><span class=special>>> </span><span class=identifier>production5
|
||||
</span><span class=comment>/*** etc ***/
|
||||
</span><span class=special>;</span></pre>
|
||||
<p>The cascaded alternatives are tried one at a time through trial and error until something matches. The Nabialek trick takes advantage of the <a href="symbols.html">symbol table</a>'s search properties to optimize the dispatching of the alternatives. For an example, see <a href="../example/techniques/nabialek.cpp">nabialek.cpp</a>. The grammar works as follows. There are two rules (<tt>one</tt> and <tt>two</tt>). When "one" is recognized, rule <tt>one</tt> is invoked. When "two" is recognized, rule <tt>two</tt> is invoked. Here's the grammar:</p>
|
||||
<pre><span class=special> </span><span class=identifier>one </span><span class=special>= </span><span class=identifier>name</span><span class=special>;
|
||||
</span><span class=identifier>two </span><span class=special>= </span><span class=identifier>name </span><span class=special>>> </span><span class=literal>',' </span><span class=special>>> </span><span class=identifier>name</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>continuations</span><span class=special>.</span><span class=identifier>add
|
||||
</span><span class=special>(</span><span class=string>"one"</span><span class=special>, &</span><span class=identifier>one</span><span class=special>)
|
||||
</span><span class=special>(</span><span class=string>"two"</span><span class=special>, &</span><span class=identifier>two</span><span class=special>)
|
||||
</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>line </span><span class=special>= </span><span class=identifier>continuations</span><span class=special>[</span><span class=identifier>set_rest</span><span class=special><</span><span class=identifier>rule_t</span><span class=special>>(</span><span class=identifier>rest</span><span class=special>)] </span><span class=special>>> </span><span class=identifier>rest</span><span class=special>;</span></pre>
|
||||
<p>where continuations is a <a href="symbols.html">symbol table</a> with pointer to rule_t slots. one, two, name, line and rest are rules:</p>
|
||||
<pre><span class=special> </span><span class=identifier>rule_t </span><span class=identifier>name</span><span class=special>;
|
||||
</span><span class=identifier>rule_t </span><span class=identifier>line</span><span class=special>;
|
||||
</span><span class=identifier>rule_t </span><span class=identifier>rest</span><span class=special>;
|
||||
</span><span class=identifier>rule_t </span><span class=identifier>one</span><span class=special>;
|
||||
</span><span class=identifier>rule_t </span><span class=identifier>two</span><span class=special>;
|
||||
|
||||
</span><span class=identifier>symbols</span><span class=special><</span><span class=identifier>rule_t</span><span class=special>*> </span><span class=identifier>continuations</span><span class=special>;</span></pre>
|
||||
<p>set_rest, the semantic action attached to continuations is:</p>
|
||||
<pre><span class=special> </span><span class=keyword>template </span><span class=special><</span><span class=keyword>typename </span><span class=identifier>Rule</span><span class=special>>
|
||||
</span><span class=keyword>struct </span><span class=identifier>set_rest
|
||||
</span><span class=special>{
|
||||
</span><span class=identifier>set_rest</span><span class=special>(</span><span class=identifier>Rule</span><span class=special>& </span><span class=identifier>the_rule</span><span class=special>)
|
||||
</span><span class=special>: </span><span class=identifier>the_rule</span><span class=special>(</span><span class=identifier>the_rule</span><span class=special>) </span><span class=special>{}
|
||||
|
||||
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>Rule</span><span class=special>* </span><span class=identifier>newRule</span><span class=special>) </span><span class=keyword>const
|
||||
        </span><span class=special>{ </span><span class=identifier>the_rule </span><span class=special>= </span><span class=special>*</span><span class=identifier>newRule</span><span class=special>; </span><span class=special>}
|
||||
|
||||
</span><span class=identifier>Rule</span><span class=special>& </span><span class=identifier>the_rule</span><span class=special>;
|
||||
</span><span class=special>};</span></pre>
|
||||
<p>Notice how the <tt>rest</tt> rule gets set dynamically when the <tt>set_rest</tt> action is called. The dynamic grammar parses inputs such as:</p>
|
||||
<p> "one only"<br>
|
||||
"one again"<br>
|
||||
"two first, second"</p>
|
||||
<p>The cool part is that the <tt>rest</tt> rule is set (by the <tt>set_rest</tt> action) depending on what the symbol table got. If it got a <em>"one"</em> then rest = one. If it got <em>"two"</em>, then rest = two. Very nifty! This technique should be very fast, especially when there are lots of keywords. It would be nice to add special facilities to make this easy to use. I imagine:</p>
|
||||
<pre><span class=special> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>keywords </span><span class=special>>> </span><span class=identifier>rest</span><span class=special>;</span></pre>
|
||||
<p>where <tt>keywords</tt> is a special parser (based on the symbol table) that automatically sets its RHS (rest) depending on the acquired symbol. This, I think, is mighty cool! Someday perhaps... </p>
|
||||
<p><img src="theme/note.gif" width="16" height="16"> Also, see the <a href="switch_parser.html">switch parser</a> for another deterministic parsing trick for character/token prefixes. </p>
|
||||
<span class=special></span>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="style_guide.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="faq.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 1998-2003 Joel de Guzman<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,117 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>The Lazy Parsers</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<link rel="stylesheet" href="theme/style.css" type="text/css">
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
|
||||
<tr>
|
||||
<td width="10">
|
||||
</td>
|
||||
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The
|
||||
Lazy Parser</b></font></td>
|
||||
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="dynamic_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="select_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>Closures are cool. They allow us to inject stack based local variables anywhere
|
||||
in our parse descent hierarchy. Typically, we store temporary variables, generated
|
||||
by our semantic actions, in our closure variables, as a means to pass information
|
||||
up and down the recursive descent.</p>
|
||||
<p>Now imagine this... Having in mind that closure variables can be just about
|
||||
any type, we can store a parser, a rule, or a pointer to a parser or rule, in
|
||||
a closure variable. <em>Yeah, right, so what?...</em> Ok, hold on... What if
|
||||
we can use this closure variable to initiate a parse? Think about it for a second.
|
||||
Suddenly we'll have some powerful dynamic parsers! Suddenly we'll have a full
|
||||
round trip from <a href="../phoenix/index.html">Phoenix</a> to Spirit and
|
||||
back! <a href="../phoenix/index.html">Phoenix</a> semantic actions choose the
|
||||
right Spirit parser and Spirit parsers choose the right <a href="../phoenix/index.html">Phoenix</a>
|
||||
semantic action. Oh MAN, what a honky cool idea, I might say!!</p>
|
||||
<h2>lazy_p</h2>
|
||||
<p>This is the idea behind the <tt>lazy_p</tt> parser. The <tt>lazy_p</tt> syntax
|
||||
is:</p>
|
||||
<pre> lazy_p<span class="special">(</span>actor<span class="special">)</span></pre>
|
||||
<p>where actor is a <a href="../phoenix/index.html">Phoenix</a> expression that
|
||||
returns a Spirit parser. This returned parser is used in the parsing process.
|
||||
</p>
|
||||
<p>Example: </p>
|
||||
<pre> lazy_p<span class="special">(</span>phoenix<span class="special">::</span>val<span class="special">(</span>int_p<span class="special">))[</span>assign_a<span class="special">(</span>result<span class="special">)]</span>
|
||||
</pre>
|
||||
<p>Semantic actions attached to the <tt>lazy_p</tt> parser expect the same signature
|
||||
as that of the returned parser (<tt>int_p</tt>, in our example above).</p>
|
||||
<h2>lazy_p example</h2>
|
||||
<p>To give you a better glimpse (see the <tt><a href="../example/intermediate/lazy_parser.cpp">lazy_parser.cpp</a></tt>),
|
||||
say you want to parse inputs such as:</p>
|
||||
<pre> <span class=identifier>dec
|
||||
</span><span class="special">{</span><span class=identifier><br> 1 2 3<br> bin
|
||||
</span><span class="special">{</span><span class=identifier><br> 1 10 11<br> </span><span class="special">}</span><span class=identifier><br> 4 5 6<br> </span><span class="special">}</span></pre>
|
||||
<p>where <tt>bin {...}</tt> and <tt>dec {...}</tt> specify the numeric format
|
||||
(binary or decimal) that we are expecting to read. If we analyze the input,
|
||||
we want a grammar like:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>base </span><span class="special">=</span><span class=identifier> </span><span class="string">"bin"</span><span class=identifier> </span><span class="special">|</span><span class=identifier> </span><span class="string">"dec"</span><span class="special">;</span><span class=identifier>
|
||||
block </span><span class=special>= </span><span class="identifier">base</span><span class=special> >> </span><span class="literal">'{'</span><span class=special> >> *</span><span class="identifier">block_line</span><span class=special> >> </span><span class="literal">'}'</span><span class=special>;
|
||||
</span>block_line <span class=special>= </span><span class="identifier">number</span><span class=special> | </span><span class=identifier>block</span><span class=special>;</span></font></code></pre>
|
||||
<p>We intentionally left out the <code><font color="#000000"><span class="identifier"><tt>number</tt></span></font></code>
|
||||
rule. The tricky part is that the way <tt>number</tt> rule behaves depends on
|
||||
the result of the <tt>base</tt> rule. If <tt>base</tt> got a <em>"bin"</em>,
|
||||
then number should parse binary numbers. If <tt>base</tt> got a <em>"dec"</em>,
|
||||
then number should parse decimal numbers. Typically we'll have to rewrite our
|
||||
grammar to accommodate the different parsing behavior:</p>
|
||||
<pre><code><font color="#000000"><span class=identifier> block </span><span class=special>=
|
||||
</span><span class=identifier>"bin"</span> <span class=special>>> </span><span class="literal">'{'</span><span class=special> >> *</span>bin_line<span class=special> >> </span><span class="literal">'}'</span><span class=special>
|
||||
| </span><span class=identifier>"dec"</span> <span class=special>>> </span><span class="literal">'{'</span><span class=special> >> *</span>dec_line<span class=special> >> </span><span class="literal">'}'</span><span class=special>
|
||||
;
|
||||
</span>bin_line <span class=special>= </span><span class="identifier">bin_p</span><span class=special> | </span><span class=identifier>block</span><span class=special>;
|
||||
</span>dec_line <span class=special>= </span><span class="identifier">int_p</span><span class=special> | </span><span class=identifier>block</span><span class=special>;</span></font></code></pre>
|
||||
<p>While this is fine, the redundancy makes us want to find a better solution;
|
||||
after all, we'd want to make full use of Spirit's dynamic parsing capabilities.
|
||||
Apart from that, there will be cases where the set of parsing behaviors for
|
||||
our <tt>number</tt> rule is not known when the grammar is written. We'll only
|
||||
be given a map of string descriptors and corresponding rules [e.g. (("dec",
|
||||
int_p), ("bin", bin_p) ... etc...)].</p>
|
||||
<p>The basic idea is to have a rule for binary and decimal numbers. That's easy
|
||||
enough to do (see <a href="numerics.html">numerics</a>). When <tt>base</tt>
|
||||
is being parsed, in your semantic action, store a pointer to the selected base
|
||||
in a closure variable (e.g. <tt>block.int_rule</tt>). Here's an example:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>base
|
||||
</span><span class="special">=</span><span class=identifier> str_p</span><span class="special">(</span><span class="string">"bin"</span><span class="special">)[</span><span class=identifier>block.int_rule</span> = <span class="special">&</span>var<span class="special">(</span><span class="identifier">bin_rule</span><span class="special">)]
|
||||
| </span><span class=identifier>str_p</span><span class="special">(</span><span class="string">"dec"</span><span class="special">)[</span><span class=identifier>block.int_rule</span> = <span class="special">&</span>var<span class="special">(</span><span class="identifier">dec_rule</span><span class="special">)]
|
||||
;</span></font></code></pre>
|
||||
<p>With this setup, your number rule will now look something like:</p>
|
||||
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>number </span><span class="special">=</span><span class=identifier> lazy_p</span><span class="special">(*</span><span class=identifier>block.int_rule</span><span class="special">);</span></font></code></pre>
|
||||
<p>The <tt><a href="../example/intermediate/lazy_parser.cpp">lazy_parser.cpp</a></tt>
|
||||
does it a bit differently, ingeniously using the <a href="symbols.html">symbol
|
||||
table</a> to dispatch the correct rule, but in essence, both strategies are
|
||||
similar. This technique, using the symbol table, is detailed in the Techniques section: <a href="techniques.html#nabialek_trick">nabialek_trick</a>. Admittedly, when you add up all the rules, the resulting grammar is
|
||||
more complex than the hard-coded grammar above. Yet, for more complex grammar
|
||||
patterns with a lot more rules to choose from, the additional setup is well
|
||||
worth it.</p>
|
||||
<table border="0">
|
||||
<tr>
|
||||
<td width="10"></td>
|
||||
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="dynamic_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
|
||||
<td width="30"><a href="select_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<br>
|
||||
<hr size="1">
|
||||
<p class="copyright">Copyright © 2003 Joel de Guzman<br>
|
||||
Copyright © 2003 Vaclav Vesely<br>
|
||||
<br>
|
||||
<font size="2">Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)</font></p>
|
||||
<p class="copyright"> </p>
|
||||
</body>
|
||||
</html>
|
||||
BIN
classic/doc/theme/alert.gif
vendored
|
Before Width: | Height: | Size: 577 B |
BIN
classic/doc/theme/arrow.gif
vendored
|
Before Width: | Height: | Size: 70 B |
BIN
classic/doc/theme/bkd.gif
vendored
|
Before Width: | Height: | Size: 1.3 KiB |
BIN
classic/doc/theme/bkd2.gif
vendored
|
Before Width: | Height: | Size: 2.5 KiB |
BIN
classic/doc/theme/bulb.gif
vendored
|
Before Width: | Height: | Size: 944 B |
BIN
classic/doc/theme/bullet.gif
vendored
|
Before Width: | Height: | Size: 152 B |
BIN
classic/doc/theme/closure1.png
vendored
|
Before Width: | Height: | Size: 17 KiB |
BIN
classic/doc/theme/error_handling.png
vendored
|
Before Width: | Height: | Size: 9.4 KiB |
BIN
classic/doc/theme/intro1.png
vendored
|
Before Width: | Height: | Size: 37 KiB |
BIN
classic/doc/theme/intro2.png
vendored
|
Before Width: | Height: | Size: 9.2 KiB |
BIN
classic/doc/theme/intro3.png
vendored
|
Before Width: | Height: | Size: 13 KiB |
BIN
classic/doc/theme/intro4.png
vendored
|
Before Width: | Height: | Size: 23 KiB |
BIN
classic/doc/theme/intro5.png
vendored
|
Before Width: | Height: | Size: 15 KiB |
BIN
classic/doc/theme/intro6.png
vendored
|
Before Width: | Height: | Size: 30 KiB |
BIN
classic/doc/theme/intro7.png
vendored
|
Before Width: | Height: | Size: 21 KiB |
BIN
classic/doc/theme/l_arr.gif
vendored
|
Before Width: | Height: | Size: 147 B |
BIN
classic/doc/theme/l_arr_disabled.gif
vendored
|
Before Width: | Height: | Size: 91 B |
BIN
classic/doc/theme/lambda.png
vendored
|
Before Width: | Height: | Size: 509 B |
BIN
classic/doc/theme/lens.gif
vendored
|
Before Width: | Height: | Size: 897 B |
BIN
classic/doc/theme/note.gif
vendored
|
Before Width: | Height: | Size: 151 B |
BIN
classic/doc/theme/organization1.png
vendored
|
Before Width: | Height: | Size: 30 KiB |
BIN
classic/doc/theme/r_arr.gif
vendored
|
Before Width: | Height: | Size: 147 B |
BIN
classic/doc/theme/r_arr_disabled.gif
vendored
|
Before Width: | Height: | Size: 91 B |
BIN
classic/doc/theme/scanner1.png
vendored
|
Before Width: | Height: | Size: 28 KiB |
BIN
classic/doc/theme/spirit.gif
vendored
|
Before Width: | Height: | Size: 3.5 KiB |
174
classic/doc/theme/style.css
vendored
@@ -1,174 +0,0 @@
|
||||
/* Use, modification and distribution is subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
*/
|
||||
/* ---- Page ---------------------------------------------------------------- */

body
{
    background-image: url(bkd.gif);
    background-color: #FFFFFF;
    margin: 1em 2em 1em 2em;
}

/* ---- Headings ------------------------------------------------------------ */

h1 { font-family: Verdana, Arial, Helvetica, sans-serif; font-weight: bold; text-align: left; }
h2 { font: 140% sans-serif; font-weight: bold; text-align: left; }
h3 { font: 120% sans-serif; font-weight: bold; text-align: left; }
h4 { font: bold 100% sans-serif; font-weight: bold; text-align: left; }
h5 { font: italic 100% sans-serif; font-weight: bold; text-align: left; }
h6 { font: small-caps 100% sans-serif; font-weight: bold; text-align: left; }

/* ---- Code listings and inline code ---------------------------------------
   The generic fallback family is "monospace"; "mono" is not a CSS generic
   family and would be looked up as a (non-existent) font name. */

pre
{
    border-top: gray 1pt solid;
    border-right: gray 1pt solid;
    border-left: gray 1pt solid;
    border-bottom: gray 1pt solid;

    padding-top: 2pt;
    padding-right: 2pt;
    padding-left: 2pt;
    padding-bottom: 2pt;

    display: block;
    font-family: "courier new", courier, monospace;
    background-color: #eeeeee;
    font-size: small;
}

code
{
    font-family: "Courier New", Courier, monospace;
    font-size: small;
}

tt
{
    display: inline;
    font-family: "Courier New", Courier, monospace;
    color: #000099;
    font-size: small;
}

/* ---- Body text and lists ------------------------------------------------- */

p
{
    text-align: justify;
    font-family: Georgia, "Times New Roman", Times, serif;
}

ul
{
    list-style-image: url(bullet.gif);
    font-family: Georgia, "Times New Roman", Times, serif;
}

ol
{
    font-family: Georgia, "Times New Roman", Times, serif;
}

/* ---- Links --------------------------------------------------------------- */

a
{
    font-weight: bold;
    color: #003366;
    text-decoration: none;
}

a:hover { color: #8080FF; }

/* ---- Syntax-highlighting and utility classes ----------------------------- */

.literal { color: #666666; font-style: italic; }
.keyword { color: #000099; }
.identifier {}
.comment { font-style: italic; color: #990000; }
.special { color: #800040; }
.preprocessor { color: #FF0000; }
.string { font-style: italic; color: #666666; }
.copyright { color: #666666; font-size: small; }
.white_bkd { background-color: #FFFFFF; }
.dk_grey_bkd { background-color: #999999; }
.quotes { color: #666666; font-style: italic; font-weight: bold; }

/* ---- Boxed notes --------------------------------------------------------- */

.note_box
{
    display: block;

    border-top: gray 1pt solid;
    border-right: gray 1pt solid;
    border-left: gray 1pt solid;
    border-bottom: gray 1pt solid;

    padding-right: 12pt;
    padding-left: 12pt;
    padding-bottom: 12pt;
    padding-top: 12pt;

    font-family: Arial, Helvetica, sans-serif;
    background-color: #E2E9EF;
    font-size: small;
    text-align: justify;
}

/* ---- Tables -------------------------------------------------------------- */

.table_title
{
    background-color: #648CCA;

    font-family: Verdana, Arial, Helvetica, sans-serif;
    color: #FFFFFF;
    font-weight: bold;
    padding-top: 4px;
    padding-right: 4px;
    padding-bottom: 4px;
    padding-left: 4px;
}

.table_cells
{
    background-color: #E2E9EF;

    font-family: Geneva, Arial, Helvetica, sans-serif;
    font-size: small;
    padding-top: 4px;
    padding-right: 4px;
    padding-bottom: 4px;
    padding-left: 4px;
}

/* ---- Table of contents --------------------------------------------------- */

.toc
{
    display: block;
    /* The terminating ';' is required here: without it this declaration and
       the font-family that follows are parsed as one invalid declaration and
       both are dropped. */
    background-color: #E2E9EF;
    font-family: Arial, Helvetica, sans-serif;

    border-top: gray 1pt solid;
    border-left: gray 1pt solid;
    border-bottom: gray 1pt solid;
    border-right: gray 1pt solid;

    padding-top: 24pt;
    padding-right: 24pt;
    padding-left: 24pt;
    padding-bottom: 24pt;
}

.toc_title
{
    background-color: #648CCA;
    padding-top: 4px;
    padding-right: 4px;
    padding-bottom: 4px;
    padding-left: 4px;
    font-family: Geneva, Arial, Helvetica, sans-serif;
    color: #FFFFFF;
    font-weight: bold;
}

.toc_cells
{
    background-color: #E2E9EF;
    padding-top: 4px;
    padding-right: 4px;
    padding-bottom: 4px;
    padding-left: 4px;
    font-family: Geneva, Arial, Helvetica, sans-serif;
    font-size: small;
}

div.logo
{
    float: right;
}

/* TOC cells by nesting level: identical except for the left padding. */
.toc_cells_L0 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 4px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }
.toc_cells_L1 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 44px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }
.toc_cells_L2 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 88px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }
.toc_cells_L3 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 122px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }
.toc_cells_L4 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 166px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }
|
||||
BIN
classic/doc/theme/subrule1.png
vendored
|
Before Width: | Height: | Size: 20 KiB |
BIN
classic/doc/theme/trees1.png
vendored
|
Before Width: | Height: | Size: 7.1 KiB |
BIN
classic/doc/theme/trees2.png
vendored
|
Before Width: | Height: | Size: 4.7 KiB |
BIN
classic/doc/theme/trees3.png
vendored
|
Before Width: | Height: | Size: 4.3 KiB |
BIN
classic/doc/theme/trees4.png
vendored
|
Before Width: | Height: | Size: 6.8 KiB |
BIN
classic/doc/theme/u_arr.gif
vendored
|
Before Width: | Height: | Size: 170 B |
@@ -1,300 +0,0 @@
|
||||
#==============================================================================
|
||||
# Copyright (c) 2002 Joel de Guzman
|
||||
# http://spirit.sourceforge.net/
|
||||
#
|
||||
# Use, modification and distribution is subject to the Boost Software
|
||||
# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
# http://www.boost.org/LICENSE_1_0.txt)
|
||||
#==============================================================================
|
||||
#
|
||||
# Spirit examples boost-jam file
|
||||
# Joel de Guzman [Sep 27, 2002] : created
|
||||
# Joel de Guzman [Oct 30, 2003] : separated the applications
|
||||
# Martin Wille [Jan 15, 2004] : changes for new directory structure
|
||||
# Martin Wille [Jan 20, 2004] : more changes for new directory structure
|
||||
# Joel de Guzman [Jul 29, 2004] : added calc_debug.cpp
|
||||
#
|
||||
|
||||
exe ast_calc
|
||||
: fundamental/ast_calc.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe "bind"
|
||||
: fundamental/bind.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe boiler_plate
|
||||
: fundamental/boiler_plate.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe calc_plain
|
||||
: fundamental/calc_plain.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe calc_debug
|
||||
: fundamental/calc_debug.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe comments
|
||||
: fundamental/comments.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe complex_number
|
||||
: fundamental/complex_number.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe error_handling
|
||||
: fundamental/error_handling.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe error_reporting
|
||||
: fundamental/error_reporting.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe file_parser
|
||||
: fundamental/file_parser.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe full_calc
|
||||
: fundamental/full_calc.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe functor_parser
|
||||
: fundamental/functor_parser.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe list_parser
|
||||
: fundamental/list_parser.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe matching_tags
|
||||
: fundamental/matching_tags.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe no_actions
|
||||
: fundamental/no_actions.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe number_list
|
||||
: fundamental/number_list.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe parse_tree_calc1
|
||||
: fundamental/parse_tree_calc1.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe parser_context
|
||||
: fundamental/parser_context.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe phoenix_calc
|
||||
: fundamental/phoenix_calc.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe position_iterator
|
||||
: fundamental/position_iterator/position_iterator.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe refactoring
|
||||
: fundamental/refactoring.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe regular_expression
|
||||
: fundamental/regular_expression.cpp
|
||||
/boost//regex
|
||||
:
|
||||
;
|
||||
|
||||
exe roman_numerals
|
||||
: fundamental/roman_numerals.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe stuff_vector
|
||||
: fundamental/stuff_vector.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe stuff_vector2
|
||||
: fundamental/stuff_vector2.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe subrule_calc
|
||||
: fundamental/subrule_calc.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe sum
|
||||
: fundamental/sum.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe thousand_separated
|
||||
: fundamental/thousand_separated.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe ast_calc2
|
||||
: fundamental/more_calculators/ast_calc2.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe calc_with_variables
|
||||
: fundamental/more_calculators/calc_with_variables.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe phoenix_subrule_calc
|
||||
: fundamental/more_calculators/phoenix_subrule_calc.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe primitive_calc
|
||||
: fundamental/more_calculators/primitive_calc.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe rpn_calc
|
||||
: fundamental/more_calculators/rpn_calc.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe vmachine_calc
|
||||
: fundamental/more_calculators/vmachine_calc.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe distinct_parser
|
||||
: fundamental/distinct/distinct_parser.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe distinct_parser_dynamic
|
||||
: fundamental/distinct/distinct_parser_dynamic.cpp
|
||||
:
|
||||
;
|
||||
|
||||
################################################################################
|
||||
|
||||
exe ipv4
|
||||
: intermediate/ipv4.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe ipv4_opt
|
||||
: intermediate/ipv4_opt.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe lazy_parser
|
||||
: intermediate/lazy_parser.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe parameters
|
||||
: intermediate/parameters.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe regex_convert
|
||||
: intermediate/regex_convert.cpp
|
||||
/boost//regex
|
||||
:
|
||||
;
|
||||
|
||||
# simple_xml example: driver plus the tag implementation.
# Each source file is listed exactly once.
exe simple_xml
    : intermediate/simple_xml/driver.cpp
      intermediate/simple_xml/tag.cpp
    :
    ;
|
||||
|
||||
################################################################################
|
||||
|
||||
exe dynamic_rule
|
||||
: techniques/dynamic_rule.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe epsilon
|
||||
: techniques/epsilon.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe multiple_scanners
|
||||
: techniques/multiple_scanners.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe nabialek
|
||||
: techniques/nabialek.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe no_rule1
|
||||
: techniques/no_rules/no_rule1.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe no_rule2
|
||||
: techniques/no_rules/no_rule2.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe no_rule3
|
||||
: techniques/no_rules/no_rule3.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe typeof
|
||||
: techniques/typeof.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe rule_parser_1_1
|
||||
: techniques/no_rules_with_typeof/rule_parser_1_1.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe rule_parser_1_2
|
||||
: techniques/no_rules_with_typeof/rule_parser_1_2.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe rule_parser_2_1
|
||||
: techniques/no_rules_with_typeof/rule_parser_2_1.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe rule_parser_2_2
|
||||
: techniques/no_rules_with_typeof/rule_parser_2_2.cpp
|
||||
:
|
||||
;
|
||||
|
||||
exe opaque_rule_parser
|
||||
: techniques/no_rules_with_typeof/opaque_rule_parser.cpp
|
||||
:
|
||||
;
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2001-2003 Daniel Nuffer
|
||||
http://spirit.sourceforge.net/
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
=============================================================================*/
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Demonstrates the ASTs. This is discussed in the
|
||||
// "Trees" chapter in the Spirit User's Guide.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#define BOOST_SPIRIT_DUMP_PARSETREE_AS_XML
|
||||
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <boost/spirit/include/classic_ast.hpp>
|
||||
#include <boost/spirit/include/classic_tree_to_xml.hpp>
|
||||
#include <boost/assert.hpp>
|
||||
#include "tree_calc_grammar.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <stack>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#if defined(BOOST_SPIRIT_DUMP_PARSETREE_AS_XML)
|
||||
#include <map>
|
||||
#endif
|
||||
|
||||
// This example shows how to use an AST.
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
using namespace std;
|
||||
using namespace BOOST_SPIRIT_CLASSIC_NS;
|
||||
|
||||
typedef char const* iterator_t;
|
||||
typedef tree_match<iterator_t> parse_tree_match_t;
|
||||
typedef parse_tree_match_t::tree_iterator iter_t;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
long evaluate(parse_tree_match_t hit);
|
||||
long eval_expression(iter_t const& i);
|
||||
|
||||
//  Computes the value of a parsed expression by evaluating the first
//  tree produced by the parse. No check is made here that a tree exists.
long evaluate(tree_parse_info<> info)
{
    iter_t const root = info.trees.begin();
    return eval_expression(root);
}
|
||||
|
||||
//  Recursively evaluates the AST node referenced by `i` and returns its
//  integer value.
//
//  The node's rule id selects the kind of node (integer, factor, term or
//  expression); for operator nodes the first character of the node's text
//  selects the operation. Unexpected node shapes trip BOOST_ASSERT; when
//  assertions are compiled out the function falls back to returning 0.
long eval_expression(iter_t const& i)
{
    //  Trace every visited node: its matched text and its child count.
    std::string const text(i->value.begin(), i->value.end());
    std::cout << "In eval_expression. i->value = " << text
        << " i->children.size() = " << i->children.size() << std::endl;

    parser_id const id = i->value.id();

    if (id == calculator::integerID)
    {
        //  Leaf node: its text is the integer literal. The text was copied
        //  into a string above because the matched range is not always
        //  delimited by '\0'.
        BOOST_ASSERT(i->children.size() == 0);
        return strtol(text.c_str(), 0, 10);
    }

    if (id == calculator::factorID)
    {
        //  A factor node can only be a unary minus applied to its first child.
        BOOST_ASSERT(*i->value.begin() == '-');
        long const operand = eval_expression(i->children.begin());
        return -operand;
    }

    if (id == calculator::termID)
    {
        char const op = *i->value.begin();
        if (op != '*' && op != '/')
        {
            BOOST_ASSERT(0);
            return 0;
        }

        //  Binary operator: exactly two operands are expected.
        BOOST_ASSERT(i->children.size() == 2);
        iter_t const lhs = i->children.begin();
        iter_t const rhs = lhs + 1;
        return op == '*'
            ? eval_expression(lhs) * eval_expression(rhs)
            : eval_expression(lhs) / eval_expression(rhs);
    }

    if (id == calculator::expressionID)
    {
        char const op = *i->value.begin();
        if (op != '+' && op != '-')
        {
            BOOST_ASSERT(0);
            return 0;
        }

        //  Binary operator: exactly two operands are expected.
        BOOST_ASSERT(i->children.size() == 2);
        iter_t const lhs = i->children.begin();
        iter_t const rhs = lhs + 1;
        return op == '+'
            ? eval_expression(lhs) + eval_expression(rhs)
            : eval_expression(lhs) - eval_expression(rhs);
    }

    BOOST_ASSERT(0); // error: unknown rule id
    return 0;
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main()
|
||||
{
|
||||
// look in tree_calc_grammar for the definition of calculator
|
||||
calculator calc;
|
||||
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "\t\tThe simplest working calculator...\n\n";
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "Type an expression...or [q or Q] to quit\n\n";
|
||||
|
||||
string str;
|
||||
while (getline(cin, str))
|
||||
{
|
||||
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
|
||||
break;
|
||||
|
||||
tree_parse_info<> info = ast_parse(str.c_str(), calc);
|
||||
|
||||
if (info.full)
|
||||
{
|
||||
#if defined(BOOST_SPIRIT_DUMP_PARSETREE_AS_XML)
|
||||
// dump parse tree as XML
|
||||
std::map<parser_id, std::string> rule_names;
|
||||
rule_names[calculator::integerID] = "integer";
|
||||
rule_names[calculator::factorID] = "factor";
|
||||
rule_names[calculator::termID] = "term";
|
||||
rule_names[calculator::expressionID] = "expression";
|
||||
tree_to_xml(cout, info.trees, str.c_str(), rule_names);
|
||||
#endif
|
||||
|
||||
// print the result
|
||||
cout << "parsing succeeded\n";
|
||||
cout << "result = " << evaluate(info) << "\n\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "parsing failed\n";
|
||||
}
|
||||
}
|
||||
|
||||
cout << "Bye... :-) \n\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,127 +0,0 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2002-2003 Joel de Guzman
|
||||
http://spirit.sourceforge.net/
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
=============================================================================*/
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Demonstrates use of boost::bind and spirit
|
||||
// This is discussed in the "Functional" chapter in the Spirit User's Guide.
|
||||
//
|
||||
// [ JDG 9/29/2002 ]
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <boost/bind/bind.hpp>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
using namespace std;
|
||||
using namespace BOOST_SPIRIT_CLASSIC_NS;
|
||||
using namespace boost;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Our comma separated list parser
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
class list_parser
|
||||
{
|
||||
public:
|
||||
|
||||
typedef list_parser self_t;
|
||||
|
||||
bool
|
||||
parse(char const* str)
|
||||
{
|
||||
using namespace boost::placeholders;
|
||||
return BOOST_SPIRIT_CLASSIC_NS::parse(str,
|
||||
|
||||
// Begin grammar
|
||||
(
|
||||
real_p
|
||||
[
|
||||
bind(&self_t::add, this, _1)
|
||||
]
|
||||
|
||||
>> *( ','
|
||||
>> real_p
|
||||
[
|
||||
bind(&self_t::add, this, _1)
|
||||
]
|
||||
)
|
||||
)
|
||||
,
|
||||
// End grammar
|
||||
|
||||
space_p).full;
|
||||
}
|
||||
|
||||
void
|
||||
add(double n)
|
||||
{
|
||||
v.push_back(n);
|
||||
}
|
||||
|
||||
void
|
||||
print() const
|
||||
{
|
||||
for (vector<double>::size_type i = 0; i < v.size(); ++i)
|
||||
cout << i << ": " << v[i] << endl;
|
||||
}
|
||||
|
||||
vector<double> v;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Main program
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main()
|
||||
{
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "\tA comma separated list parser for Spirit...\n";
|
||||
cout << "\tDemonstrates use of boost::bind and spirit\n";
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
|
||||
cout << "Give me a comma separated list of numbers.\n";
|
||||
cout << "The numbers will be inserted in a vector of numbers\n";
|
||||
cout << "Type [q or Q] to quit\n\n";
|
||||
|
||||
string str;
|
||||
while (getline(cin, str))
|
||||
{
|
||||
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
|
||||
break;
|
||||
|
||||
list_parser lp;
|
||||
if (lp.parse(str.c_str()))
|
||||
{
|
||||
cout << "-------------------------\n";
|
||||
cout << "Parsing succeeded\n";
|
||||
cout << str << " Parses OK: " << endl;
|
||||
|
||||
lp.print();
|
||||
|
||||
cout << "-------------------------\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "-------------------------\n";
|
||||
cout << "Parsing failed\n";
|
||||
cout << "-------------------------\n";
|
||||
}
|
||||
}
|
||||
|
||||
cout << "Bye... :-) \n\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2002-2003 Joel de Guzman
|
||||
http://spirit.sourceforge.net/
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
=============================================================================*/
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Boiler plate [ A template for writing your parser ]
|
||||
//
|
||||
// [ JDG 9/17/2002 ]
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
using namespace std;
|
||||
using namespace BOOST_SPIRIT_CLASSIC_NS;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Semantic actions
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//  Semantic action that echoes the matched input to standard output.
struct my_action
{
    //  Receives the iterator range [first, last) of the matched text and
    //  prints it on its own line, prefixed with a tab and a label.
    template <typename IteratorT>
    void operator()(IteratorT first, IteratorT last) const
    {
        cout << "\tMy Action got: " << string(first, last) << endl;
    }
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// My grammar
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//  Boiler-plate grammar: zero or more runs of graph_p characters, each run
//  matched inside lexeme_d and reported to my_action.
struct my_grammar : public grammar<my_grammar>
{
    template <typename ScannerT>
    struct definition
    {
        typedef rule<ScannerT> rule_t;

        rule_t my_rule;

        //  The grammar object carries no state, so `self` is not used.
        definition(my_grammar const& /*self*/)
        {
            my_rule = *lexeme_d[ (+graph_p)[my_action()] ];
        }

        //  Entry rule of the grammar.
        rule_t const&
        start() const
        {
            return my_rule;
        }
    };
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Main program
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main()
|
||||
{
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "\t\t A boiler-plate parser...\n\n";
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "Type anything or [q or Q] to quit\n\n";
|
||||
|
||||
my_grammar g;
|
||||
|
||||
string str;
|
||||
while (getline(cin, str))
|
||||
{
|
||||
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
|
||||
break;
|
||||
|
||||
if (parse(str.c_str(), g, space_p).full)
|
||||
{
|
||||
cout << "parsing succeeded\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "parsing failed\n";
|
||||
}
|
||||
}
|
||||
|
||||
cout << "Bye... :-) \n\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,199 +0,0 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2001-2003 Dan Nuffer
|
||||
Copyright (c) 2002-2003 Joel de Guzman
|
||||
http://spirit.sourceforge.net/
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
=============================================================================*/
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Full calculator example using STL functors with debugging enabled.
|
||||
// This is discussed in the "Functional" chapter in the Spirit User's Guide
|
||||
// and the Debugging chapter.
|
||||
//
|
||||
// Ported to Spirit v1.5 from v1.2/1.3 example by Dan Nuffer
|
||||
// [ JDG 9/18/2002 ]
|
||||
// [ JDG 7/29/2004 ]
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define BOOST_SPIRIT_DEBUG
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <iostream>
|
||||
#include <stack>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
using namespace std;
|
||||
using namespace BOOST_SPIRIT_CLASSIC_NS;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Semantic actions
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//  Semantic action that converts the matched digits to a long and pushes
//  the value onto the evaluation stack.
struct push_int
{
    push_int(stack<long>& eval_)
    : eval(eval_) {}

    //  `str` and `end` delimit the matched text. Only the characters inside
    //  [str, end) are converted, so the result does not depend on what
    //  follows the match or on the buffer being NUL terminated.
    void operator()(char const* str, char const* end) const
    {
        string const digits(str, end);
        long n = strtol(digits.c_str(), 0, 10);
        eval.push(n);
        cout << "push\t" << n << endl;
    }

    stack<long>& eval;
};
|
||||
|
||||
//  Semantic action for a binary operator: pops the two topmost operands,
//  applies `m_op` to them and pushes the result back onto the stack.
template <typename op>
struct do_op
{
    do_op(op const& the_op, stack<long>& eval_)
    : m_op(the_op), eval(eval_) {}

    //  The matched range is not needed; the operands come from the stack.
    //  The right-hand operand is on top because it was pushed last.
    void operator()(char const*, char const*) const
    {
        long rhs = eval.top();
        eval.pop();
        long lhs = eval.top();
        eval.pop();

        //  Evaluate the operator exactly once and reuse the value for both
        //  the trace output and the push.
        long const result = m_op(lhs, rhs);

        cout << "popped " << lhs << " and " << rhs << " from the stack. ";
        cout << "pushing " << result << " onto the stack.\n";
        eval.push(result);
    }

    op m_op;
    stack<long>& eval;
};
|
||||
|
||||
template <class op>
|
||||
do_op<op>
|
||||
make_op(op const& the_op, stack<long>& eval)
|
||||
{
|
||||
return do_op<op>(the_op, eval);
|
||||
}
|
||||
|
||||
//  Semantic action for unary minus: replaces the top of the evaluation
//  stack with its negation and traces the step on standard output.
struct do_negate
{
    do_negate(stack<long>& eval_)
    : eval(eval_) {}

    //  The matched range is not needed; the operand comes from the stack.
    void operator()(char const*, char const*) const
    {
        long const operand = eval.top();
        eval.pop();
        long const negated = -operand;

        cout << "popped " << operand << " from the stack. "
             << "pushing " << negated << " onto the stack.\n";
        eval.push(negated);
    }

    stack<long>& eval;
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Our calculator grammar
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//  Integer calculator grammar. The semantic actions evaluate the expression
//  on the stack that was passed to the constructor.
struct calculator : public grammar<calculator>
{
    calculator(stack<long>& eval_)
    : eval(eval_) {}

    template <typename ScannerT>
    struct definition
    {
        typedef rule<ScannerT> rule_t;

        definition(calculator const& self)
        {
            //  All actions share the grammar's evaluation stack.
            stack<long>& values = self.eval;

            //  Rules refer to each other, so they can be defined top-down.
            expression =
                term
                >> *(   ('+' >> term)[make_op(plus<long>(), values)]
                    |   ('-' >> term)[make_op(minus<long>(), values)]
                    )
                ;

            term =
                factor
                >> *(   ('*' >> factor)[make_op(multiplies<long>(), values)]
                    |   ('/' >> factor)[make_op(divides<long>(), values)]
                    )
                ;

            factor =
                    integer
                |   '(' >> expression >> ')'
                |   ('-' >> factor)[do_negate(values)]
                |   ('+' >> factor)
                ;

            integer =
                lexeme_d[ (+digit_p)[push_int(values)] ]
                ;

            //  Register the rules with the Spirit debug facility.
            BOOST_SPIRIT_DEBUG_NODE(integer);
            BOOST_SPIRIT_DEBUG_NODE(factor);
            BOOST_SPIRIT_DEBUG_NODE(term);
            BOOST_SPIRIT_DEBUG_NODE(expression);
        }

        rule_t expression, term, factor, integer;

        //  Entry rule of the grammar.
        rule_t const&
        start() const
        {
            return expression;
        }
    };

    stack<long>& eval;
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Main program
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main()
|
||||
{
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "\t\tThe simplest working calculator...\n\n";
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "Type an expression...or [q or Q] to quit\n\n";
|
||||
|
||||
stack<long> eval;
|
||||
calculator calc(eval); // Our parser
|
||||
BOOST_SPIRIT_DEBUG_NODE(calc);
|
||||
|
||||
string str;
|
||||
while (getline(cin, str))
|
||||
{
|
||||
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
|
||||
break;
|
||||
|
||||
parse_info<> info = parse(str.c_str(), calc, space_p);
|
||||
|
||||
if (info.full)
|
||||
{
|
||||
cout << "-------------------------\n";
|
||||
cout << "Parsing succeeded\n";
|
||||
cout << "result = " << calc.eval.top() << endl;
|
||||
cout << "-------------------------\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "-------------------------\n";
|
||||
cout << "Parsing failed\n";
|
||||
cout << "stopped at: \": " << info.stop << "\"\n";
|
||||
cout << "-------------------------\n";
|
||||
}
|
||||
}
|
||||
|
||||
cout << "Bye... :-) \n\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,129 +0,0 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2002-2003 Joel de Guzman
|
||||
http://spirit.sourceforge.net/
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
=============================================================================*/
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Plain calculator example demonstrating the grammar and semantic actions.
|
||||
// This is discussed in the "Grammar" and "Semantic Actions" chapters in
|
||||
// the Spirit User's Guide.
|
||||
//
|
||||
// [ JDG 5/10/2002 ]
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
using namespace std;
|
||||
using namespace BOOST_SPIRIT_CLASSIC_NS;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Semantic actions
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//  File-local semantic actions. Each one only prints the operation it
//  stands for; nothing is evaluated.
namespace
{
    //  Prints the matched number as PUSH(<text>).
    void do_int(char const* str, char const* end)
    {
        cout << "PUSH(" << string(str, end) << ')' << endl;
    }

    //  The operator actions ignore the matched range.
    void do_add(char const*, char const*)
    {
        cout << "ADD\n";
    }

    void do_subt(char const*, char const*)
    {
        cout << "SUBTRACT\n";
    }

    void do_mult(char const*, char const*)
    {
        cout << "MULTIPLY\n";
    }

    void do_div(char const*, char const*)
    {
        cout << "DIVIDE\n";
    }

    void do_neg(char const*, char const*)
    {
        cout << "NEGATE\n";
    }
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Our calculator grammar
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//  Expression grammar whose semantic actions print the operations in the
//  order they are recognised.
struct calculator : public grammar<calculator>
{
    template <typename ScannerT>
    struct definition
    {
        typedef rule<ScannerT> rule_t;

        rule_t expression, term, factor;

        //  The grammar object carries no state, so `self` is not used.
        definition(calculator const& /*self*/)
        {
            //  Rules refer to each other, so they can be defined bottom-up.
            factor =
                    lexeme_d[ (+digit_p)[&do_int] ]
                |   '(' >> expression >> ')'
                |   ('-' >> factor)[&do_neg]
                |   ('+' >> factor)
                ;

            term =
                factor
                >> *(   ('*' >> factor)[&do_mult]
                    |   ('/' >> factor)[&do_div]
                    )
                ;

            expression =
                term
                >> *(   ('+' >> term)[&do_add]
                    |   ('-' >> term)[&do_subt]
                    )
                ;
        }

        //  Entry rule of the grammar.
        rule_t const&
        start() const
        {
            return expression;
        }
    };
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Main program
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main()
|
||||
{
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "\t\tExpression parser...\n\n";
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "Type an expression...or [q or Q] to quit\n\n";
|
||||
|
||||
calculator calc; // Our parser
|
||||
|
||||
string str;
|
||||
while (getline(cin, str))
|
||||
{
|
||||
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
|
||||
break;
|
||||
|
||||
parse_info<> info = parse(str.c_str(), calc, space_p);
|
||||
|
||||
if (info.full)
|
||||
{
|
||||
cout << "-------------------------\n";
|
||||
cout << "Parsing succeeded\n";
|
||||
cout << "-------------------------\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "-------------------------\n";
|
||||
cout << "Parsing failed\n";
|
||||
cout << "stopped at: \": " << info.stop << "\"\n";
|
||||
cout << "-------------------------\n";
|
||||
}
|
||||
}
|
||||
|
||||
cout << "Bye... :-) \n\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,232 +0,0 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2001-2003 Hartmut Kaiser
|
||||
http://spirit.sourceforge.net/
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
=============================================================================*/
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// This example shows:
|
||||
// 1. Parsing of different comment styles
|
||||
// parsing C/C++-style comment
|
||||
// parsing C++-style comment
|
||||
// parsing PASCAL-style comment
|
||||
// 2. Parsing tagged data with the help of the confix_parser
|
||||
// 3. Parsing tagged data with the help of the confix_parser but the semantic
|
||||
// action is directly attached to the body sequence parser
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <boost/spirit/include/classic_confix.hpp>
|
||||
#include <boost/spirit/include/classic_chset.hpp>
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// used namespaces
|
||||
using namespace std;
|
||||
using namespace BOOST_SPIRIT_CLASSIC_NS;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// actor called after successfully matching a single character
|
||||
//  Semantic action that appends every matched range to a caller-owned
//  string. The string is held by reference and must outlive the actor.
class actor_string
{
public:
    actor_string(std::string &rstr)
    :   matched(rstr)
    {
    }

    //  Appends the characters in [pbegin, pend) to the target string.
    void operator() (const char *pbegin, const char *pend) const
    {
        matched.append(pbegin, pend);
    }

private:
    std::string &matched;
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// actor called after successfully matching a C++-comment
|
||||
//  Semantic action that reports a matched comment: prints a heading, the
//  length of the match and the matched text in double quotes.
void actor_cpp (const char *pfirst, const char *plast)
{
    const std::string text(pfirst, plast);

    cout << "Parsing C++-comment" << endl;
    cout << "Matched (" << (plast - pfirst) << ") characters: ";
    cout << "\"" << text << "\"" << endl;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// main entry point
|
||||
int main ()
|
||||
{
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// 1. Parsing different comment styles
|
||||
// parsing C/C++-style comments (non-nested!)
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char const* pCComment = "/* This is a /* nested */ C-comment */";
|
||||
|
||||
rule<> cpp_comment;
|
||||
|
||||
cpp_comment =
|
||||
comment_p("/*", "*/") // rule for C-comments
|
||||
| comment_p("//") // rule for C++ comments
|
||||
;
|
||||
|
||||
std::string comment_c;
|
||||
parse_info<> result;
|
||||
|
||||
result = parse (pCComment, cpp_comment[actor_string(comment_c)]);
|
||||
if (result.hit)
|
||||
{
|
||||
cout << "Parsed C-comment successfully!" << endl;
|
||||
cout << "Matched (" << (int)comment_c.size() << ") characters: ";
|
||||
cout << "\"" << comment_c << "\"" << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Failed to parse C/C++-comment!" << endl;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
// parsing C++-style comment
|
||||
char const* pCPPComment = "// This is a C++-comment\n";
|
||||
std::string comment_cpp;
|
||||
|
||||
result = parse (pCPPComment, cpp_comment[&actor_cpp]);
|
||||
if (result.hit)
|
||||
cout << "Parsed C++-comment successfully!" << endl;
|
||||
else
|
||||
cout << "Failed to parse C++-comment!" << endl;
|
||||
|
||||
cout << endl;
|
||||
|
||||
|
||||
// parsing PASCAL-style comment (nested!)
|
||||
char const* pPComment = "{ This is a (* nested *) PASCAL-comment }";
|
||||
|
||||
rule<> pascal_comment;
|
||||
|
||||
pascal_comment = // in PASCAL we have two comment styles
|
||||
comment_nest_p('{', '}') // both may be nested
|
||||
| comment_nest_p("(*", "*)")
|
||||
;
|
||||
|
||||
std::string comment_pascal;
|
||||
|
||||
result = parse (pPComment, pascal_comment[actor_string(comment_pascal)]);
|
||||
if (result.hit)
|
||||
{
|
||||
cout << "Parsed PASCAL-comment successfully!" << endl;
|
||||
cout << "Matched (" << (int)comment_pascal.size() << ") characters: ";
|
||||
cout << "\"" << comment_pascal << "\"" << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Failed to parse PASCAL-comment!" << endl;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// 2. Parsing tagged data with the help of the confix parser
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
std::string body;
|
||||
rule<> open_tag, html_tag, close_tag, body_text;
|
||||
|
||||
open_tag =
|
||||
str_p("<b>")
|
||||
;
|
||||
|
||||
body_text =
|
||||
anychar_p
|
||||
;
|
||||
|
||||
close_tag =
|
||||
str_p("</b>")
|
||||
;
|
||||
|
||||
html_tag =
|
||||
confix_p (open_tag, (*body_text)[actor_string(body)], close_tag)
|
||||
;
|
||||
|
||||
char const* pTag = "<b>Body text</b>";
|
||||
|
||||
result = parse (pTag, html_tag);
|
||||
if (result.hit)
|
||||
{
|
||||
cout << "Parsed HTML snippet \"<b>Body text</b>\" successfully "
|
||||
"(with re-attached actor)!" << endl;
|
||||
cout << "Found body (" << (int)body.size() << " characters): ";
|
||||
cout << "\"" << body << "\"" << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Failed to parse HTML snippet (with re-attached actor)!"
|
||||
<< endl;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// 3. Parsing tagged data with the help of the confix_parser but the
|
||||
// semantic action is directly attached to the body sequence parser
|
||||
// (see comment in confix.hpp) and out of the usage of the 'direct()'
|
||||
// construction function no automatic refactoring takes place.
|
||||
//
|
||||
// As you can see, for successful parsing it is required to refactor the
|
||||
// confix parser by hand. To see, how it fails, you can try the following:
|
||||
//
|
||||
// html_tag_direct =
|
||||
// confix_p.direct(
|
||||
// str_p("<b>"),
|
||||
// (*body_text)[actor_string(bodydirect)],
|
||||
// str_p("</b>")
|
||||
// )
|
||||
// ;
|
||||
//
|
||||
// Here the *body_text parser eats up all the input up to the end of the
|
||||
// input sequence.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
rule<> html_tag_direct;
|
||||
std::string bodydirect;
|
||||
|
||||
html_tag_direct =
|
||||
confix_p.direct(
|
||||
str_p("<b>"),
|
||||
(*(body_text - str_p("</b>")))[actor_string(bodydirect)],
|
||||
str_p("</b>")
|
||||
)
|
||||
;
|
||||
|
||||
char const* pTagDirect = "<b>Body text</b>";
|
||||
|
||||
result = parse (pTagDirect, html_tag_direct);
|
||||
if (result.hit)
|
||||
{
|
||||
cout << "Parsed HTML snippet \"<b>Body text</b>\" successfully "
|
||||
"(with direct actor)!" << endl;
|
||||
cout << "Found body (" << (int)bodydirect.size() << " characters): ";
|
||||
cout << "\"" << bodydirect << "\"" << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Failed to parse HTML snippet (with direct actor)!" << endl;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,103 +0,0 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2001-2003 Joel de Guzman
|
||||
http://spirit.sourceforge.net/
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
=============================================================================*/
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// A complex number micro parser (using subrules)
|
||||
//
|
||||
// [ JDG 5/10/2002 ]
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <iostream>
|
||||
#include <complex>
|
||||
#include <string>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
using namespace std;
|
||||
using namespace BOOST_SPIRIT_CLASSIC_NS;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Our complex number micro parser
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
bool
|
||||
parse_complex(char const* str, complex<double>& c)
|
||||
{
|
||||
double rN = 0.0;
|
||||
double iN = 0.0;
|
||||
|
||||
subrule<0> first;
|
||||
subrule<1> r;
|
||||
subrule<2> i;
|
||||
|
||||
if (parse(str,
|
||||
|
||||
// Begin grammar
|
||||
(
|
||||
first = '(' >> r >> !(',' >> i) >> ')' | r,
|
||||
r = real_p[assign(rN)],
|
||||
i = real_p[assign(iN)]
|
||||
)
|
||||
,
|
||||
// End grammar
|
||||
|
||||
space_p).full)
|
||||
{
|
||||
c = complex<double>(rN, iN);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Main program
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main()
|
||||
{
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
cout << "\t\tA complex number micro parser for Spirit...\n\n";
|
||||
cout << "/////////////////////////////////////////////////////////\n\n";
|
||||
|
||||
cout << "Give me a complex number of the form r or (r) or (r,i) \n";
|
||||
cout << "Type [q or Q] to quit\n\n";
|
||||
|
||||
string str;
|
||||
while (getline(cin, str))
|
||||
{
|
||||
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
|
||||
break;
|
||||
|
||||
complex<double> c;
|
||||
if (parse_complex(str.c_str(), c))
|
||||
{
|
||||
cout << "-------------------------\n";
|
||||
cout << "Parsing succeeded\n";
|
||||
cout << str << " Parses OK: " << c << endl;
|
||||
cout << "-------------------------\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "-------------------------\n";
|
||||
cout << "Parsing failed\n";
|
||||
cout << "-------------------------\n";
|
||||
}
|
||||
}
|
||||
|
||||
cout << "Bye... :-) \n\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2003 Vaclav Vesely
|
||||
http://spirit.sourceforge.net/
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
=============================================================================*/
|
||||
#include <boost/assert.hpp>
|
||||
#include <iostream>
|
||||
#include <boost/cstdlib.hpp>
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <boost/spirit/include/classic_distinct.hpp>
|
||||
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
using namespace BOOST_SPIRIT_CLASSIC_NS;
|
||||
|
||||
// keyword_p for C++
|
||||
// (for basic usage instead of str_p)
|
||||
// NOTE(review): the "0-9a-zA-Z_" set presumably names the characters that
// must not directly follow a keyword (main() expects "declareident" to be
// rejected) -- confirm against the distinct_parser documentation.
const distinct_parser<> keyword_p("0-9a-zA-Z_");
|
||||
|
||||
// keyword_d for C++
|
||||
// (for more intricate usage, for example together with symbol tables)
|
||||
// Directive form, built from the same "0-9a-zA-Z_" character set; it wraps
// an arbitrary parser, as in keyword_d[str_p("ident")] in the grammar below.
const distinct_directive<> keyword_d("0-9a-zA-Z_");
|
||||
|
||||
//  Grammar for `declare [:] ident`, where both words are matched through
//  the distinct keyword parsers defined at file scope.
struct my_grammar: public grammar<my_grammar>
{
    template <typename ScannerT>
    struct definition
    {
        typedef rule<ScannerT> rule_t;

        rule_t top;

        //  The grammar object carries no state, so `self` is not used.
        definition(my_grammar const& /*self*/)
        {
            top =
                    keyword_p("declare")            // keyword_p instead of str_p
                >>  !ch_p(':')                      // optional colon
                >>  keyword_d[str_p("ident")]       // directive form
                ;
        }

        //  Entry rule of the grammar.
        rule_t const& start() const { return top; }
    };
};
|
||||
|
||||
int main()
|
||||
{
|
||||
my_grammar gram;
|
||||
parse_info<> info;
|
||||
|
||||
info = parse("declare ident", gram, space_p);
|
||||
BOOST_ASSERT(info.full); // valid input
|
||||
|
||||
info = parse("declare: ident", gram, space_p);
|
||||
BOOST_ASSERT(info.full); // valid input
|
||||
|
||||
info = parse("declareident", gram, space_p);
|
||||
BOOST_ASSERT(!info.hit); // invalid input
|
||||
|
||||
return exit_success;
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
/*=============================================================================
|
||||
Copyright (c) 2003 Vaclav Vesely
|
||||
http://spirit.sourceforge.net/
|
||||
|
||||
Use, modification and distribution is subject to the Boost Software
|
||||
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
http://www.boost.org/LICENSE_1_0.txt)
|
||||
=============================================================================*/
|
||||
#include <boost/assert.hpp>
|
||||
#include <iostream>
|
||||
#include <boost/cstdlib.hpp>
|
||||
#include <boost/spirit/include/classic_core.hpp>
|
||||
#include <boost/spirit/include/classic_distinct.hpp>
|
||||
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
using namespace BOOST_SPIRIT_CLASSIC_NS;
|
||||
|
||||
//  Grammar for `asn-declare [--] ident`, using a per-definition distinct
//  parser whose tail is itself a parser rather than a character set.
struct my_grammar: public grammar<my_grammar>
{
    template <typename ScannerT>
    struct definition
    {
        typedef rule<ScannerT> rule_t;

        //  keyword_p for ASN.1
        dynamic_distinct_parser<ScannerT> keyword_p;

        rule_t top;

        //  ASN.1 has quite complex naming rules: the tail is an
        //  alphanumeric, or a '-' that is not followed by another '-'.
        //  The grammar object carries no state, so `self` is not used.
        definition(my_grammar const& /*self*/)
        :   keyword_p(alnum_p | ('-' >> ~ch_p('-')))
        {
            top =
                    keyword_p("asn-declare")    // keyword_p instead of str_p
                >>  !str_p("--")                // optional "--" separator
                >>  keyword_p("ident")
                ;
        }

        //  Entry rule of the grammar.
        rule_t const& start() const { return top; }
    };
};
|
||||
|
||||
int main()
|
||||
{
|
||||
my_grammar gram;
|
||||
parse_info<> info;
|
||||
|
||||
info = parse("asn-declare ident", gram, space_p);
|
||||
BOOST_ASSERT(info.full); // valid input
|
||||
|
||||
info = parse("asn-declare--ident", gram, space_p);
|
||||
BOOST_ASSERT(info.full); // valid input
|
||||
|
||||
info = parse("asn-declare-ident", gram, space_p);
|
||||
BOOST_ASSERT(!info.hit); // invalid input
|
||||
|
||||
return exit_success;
|
||||
}
|
||||