mirror of https://github.com/boostorg/spirit.git
spirit2 ! :)
[SVN r44360]
@@ -11,7 +11,7 @@
 // or https://sf.net/mailarchive/forum.php?thread_id=2692308&forum_id=1595
 // for a description of the bug being tested for by this program
 //
-// the problem should be solved with version 1.3 of phoenix/closures.hpp
+// the problem should be solved with version 1.3 of phoenix/closures.hpp>

 #if defined(BOOST_SPIRIT_DEBUG) && defined(__GNUC__) && defined(__WIN32__)
 // It seems that MinGW has some problems with threads and iostream ?
27  doc/Jamfile  Normal file
@@ -0,0 +1,27 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2007 Hartmut Kaiser
#
# Use, modification and distribution is subject to the Boost Software
# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================

project spirit/doc ;

import boostbook : boostbook ;
using quickbook : quickbook ;

boostbook spirit2
    :
    spirit2.qbk
    :
    <xsl:param>boost.root=../../../..
    <xsl:param>boost.libraries=../../../libraries.htm
    <xsl:param>html.stylesheet=../../../../doc/html/boostbook.css
    <xsl:param>chunk.section.depth=5
    <xsl:param>chunk.first.sections=1
    <xsl:param>toc.section.depth=4
    <xsl:param>toc.max.depth=4
    <xsl:param>generate.section.toc.level=4
    ;
46  doc/_concepts_template_.qbk  Normal file
@@ -0,0 +1,46 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section XXX]

[heading Description]

Description of XXX concept

[variablelist Notation
    [[`xxx`]    [An XXX]]
]

[heading Valid Expressions]

(For any Forward Sequence the following expressions must be valid:)

In addition to the requirements defined in _XXX-Basic_concept_, for any
XXX the following must be met:

[table
    [[Expression]   [Semantics]             [Return type]   [Complexity]]
    [[`xxx`]        [Semantics of `xxx`]    [XXX]           [Constant]]
]

[heading Type Requirements]

[table
    [[Expression]   [Requirements]]
    [[`xxx`]        [Requirements for `xxx`]]
]

[heading Invariants]

For any XXX xxx the following invariants always hold:

[heading Models]

Links to models of XXX concept

[endsect]
56  doc/_reference_template_.qbk  Normal file
@@ -0,0 +1,56 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section XXX]

[heading Description]

Description of XXX

[heading Header]

    #include <boost/spirit/xxx.hpp>

[heading Synopsis]

    template <typename T>
    struct XXX;

[heading Template parameters]

[table
    [[Parameter]    [Description]   [Default]]
    [[`T`]          [What is T]     []]
]

[heading Model of]

Link to concept

[heading Objects]

Objects provided by the library

[variablelist Notation
    [[`xxx`]    [An XXX]]
]

Semantics of an expression is defined only where it differs from, or is not
defined in _concept-of_XXX_.

[table
    [[Expression]   [Semantics]             [Return type]   [Complexity]]
    [[`xxx`]        [Semantics of `xxx`]    [XXX]           [Constant]]
]

[heading Example]

Real example code. Use Quickbook import mechanism to link to actual
working code snippets here.

[endsect]
147  doc/acknowledgments.qbk  Normal file
@@ -0,0 +1,147 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Acknowledgments]

This version of Spirit is a complete rewrite of the /classic/ Spirit to which
many people have contributed (see below). But there are a couple of people who
already managed to help significantly during this rewrite. We would like to
express our special acknowledgement to:

[*Eric Niebler] for writing Boost.Proto, without which this rewrite wouldn't
have been possible, and for helping with examples, advice, and suggestions on
how to use Boost.Proto in the best possible way.

[*Ben Hanson] for providing us with an early version of his Lexertl library,
which is proposed for inclusion in Boost (as Boost.Lexer); at the time of this
writing the Boost review for this library is still pending.

__fixme__: Add more people

[heading Acknowledgements from the Spirit V1 /classic/ Documentation]

Special thanks for working on Spirit /classic/ to:

[*Dan Nuffer] for his work on lexers, parse trees, ASTs, XML parsers, the
multi-pass iterator, as well as administering Spirit's site, editing,
maintaining the CVS and doing the releases, plus a zillion other chores that
were almost taken for granted.

[*Hartmut Kaiser] for his work on the C parser, the work on the C/C++
preprocessor, utility parsers, the original port to Intel 5.0, various work on
Phoenix, porting to v1.5, the meta-parsers, the grouping-parsers, extensive
testing and painstaking attention to details.

[*Martin Wille], who improved grammar multi-thread safety, contributed the
eol_p parser, the dynamic parsers and documentation, and took an active role in
almost every aspect from brainstorming and design to coding. And, as always, he
helps keep the regression tests for g++ on Linux as green as ever :-).

[*Martijn W. Van Der Lee], our Web site administrator, for contributing the
RFC821 parser.

[*Giovanni Bajo] for last minute tweaks of Spirit 1.8.0 for CodeWarrior 8.3.
Actually, I'm ashamed Giovanni was not in this list already. He's done a lot
since Spirit 1.5, the first Boost.Spirit release. He was instrumental in
porting the Spirit iterator stuff to the new Boost Iterators Library
(version 2). He also did various bug fixes and wrote some tests here and there.

[*Juan Carlos Arevalo-Baeza (JCAB)] for his work on the C++ parser, the
position iterator, ports to v1.5 and keeping the mailing list discussions alive
and kicking.

[*Vaclav Vesely] for lots of stuff: the no\_actions directive, various patches
and fixes, the distinct parsers, the lazy parser, and some Phoenix tweaks and
add-ons (e.g. new\_). Also, [*Stefan Slapeta] and his wife for editing Vaclav's
distinct parser doc.

[*Raghavendra Satish] for doing the original v1.3 port to VC++ and his work on
Phoenix.

[*Noah Stein] for following up and helping Ragav on the VC++ ports.

[*Hakki Dogusan] for his original v1.0 Pascal parser.

[*John (EBo) David] for his work on the VM and watching over my shoulder as I
code, giving the impression of distance eXtreme programming.

[*Chris Uzdavinis] for feeding in comments and valuable suggestions as well as
editing the documentation.

[*Carsten Stoll] for his work on dynamic parsers.

[*Andy Elvey] and his conifer parser.

[*Bruce Florman], who did the original v1.0 port to VC++.

[*Jeff Westfahl] for porting the loop parsers to v1.5 and contributing the file
iterator.

[*Peter Simons] for the RFC date parser example and tutorial, plus helping out
with some nitty-gritty details.

[*Markus Sch'''&ouml;'''pflin] for suggesting the end_p parser and lots of other
nifty things, and for his active presence on the mailing list.

[*Doug Gregor] for mentoring and his ability to see things that others don't.

[*David Abrahams] for giving Joel a job that allows him to still work on
Spirit, plus countless advice and help on C++ and specifically template
metaprogramming.

[*Aleksey Gurtovoy] for his MPL library, from which we stole many
metaprogramming tricks, especially for less conforming compilers such as
Borland and VC6/7.

[*Gustavo Guerra] for his last minute review of Spirit and constant feedback,
plus patches here and there (e.g. proposing the new dot behavior of the real
numerics parsers).

[*Nicola Musatti], [*Paul Snively], [*Alisdair Meredith] and [*Hugo Duncan] for
testing and sending in various patches.

[*Steve Rowe] for his splendid work on the TSTs that will soon be taken into
Spirit.

[*Jonathan de Halleux] for his work on actors.

[*Angus Leeming] for last minute editing work on the 1.8.0 release
documentation, his work on Phoenix and his active presence in the Spirit
mailing list.

[*Joao Abecasis] for his active presence in the Spirit mailing list, providing
user support, participating in the discussions and so on.

[*Guillaume Melquiond] for a last minute patch to multi_pass for 1.8.1.

[*Peder Holt] for his porting work on Phoenix, Fusion and Spirit to VC6.

To Joel's wife Mariel, who did the graphics in this document.

My, there's a lot in this list! And it's a continuing list; we add people to it
all the time. We hope we did not forget anyone. If we missed someone you know
who has helped in any way, please inform us.

Special thanks also to people who gave feedback and valuable comments,
particularly members of the Boost and Spirit mailing lists. This includes all
those who participated in the review:

[*John Maddock], our review manager, [*Aleksey Gurtovoy], [*Andre Hentz],
[*Beman Dawes], [*Carl Daniel], [*Christopher Currie], [*Dan Gohman],
[*Dan Nuffer], [*Daryle Walker], [*David Abrahams], [*David B. Held],
[*Dirk Gerrits], [*Douglas Gregor], [*Hartmut Kaiser], [*Iain K. Hanson],
[*Juan Carlos Arevalo-Baeza], [*Larry Evans], [*Martin Wille],
[*Mattias Flodin], [*Noah Stein], [*Nuno Lucas], [*Peter Dimov],
[*Peter Simons], [*Petr Kocmid], [*Ross Smith], [*Scott Kirkwood],
[*Steve Cleary], [*Thorsten Ottosen], [*Tom Wenisch], [*Vladimir Prus]

Finally, thanks to SourceForge for hosting the Spirit project, and to Boost: a
C++ community comprised of extremely talented library authors who participate
in the discussion and peer review of well-crafted C++ libraries.

[endsect]
10  doc/faq.qbk  Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section FAQ]
[endsect]
BIN  doc/html/images/FlowOfControl.png  Normal file  (binary file not shown; 78 KiB)
BIN  doc/html/images/Thumbs.db  Normal file  (binary file not shown)
BIN  doc/html/images/TokenStructure.png  Normal file  (binary file not shown; 58 KiB)
10  doc/introduction.qbk  Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Introduction]
[endsect]
50  doc/lex.qbk  Normal file
@@ -0,0 +1,50 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section __lex__]

[include lex/introduction.qbk]

[section __lex__ Tutorials]
[include lex/lexer_tutorials.qbk]
[include lex/lexer_quickstart1.qbk]
[include lex/lexer_quickstart2.qbk]
[include lex/lexer_quickstart3.qbk]
[endsect]

[section Abstracts]
[section Lexer Primitives]
[include lex/lexer_primitives.qbk]
[include lex/tokens_values.qbk]
[include lex/token_definition.qbk]
[endsect]
[include lex/tokenizing.qbk]
[include lex/lexer_semantic_actions.qbk]
[include lex/lexer_static_model.qbk]
[include lex/parsing_using_a_lexer.qbk]
[include lex/lexer_attributes.qbk]
[include lex/lexer_states.qbk]
[endsect]

[section Quick Reference]
[endsect]

[section Reference]
[section Concepts]
[include reference/lex/lexer.qbk]
[include reference/lex/token.qbk]
[include reference/lex/tokendef.qbk]
[include reference/lex/tokenset.qbk]
[endsect]
[include reference/lex/lexer_class.qbk]
[include reference/lex/token_class.qbk]
[include reference/lex/tokendef_class.qbk]
[include reference/lex/tokenset_class.qbk]
[endsect]

[endsect]
137  doc/lex/introduction.qbk  Normal file
@@ -0,0 +1,137 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Introduction to __lex__]

Lexical scanning is the process of analyzing the stream of input characters and
separating it into strings called tokens, separated by whitespace.
Most compiler texts start here, and devote several chapters to discussing
various ways to build scanners. __lex__ is a library built to take care of the
complexities of creating a lexer for your grammar (in this documentation we
will use the terms 'lexical analyzer', 'lexer' and 'scanner' interchangeably).
All it needs to create a lexer is the set of patterns describing the
different tokens you want to recognize in the input. To make this a bit more
formal, here are some definitions:

* A token is a sequence of consecutive characters having a collective meaning.
  Tokens may have attributes specific to the token type, carrying additional
  information about the matched character sequence.
* A pattern is a rule expressed as a regular expression, describing how a
  particular token can be formed. For example, [^\[A-Za-z\]\[A-Za-z_0-9\]*] is
  a pattern for a rule matching C++ identifiers.
* Characters between tokens are called whitespace; these include spaces, tabs,
  newlines, and formfeeds. Many people also count comments as whitespace,
  though since some tools such as lint look at comments, this conflation is not
  perfect.

[heading Why Use a Separate Lexer?]

Typically, lexical scanning is done in a separate module from the parser,
feeding the parser with a stream of input tokens only. Now, theoretically it is
not necessary to do this separation. In the end there is only one set of
syntactical rules defining the language, so in theory we could write the whole
parser in one module. In fact, __qi__ allows you to write parsers without using
a lexer, parsing the input character stream directly, and for the most part
this is the way __spirit__ has been used since its invention.

However, the separation has both practical and theoretical bases, and proves to
be very useful in practical applications. In 1956, Noam Chomsky defined the
"Chomsky Hierarchy" of grammars:

* Type 0: Unrestricted grammars (e.g., natural languages)
* Type 1: Context-Sensitive grammars
* Type 2: Context-Free grammars
* Type 3: Regular grammars

The complexity of these grammars increases from regular grammars being the
simplest to unrestricted grammars being the most complex. Similarly, the
complexity of the recognizers for these grammars increases. Although a few
features of some programming languages (such as C++) are Type 1, fortunately
for the most part programming languages can be described using only Types 2
and 3. The neat part about these two types is that they are well known and the
ways to parse them are well understood. It has been shown that any regular
grammar can be parsed using a state machine (finite automaton). Similarly,
context-free grammars can always be parsed using a push-down automaton
(essentially a state machine augmented by a stack).

In real programming languages and practical grammars the parts that can be
handled as regular expressions tend to be the lower-level parts, such as the
definition of an identifier or of an integer value:

    letter     := [a-zA-Z]
    digit      := [0-9]

    identifier := letter [ letter | digit ]*
    integer    := digit*
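
To make the distinction concrete: a recognizer for the `identifier` rule above
can be hand-written as a trivial finite state machine in a few lines of plain
C++ (a purely illustrative sketch, not using any __spirit__ facilities):

    #include <cctype>
    #include <string>

    // identifier := letter [ letter | digit ]*
    // Two states suffice: 'expect a letter', then 'expect letters/digits'.
    bool is_identifier(std::string const& s)
    {
        if (s.empty() || !std::isalpha(static_cast<unsigned char>(s[0])))
            return false;
        for (std::size_t i = 1; i < s.size(); ++i)
            if (!std::isalnum(static_cast<unsigned char>(s[i])))
                return false;
        return true;
    }

No such hand-written code is needed when using __lex__, but it illustrates why
a state machine, and nothing more powerful, is enough for this class of rules.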

Higher level parts of practical grammars tend to be more complex and can't be
implemented using plain regular expressions anymore. We need to store
information on a stack while recursing into the grammar hierarchy; in fact,
this is the preferred approach used for top-down parsing. Since it takes a
different kind of abstract machine to parse the two types of grammars, it
proved to be efficient to separate the lexical scanner into a separate module
which is built around the idea of a state machine. The goal here is to use the
simplest parsing technique needed for the job.

Another, more practical reason for separating the scanner from the parser is
the need for backtracking during parsing. The input data is a stream of
characters, which is often thought to be processed left to right without any
backtracking. Unfortunately, in practice most of the time that isn't possible.
Almost every language has certain keywords such as IF, FOR, and WHILE. The
decision whether a certain character sequence actually comprises a keyword or
just an identifier often can be made only after seeing the first delimiter
/after/ it. This already is a limited form of backtracking, since we need to
store the string long enough to be able to make the decision. The same is true
for more coarse-grained language features such as nested IF/ELSE statements,
where the decision of which IF the last ELSE statement belongs to can be made
only after seeing the whole construct.

So the structure of a conventional compiler often involves splitting up the
functions of the lower-level and higher-level parsing. The lexical scanner
deals with things at the character level, collecting characters into strings,
converting character sequences into different representations such as integers,
etc., and passing them along to the parser proper as indivisible tokens. It's
also considered normal to let the scanner do additional jobs, such as
identifying keywords, storing identifiers in tables, etc.

Now, __spirit__ follows this structure, where __lex__ can be used to implement
state machine based recognizers, while __qi__ can be used to build recognizers
for context-free grammars. Since both modules are seamlessly integrated with
each other and with the C++ target language it is even possible to use the
provided functionality to build more complex grammar recognizers.

[heading Advantages of using __lex__]

The advantage of using __lex__ to create the lexical analyzer over using more
traditional tools such as __flex__ is its carefully crafted integration with
the __spirit__ library and the C++ host language. You don't need any external
tools to generate the code, and your lexer will be perfectly integrated with
the rest of your program, making it possible to freely access any context
information and data structure. Since the C++ compiler sees all the code, it
will generate optimal code no matter what configuration options have been
chosen by the user. __lex__ gives you all the features you could get from a
similar __flex__ program without the need to leave C++ as a host language:

* the definition of tokens is done using regular expressions (patterns)
* the token definitions can refer to special substitution strings (pattern
  macros) simplifying pattern definitions (see the sketch after this list)
* the generated lexical scanner may have multiple start states
* it is possible to attach code to any of the token definitions; this code gets
  executed whenever the corresponding token pattern has been matched
Even if it is possible to use __lex__ to generate C++ code representing
|
||||||
|
the lexical analyzer (we will refer to that as the /static/ model, described in
|
||||||
|
more detail in the section __sec_lex_static_model__) - a model
|
||||||
|
very similar to the way __flex__ operates - we will mainly focus on the
|
||||||
|
opposite, the /dynamic/ model. You can directly integrate the token definitions
|
||||||
|
into your C++ program, building the lexical analyzer dynamicly at runtime. The
|
||||||
|
dynamic model is something not supported by __flex__ or other lexical scanner
|
||||||
|
generators (such as __re2c__, __ragel__, etc.). But it is very flexible and
|
||||||
|
allows to speed up the development of your application.
|
||||||
|
|
||||||
|
[endsect]
|
||||||
12  doc/lex/lexer_attributes.qbk  Normal file
@@ -0,0 +1,12 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer Attributes]

[endsect]
15  doc/lex/lexer_primitives.qbk  Normal file
@@ -0,0 +1,15 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer Primitives]

[/ Describe the primitive lexer constructs, such as token_def, token_set? ]
[/ Describe the primitive lexer constructs usable in parsers, such as
   in_state[], set_state(), token(), etc. ]

[endsect]
97  doc/lex/lexer_quickstart1.qbk  Normal file
@@ -0,0 +1,97 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Quickstart 1 - A word counter using __lex__]

__lex__ is very modular, which follows the general building principle of the
__spirit__ libraries. You never pay for features you don't use. It is nicely
integrated with the other parts of __spirit__ but nevertheless can be used
separately to build standalone lexical analyzers.
The first quick start example describes a standalone application:
counting characters, words and lines in a file, very similar to what the well
known Unix command `wc` does (for the full example code see here:
[@../../example/lex/word_count_functor.cpp word_count_functor.cpp]).

[import ../example/lex/word_count_functor.cpp]

[heading Prerequisites]

The only required `#include` specific to /Spirit.Lex/ follows. It is a wrapper
for all necessary definitions to use /Spirit.Lex/ in a standalone fashion, and
on top of the __lexertl__ library. Additionally we `#include` two of the Boost
headers to define `boost::bind()` and `boost::ref()`.

[wcf_includes]

To make all the code below more readable we introduce the following namespaces.

[wcf_namespaces]

[heading Defining Tokens]

The most important step while creating a lexer using __lex__ is to define the
tokens to be recognized in the input sequence. This is normally done by
defining the regular expressions describing the matching character sequences,
and optionally their corresponding token ids. Additionally the defined tokens
need to be associated with an instance of a lexer object as provided by the
library. The following code snippet shows how this can be done using __lex__.

[wcf_token_definition]

[heading Doing the Useful Work]

We will use a setup where we want the __lex__ library to invoke a given
function after any of the generated tokens is recognized. For this reason
we need to implement a functor taking at least the generated token as an
argument and returning a boolean value allowing it to stop the tokenization
process. The default token type used in this example carries a token value of
the type `iterator_range<BaseIterator>` pointing to the matched range in the
underlying input sequence.

[wcf_functor]

All that's left is to write some boilerplate code helping to tie together the
pieces described so far. To simplify this example we call the `lex::tokenize()`
function implemented in __lex__ (for a more detailed description of this
function see here: __fixme__), even if we could have written a loop to iterate
over the lexer iterators [`first`, `last`) ourselves, as sketched below.
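
Such a hand-written loop might look roughly like the following (an illustrative
sketch only: the names `str` for the input, `word_count` for the lexer
instance, `counter` for the functor, and `lexer_type` for its type are
assumptions, and error handling is omitted):

    // roughly what lex::tokenize() does for us: step through the token
    // stream, handing every recognized token to the functor, and stop as
    // soon as the functor returns false
    std::string::iterator it = str.begin();
    lexer_type::iterator_type first = word_count.begin(it, str.end());
    lexer_type::iterator_type last = word_count.end();

    while (first != last)
    {
        if (!counter(*first))   // the functor may ask to stop early
            break;
        ++first;
    }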

[heading Pulling Everything Together]

[wcf_main]

[heading Comparing __lex__ with __flex__]

This example was deliberately chosen to be as similar as possible to the
equivalent __flex__ program (see below), which isn't too different from what
has to be written when using __lex__.

[note Interestingly enough, performance comparisons of lexical analyzers
      written using __lex__ with equivalent programs generated by
      __flex__ show that both have comparable execution speeds!
      Generally, thanks to the highly optimized __lexertl__ library and
      due to its carefully designed integration with __spirit__ the
      abstraction penalty to be paid for using __lex__ is negligible.
]

The remaining examples in this tutorial will use more sophisticated features
of __lex__, mainly to allow further simplification of the code to be written,
while maintaining the similarity with corresponding features of __flex__.
__lex__ has been designed to be as similar to __flex__ as possible, which is
why this documentation will provide the corresponding __flex__ code for the
shown __lex__ examples almost everywhere. Consequently, here is the __flex__
code corresponding to the example as shown above.

[wcf_flex_version]

[endsect]
133  doc/lex/lexer_quickstart2.qbk  Normal file
@@ -0,0 +1,133 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Quickstart 2 - A better word counter using __lex__]

People knowing __flex__ will probably complain about the example from the
section __sec_lex_quickstart_1__ as being overly complex and not being
written to leverage the possibilities provided by this tool. In particular the
previous example did not directly use the lexer actions to count the lines,
words and characters. So the example provided in this step of the tutorial will
show how to use semantic actions in __lex__. Even if it still
only allows counting text elements, it introduces other new concepts and
configuration options along the way (for the full example code
see here: [@../../example/lex/word_count_lexer.cpp word_count_lexer.cpp]).

[import ../example/lex/word_count_lexer.cpp]

[heading Prerequisites]

In addition to the only required `#include` specific to /Spirit.Lex/, this
example needs to include a couple of header files from the __phoenix2__
library. This example shows how to attach functors to token definitions, which
could be done using any type of C++ technique resulting in a callable object.
Using __phoenix2__ for this task simplifies things and avoids adding
dependencies to other libraries (__phoenix2__ is already in use for
__spirit__ anyway).

[wcl_includes]

To make all the code below more readable we introduce the following namespaces.

[wcl_namespaces]

To give a preview of what to expect from this example, here is the flex program
which has been used as the starting point. The useful code is directly included
inside the actions associated with each of the token definitions.

[wcl_flex_version]

[heading Semantic Actions in __lex__]

__lex__ uses a very similar way of associating actions with the token
definitions (which should look familiar to anybody knowledgeable with
__spirit__ as well): specifying the operations to execute inside of a pair of
`[]` brackets. In order to be able to attach semantic actions to token
definitions, an instance of a `token_def<>` is defined for each of them.

[wcl_token_definition]

The semantics of the shown code is as follows. The code inside the `[]`
brackets will be executed whenever the corresponding token has been matched by
the lexical analyzer. This is very similar to __flex__, where the action code
associated with a token definition gets executed after the recognition of a
matching input sequence. The code above uses functors constructed using
__phoenix2__, but it is possible to insert any C++ functor as long as it
exposes the interface:

    void f (Range r, std::size_t id, Context& ctx, bool& matched);

[variablelist where:
    [[`Range r`]        [This is a `boost::iterator_range` holding two
                        iterators pointing to the matched range in the
                        underlying input sequence. The type of the
                        held iterators is the same as specified while
                        defining the type of the `lexertl_lexer<...>`
                        (its first template parameter).]]
    [[`std::size_t id`] [This is the token id for the matched token.]]
    [[`Context& ctx`]   [This is a reference to a lexer specific,
                        unspecified type, providing the context for the
                        current lexer state. It can be used to access
                        different internal data items and is needed for
                        lexer state control from inside a semantic
                        action.]]
    [[`bool& matched`]  [This boolean value is pre-initialized to `true`.
                        If the functor sets it to `false` the lexer
                        stops calling any semantic actions attached to
                        this token and behaves as if the token had not
                        been matched in the first place.]]
]
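
For illustration, a minimal hand-written functor exposing this interface could
look like the following sketch (the functor name and the counted quantity are
made up for this example; only the signature is prescribed):

    #include <cstddef>      // for std::size_t
    #include <iterator>     // for std::distance

    // counts the overall number of matched characters; the id, context,
    // and matched parameters are unused here but required by the interface
    struct count_chars
    {
        std::size_t& count;
        explicit count_chars(std::size_t& c) : count(c) {}

        template <typename Range, typename Context>
        void operator()(Range r, std::size_t /*id*/, Context& /*ctx*/,
            bool& /*matched*/) const
        {
            count += std::distance(r.begin(), r.end());
        }
    };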

Even if it is possible to write your own functor implementations, the preferred
way of defining lexer semantic actions is to use __phoenix2__. In this case you
can access the four parameters described in the table above by using the
predefined __phoenix2__ placeholders: `_1` for the iterator range, `_2` for the
token id, `_3` for the reference to the lexer state, and `_4` for the reference
to the boolean value signaling the outcome of the semantic action.

[important All placeholders (`_1`, `_2`, etc.) used in /lexer/ semantic
           actions in conjunction with functors created based on __phoenix2__
           need to be imported from the `namespace boost::phoenix::arg_names`
           (and *not* `namespace boost::spirit::arg_names`, which is
           different from using placeholders in __qi__ or __karma__).
           Using the wrong placeholders leads to subtle compilation errors
           which are difficult to track back to their cause.
]

[heading Associating Token Definitions with the Lexer]

If you compare this with the code from __sec_lex_quickstart_1__ with regard to
the way token definitions are associated with the lexer, you will notice
a different syntax being used here. While in the previous example we used
the `self.add()` style of the API, here we directly assign the token
definitions to `self`, combining the different token definitions using the `|`
operator. Here is the code snippet again:

    self = word  [++ref(w), ref(c) += distance(_1)]
         | eol   [++ref(c), ++ref(l)]
         | any   [++ref(c)]
         ;

This way we have a very powerful and natural way of building the lexical
analyzer. If translated into English this may be read as: The lexical analyzer
will recognize ('`=`') tokens as defined by any of ('`|`') the token
definitions `word`, `eol`, and `any`.

A second difference to the previous example is that we do not explicitly
specify any token ids to use for the separate tokens. Using semantic actions to
trigger some useful work has freed us from the need to define these. To ensure
that every token gets assigned an id, the __lex__ library internally assigns
unique numbers to the token definitions, starting with the constant defined by
`boost::spirit::lex::min_token_id`.

[endsect]
151  doc/lex/lexer_quickstart3.qbk  Normal file
@@ -0,0 +1,151 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Quickstart 3 - Counting Words Using a Parser]

The whole purpose of integrating __lex__ as part of the __spirit__ library was
to add a library allowing the merging of lexical analysis with the parsing
process as defined by a __spirit__ grammar. __spirit__ parsers read their input
from an input sequence accessed by iterators. So naturally, we chose iterators
to be used as the interface between the lexer and the parser. A second goal of
the lexer/parser integration was to enable the usage of possibly different
lexical analyzer libraries. The utilization of iterators seemed to be the
right choice from this standpoint as well, mainly because these can be used as
an abstraction layer hiding implementation specifics of the used lexer
library. The [link spirit.lex.flowcontrol picture] below shows the common
flow control implemented while parsing combined with lexical analysis.

[fig ./images/FlowOfControl.png..The common flow control implemented while parsing combined with lexical analysis..spirit.lex.flowcontrol]

Another problem related to the integration of the lexical analyzer with the
parser was to find a way to blend the defined tokens syntactically
with the grammar definition syntax of __spirit__. For tokens defined as
instances of the `token_def<>` class the most natural way of integration was
to allow them to be used directly as parser components. Semantically these
parser components succeed matching their input whenever the corresponding token
type has been matched by the lexer. This quick start example will demonstrate
this (and more) by counting words again, simply by adding up the numbers inside
of semantic actions of a parser (for the full example code see here:
[@../../example/lex/word_count.cpp word_count.cpp]).

[import ../example/lex/word_count.cpp]

[heading Prerequisites]

This example uses two of the __spirit__ library components: __lex__ and __qi__;
consequently we have to `#include` the corresponding header files. Again, we
need to include a couple of header files from the __phoenix2__ library. This
example shows how to attach functors to parser components, which
could be done using any type of C++ technique resulting in a callable object.
Using __phoenix2__ for this task simplifies things and avoids adding
dependencies to other libraries (__phoenix2__ is already in use for
__spirit__ anyway).

[wcp_includes]

To make all the code below more readable we introduce the following namespaces.

[wcp_namespaces]

[heading Defining Tokens]

Compared to the two previous quick start examples (__sec_lex_quickstart_1__
and __sec_lex_quickstart_2__) the token definition class for this example does
not reveal any surprises. However, it uses lexer token definition macros to
simplify the composition of the regular expressions, which will be described in
more detail in the section __fixme__. Generally, any token definition is usable
without modification either for a standalone lexical analyzer or in conjunction
with a parser.

[wcp_token_definition]

[heading Using Token Definition Instances as Parsers]

While the integration of lexer and parser in the control flow is achieved by
using special iterators wrapping the lexical analyzer, we still need a means of
expressing in the grammar what tokens to match and where. The token definition
class above uses three different ways of defining a token:

* Using an instance of a `token_def<>`, which is handy whenever you need to
  specify a token attribute (for more information about lexer related
  attributes please look here: __sec_lex_attributes__).
* Using a single character as the token; in this case the character represents
  itself as a token, where the token id is the ASCII character value.
* Using a regular expression represented as a string, where the token id needs
  to be specified explicitly to make the token accessible from the grammar
  level.

All three token definition methods require a different method of grammar
integration. But as you can see from the following code snippet, each of these
methods is straightforward and blends the corresponding token instance
naturally with the surrounding __qi__ grammar syntax.

[table
    [[Token definition]     [Parser integration]]
    [[`token_def<>`]        [The `token_def<>` instance is directly usable as a
                            parser component. Parsing of this component will
                            succeed if the regular expression used to define
                            it has been matched successfully.]]
    [[single character]     [The single character is directly usable in the
                            grammar; under certain circumstances it needs to be
                            wrapped by a `char_()` parser component, though.
                            Parsing of this component will succeed if the
                            single character has been matched.]]
    [[explicit token id]    [To use an explicit token id in a __qi__ grammar you
                            are required to wrap it with the special `token()`
                            parser component. Parsing of this component will
                            succeed if the current token has the same token
                            id as specified in the expression `token(<id>)`.]]
]
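
For illustration only (this is not the example's actual grammar, and the token
and id names are made up), the three integration styles might appear side by
side in a rule like this:

    start = tok.word            // token_def<>: usable directly as a parser
          | char_(',')          // single character, wrapped by char_()
          | token(ID_EOL)       // explicit token id, wrapped by token()
          ;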

The grammar definition below uses each of the three types demonstrating their
usage.

[wcp_grammar_definition]

As already described (see: __sec_qi_karma_attributes__), the __qi__ parser
library builds upon a set of fully attributed parser components.
Consequently, all the token definitions support this attribute model as
well. The most natural way of implementing this was to use the token values as
the attributes exposed by the parser component corresponding to the token
definition (you can read more about this topic here: __sec_lex_tokenvalues__).
The example above takes advantage of the full integration of the token values
as the `token_def<>`'s parser attributes: the `word` token definition is
declared as a `token_def<std::string>`, making every instance of a `word` token
carry the string representation of the matched input sequence as its value.
The semantic action attached to `tok.word` receives this string (represented by
the `_1` placeholder) and uses it to calculate the number of matched
characters: `ref(c) += size(_1)`.

[important All placeholders (`_1`, `_2`, etc.) used in /parser/ semantic
           actions in conjunction with functors created based on __phoenix2__
           need to be imported from the `namespace boost::spirit::arg_names`
           (and *not* `namespace boost::phoenix::arg_names`, which is
           different from using placeholders in __lex__).
           Using the wrong placeholders leads to subtle compilation errors
           which are difficult to track back to their cause.
]

[heading Pulling Everything Together]

The main function needs to implement a bit more logic now, as we have to
initialize and start not only the lexical analysis but the parsing process as
well. The three type definitions (`typedef` statements) simplify the creation
of the lexical analyzer and the grammar. After reading the contents of the
given file into memory it calls the function __api_tokenize_and_parse__ to
initialize the lexical analysis and parsing processes.

[wcp_main]

[endsect]
10  doc/lex/lexer_semantic_actions.qbk  Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer Semantic Actions]
[endsect]
21  doc/lex/lexer_states.qbk  Normal file
@@ -0,0 +1,21 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer States]

[heading Controlling the Lexer State from Lexer Semantic Actions]

[heading Controlling the Lexer State from Parser Semantic Actions]

[heading Using a Lexer State for the Skip Parser]

[endsect]
119
doc/lex/lexer_static_model.qbk
Normal file
119
doc/lex/lexer_static_model.qbk
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
[/==============================================================================
|
||||||
|
Copyright (C) 2001-2008 Joel de Guzman
|
||||||
|
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||||
|
|
||||||
|
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||||
|
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||||
|
===============================================================================/]
|
||||||
|
|
||||||
|
[section The /Static/ Lexer Model]
|
||||||
|
|
||||||
|
The documentation of __lex__ so far mostly was about describing the features of
|
||||||
|
the /dynamic/ model, where the tables needed for lexical analysis are generated
|
||||||
|
from the regular expressions at runtime. The big advantage of the dynamic model
|
||||||
|
is its flexibility, and its integration with the __spirit__ library and the C++
|
||||||
|
host language. Its big disadvantage is the need to spend additional runtime to
|
||||||
|
generate the tables, which especially might be a limitation for larger lexical
|
||||||
|
analyers. The /static/ model strives to build upon the smooth integration with
|
||||||
|
__spirit__ and C++, and reuses large parts of the __lex__ library as described
|
||||||
|
so far, while overcoming the additional runtime requirements by using
|
||||||
|
pre-generated tables and tokenizer routines. To make the code generation as
|
||||||
|
simple as possible, it is possible reuse the token definition types developed
|
||||||
|
using the /dynamic/ model without any changes. As will be shown in this
|
||||||
|
section, building a code generator based on an existing token definition type
|
||||||
|
is a matter of writing 3 lines of code.
|
||||||
|
|
||||||
|
Assuming you already built a dynamic lexer for your problem, there are two more
|
||||||
|
steps needed to create a static lexical analyzer using __lex__:
|
||||||
|
|
||||||
|
# generating the C++ code for the static analyzer (including the tokenization
|
||||||
|
function and corresponding tables), and
|
||||||
|
# modifying the dynamic lexical anlyzer to use the generated code.
|
||||||
|
|
||||||
|
Both steps are described in more detail in the two sections below (for the full
|
||||||
|
source code used in this example see the code here:
|
||||||
|
[@../../example/lex/static_lexer/word_count_tokens.hpp the common token definition],
|
||||||
|
[@../../example/lex/static_lexer/word_count_generate.cpp the code generator],
|
||||||
|
[@../../example/lex/static_lexer/word_count_static.hpp the generated code], and
|
||||||
|
[@../../example/lex/static_lexer/word_count_static.cpp the static lexical analyzer]).
|
||||||
|
|
||||||
|
[import ../example/lex/static_lexer/word_count_tokens.hpp]
|
||||||
|
[import ../example/lex/static_lexer/word_count_static.cpp]
|
||||||
|
[import ../example/lex/static_lexer/word_count_generate.cpp]
|
||||||
|
|
||||||
|
But first we provide the code snippets needed to understand the further
|
||||||
|
descriptions. Both, the definition of the used token identifier and the of the
|
||||||
|
token definition class in this example are put into a separate header file to
|
||||||
|
make these available to the code generator and the static lexical analyzer.
|
||||||
|
|
||||||
|
[wc_static_tokenids]
|
||||||
|
|
||||||
|
The important point here is, that the token definition class is not different
|
||||||
|
from a similar class to be used for a dynamic lexical analyzer. The library
|
||||||
|
has been designed in a way, that all components (dynamic lexical analyzer, code
|
||||||
|
generator, and static lexical analyzer) can reuse the very same token definition
|
||||||
|
syntax.
|
||||||
|
|
||||||
|
[wc_static_tokendef]
|
||||||
|
|
||||||
|
The only thing changing between the three different use cases is the template
|
||||||
|
parameter used to instantiate a concrete token definition. Fot the dynamic
|
||||||
|
model and the code generator you probably will use the __class_lexertl_lexer__
|
||||||
|
template, where for the static model you will use the
|
||||||
|
__class_lexertl_static_lexer__ type as the template parameter.
|
||||||
|
|
||||||
|

This example not only shows how to build a static lexer, but it additionally
demonstrates how such a lexer can be used for parsing in conjunction with a
__qi__ grammar. For completeness we provide the simple grammar used in this
example. As you can see, this grammar does not have any dependencies on the
static lexical analyzer, and for this reason it is no different from a grammar
used either without a lexer or with a dynamic lexical analyzer as described
before.

[wc_static_grammar]


[heading Generating the Static Analyzer]

The first additional step to perform in order to create a static lexical
analyzer is to create a small standalone program generating the lexer tables
and the corresponding tokenization function. For this purpose the __lex__
library exposes a special API - the function __api_generate_static__. It
implements the whole code generator; no further code is needed. All it
takes to invoke this function is to supply a token definition instance, an
output stream to generate the code to, and an optional string to be used
as a prefix for the name of the generated function. All in all, just a couple
of lines of code.

[wc_static_generate_main]
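
Condensed to its essentials, such a generator program might look like the
sketch below (the include name, the `lexer_type` placeholder, and the exact
namespace and overload of `generate_static()` are assumptions based on the
linked example files, not verbatim library API):

    #include <fstream>
    #include "word_count_tokens.hpp"   // the common token definition above

    int main()
    {
        // instantiate the token definitions for the code generator
        word_count_tokens<lexer_type> word_count;   // 'lexer_type' assumed

        // the stream receiving the generated code
        std::ofstream out("word_count_static.hpp");

        // emit the tables and the tokenization function; "wc" is the
        // optional prefix used for the generated function names
        return generate_static(word_count, out, "wc") ? 0 : -1;
    }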

The code generator shown above will produce output which should be stored in a
file for later inclusion into the static lexical analyzer as shown in the next
topic (the full generated code can be viewed
[@../../example/lex/static_lexer/word_count_static.hpp here]).


[heading Modifying the Dynamic Analyzer]

The second required step to convert an existing dynamic lexer into a static one
is to change your main program in two places. First, you need to change the
type of the lexer used (that is, the template parameter used while
instantiating your token definition class). While in the dynamic model we have
been using the __class_lexertl_lexer__ template, we now need to change that to
the __class_lexertl_static_lexer__ type. The second change is tightly related
to the first one and involves correcting the corresponding `#include`
statement to:

[wc_static_include]
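
Taken together, the two changes amount to something like the following sketch
(the `token_type` name and the original include are assumptions based on the
linked example files):

    // before (dynamic model):
    //   typedef lex::lexertl_lexer<token_type> lexer_type;
    //
    // after (static model): include the generated code, then switch
    // the lexer template
    #include "word_count_static.hpp"   // the generated tables and tokenizer
    typedef lex::lexertl_static_lexer<token_type> lexer_type;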

Otherwise the main program is no different from an equivalent program using
the dynamic model. This feature makes it really easy, for instance, to develop
the lexer in dynamic mode and to switch to the static mode after the code has
been stabilized. The simple generator application shown above enables the
integration of the code generator into any existing build process. The
following code snippet provides the overall main function, highlighting
the code to be changed.

[wc_static_main]


[endsect]
59
doc/lex/lexer_tutorials.qbk
Normal file
@@ -0,0 +1,59 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section __lex__ Tutorials Overview]

The __lex__ library implements several components on top of possibly different
lexer generator libraries. It exposes a pair of iterators, which, when
dereferenced, return a stream of tokens generated from the underlying character
stream. The generated tokens are based on the token definitions supplied by the
user.

Currently, __lex__ is built on top of Ben Hanson's excellent __lexertl__
library (which is a proposed Boost library). __lexertl__ provides the necessary
functionality to build state machines based on a set of supplied regular
expressions. But __lex__ is not restricted to be used with __lexertl__. We
expect it to be usable in conjunction with any other lexical scanner generator
library; all that needs to be implemented is a set of wrapper objects exposing
a well defined interface as described in this documentation.

[note For the sake of clarity all examples in this documentation assume
      __lex__ to be used on top of __lexertl__.]

Building a lexer using __lex__ is highly configurable, where most of this
configuration has to be done at compile time. Almost all of the configurable
parameters have generally useful default values, though, which means that
starting a project is easy and straightforward. Here is a (non-exhaustive) list
of features you can tweak to adjust the generated lexer instance to your actual
needs (see the configuration sketch after the list):

* Select and customize the token type to be generated by the lexer instance.
* Select and customize the token value types the generated token instances will
  be able to hold.
* Select the iterator type of the underlying input stream, which will be used
  as the source for the character stream to tokenize.
* Customize the iterator type returned by the lexer to enable debug support,
  special handling of certain input sequences, etc.
* Select the /dynamic/ or the /static/ runtime model for the lexical
  analyzer.
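
The following sketch shows how such a configuration typically looks as a set
of typedefs (the namespace aliases `lex` and `mpl`, and the use of
`std::string` iterators, are assumptions for illustration only):

    // iterator type of the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // token type: selects the input iterator and the token value types
    // the generated token instances will be able to hold
    typedef lex::lexertl_token<
        base_iterator_type, mpl::vector<unsigned int, std::string>
    > token_type;

    // lexer type selecting the /dynamic/ runtime model; substituting
    // lexertl_static_lexer here would select the /static/ model instead
    typedef lex::lexertl_lexer<token_type> lexer_type;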

Special care has been taken during the development of the library to ensure
that optimal code will be generated regardless of the configuration options
selected.

The series of tutorial examples in this section will guide you through some
common use cases, helping you to understand the big picture. The first two
quick start examples (__sec_lex_quickstart_1__ and __sec_lex_quickstart_2__)
introduce the __lex__ library while building two standalone applications, not
being connected to or depending on any other part of __spirit__. The section
__sec_lex_quickstart_3__ demonstrates how to use a lexer in conjunction with a
parser (where the parser is, of course, built using __qi__).

[endsect]
15
doc/lex/parsing_using_a_lexer.qbk
Normal file
@@ -0,0 +1,15 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parsing using a Lexer]

[/ write about integration of lexer component with __qi__]

[/ write about iterator interface exposed by a __lex__ lexer]

[endsect]
11
doc/lex/token_definition.qbk
Normal file
@@ -0,0 +1,11 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Ways to define Tokens]

[endsect]
15
doc/lex/tokenizing.qbk
Normal file
@@ -0,0 +1,15 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Tokenizing Input Data]

[heading The tokenize() function]

[heading The generate_static() function]

[endsect]
207
doc/lex/tokens_values.qbk
Normal file
@@ -0,0 +1,207 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section About Tokens and Token Values]

As already discussed, lexical scanning is the process of analyzing the stream
of input characters and separating it into strings called tokens, most of the
time separated by whitespace. The different token types recognized by a lexical
analyzer often get assigned unique integer token identifiers (token ids). These
token ids are normally used by the parser to identify the current token without
having to look at the matched string again. The __lex__ library is no different
in this respect, as it uses the token ids as the main means of identification
of the different token types defined for a particular lexical analyzer.
However, it is different from commonly used lexical analyzers in the sense
that it returns (references to) instances of a (user defined) token class to
the user. Consequently, the only real limitation posed on this token class is
that it has to carry at least the token id of the token it represents. For more
information about the interface a user defined token type has to expose please
look at the __sec_ref_lex_token__ reference. The library provides a default
token type based on the __lexertl__ library which should be sufficient in most
use cases: the __class_lexertl_token__ type. This section focuses on the
description of general features a token class may implement and how this
integrates with the other parts of the __lex__ library.

[heading The Anatomy of a Token]

It is very important to understand the difference between a token definition
(represented by the __class_token_def__ template) and a token itself (for
instance represented by the __class_lexertl_token__ template).

The token definition is used to describe the main features of a particular
token type, especially:

* to simplify the definition of a token type using a regular expression pattern
  applied while matching this token type,
* to associate a token type with a particular lexer state,
* to optionally assign a token id to a token type,
* to optionally associate some code to execute whenever an instance of this
  token type has been matched,
* and to optionally specify the attribute type of the token value.

The token itself is a data structure returned by the lexer iterators.
Dereferencing a lexer iterator returns a reference to the last matched token
instance. It encapsulates the part of the underlying input sequence matched by
the regular expression used during the definition of this token type.
Incrementing the lexer iterator invokes the lexical analyzer to
match the next token by advancing the underlying input stream. The token data
structure contains at least the token id of the matched token type,
allowing the matched character sequence to be identified. Optionally, the token
instance may contain a token value and/or the lexer state this token instance
was matched in. The following [link spirit.lex.tokenstructure figure] shows the
schematic structure of a token.

[fig ./images/TokenStructure.png..The structure of a token..spirit.lex.tokenstructure]

The token value and the token state may be omitted for optimization reasons,
so that the token does not carry more data than actually required. This
configuration can be achieved by supplying appropriate template parameters
for the __class_lexertl_token__ template while defining the token type.

The lexer iterator returns the same token type for each of the different
matched token definitions. To accommodate the possibly different token
/value/ types exposed by the various token types (token definitions), the
general type of the token value is a __boost_variant__. As a minimum (for the
default configuration) this token value variant will be configured to always
hold a __boost_iterator_range__ containing the pair of iterators pointing to
the matched input sequence for this token instance.

[note If the lexical analyzer is used in conjunction with a __qi__ parser, the
      stored __boost_iterator_range__ token value will be converted to the
      requested token type (parser attribute) exactly once. This happens at the
      time of the first access to the token value requiring the
      corresponding type conversion. The converted token value will be stored
      in the __boost_variant__ replacing the initially stored iterator range.
      This avoids converting the input sequence to the token value more than
      once, thus optimizing the integration of the lexer with __qi__, even
      during parser backtracking.
]

Here is the template prototype of the __class_lexertl_token__ template:

    template <
        typename Iterator = char const*,
        typename AttributeTypes = mpl::vector0<>,
        typename HasState = mpl::true_
    >
    struct lexertl_token;

[variablelist where:
    [[Iterator]       [This is the type of the iterator used to access the
                       underlying input stream. It defaults to a plain
                       `char const*`.]]
    [[AttributeTypes] [This is either an mpl sequence containing all
                       attribute types used for the token definitions or the
                       type `omitted`. If the mpl sequence is empty (which is
                       the default), all token instances will store a
                       `boost::iterator_range<Iterator>` pointing to the start
                       and the end of the matched section in the input stream.
                       If the type is `omitted`, the generated tokens will
                       contain no token value (attribute) at all.]]
    [[HasState]       [This is either `mpl::true_` or `mpl::false_`, allowing
                       control over whether the generated token instances will
                       contain the lexer state they were generated in. The
                       default is `mpl::true_`, so all token instances will
                       contain the lexer state.]]
]

Normally, during its construction, a token instance always holds the
__boost_iterator_range__ as its token value (except if it has been defined
using the `omitted` token value type). This iterator range then is
converted in place to the requested token value type (attribute) when it is
requested for the first time.
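
For instance, a token type holding either an `unsigned int` or a
`std::string` as its value, and a second one carrying no value at all, might
be defined as follows (the namespace aliases `lex` and `mpl`, and the
unqualified use of `omitted`, are assumptions for illustration only):

    // the token value is a variant additionally capable of holding an
    // unsigned int or a std::string; the lexer state is included (default)
    typedef lex::lexertl_token<
        char const*, mpl::vector<unsigned int, std::string>
    > token_type;

    // token instances carrying no token value (attribute) at all
    typedef lex::lexertl_token<char const*, omitted> plain_token_type;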

[heading The Physiognomy of a Token Definition]

The token definitions (represented by the __class_token_def__ template) are
normally used as part of the definition of the lexical analyzer. At the same
time a token definition instance may be used as a parser component in __qi__.

The template prototype of this class is shown here:

    template<
        typename Attribute = unused_type,
        typename Char = char
    >
    class token_def;

[variablelist where:
    [[Attribute]      [This is the type of the token value (attribute)
                       supported by token instances representing this token
                       type. This attribute type is exposed to the __qi__
                       library whenever this token definition is used as a
                       parser component. The default attribute type is
                       `unused_type`, which means the token instance holds a
                       __boost_iterator_range__ pointing to the start
                       and the end of the matched section in the input stream.
                       If the attribute is `omitted` the token instance will
                       expose no token value at all. Any other type will be
                       used directly as the token value type.]]
    [[Char]           [This is the value type of the iterator for the
                       underlying input sequence. It defaults to `char`.]]
]

The semantics of the template parameters for the token type and the token
definition type are very similar and interdependent. As a rule of thumb you can
think of the token definition type as the means of specifying everything
related to a single specific token type (such as `identifier` or `integer`).
On the other hand, the token type is used to define the general properties of
all token instances generated by the __lex__ library.

[important If you don't list any token value types in the token type definition
           declaration (resulting in the usage of the default
           __boost_iterator_range__ token type) everything will compile and
           work just fine, just a bit less efficiently. This is because the
           token value will be converted from the matched input sequence every
           time it is requested.

           But as soon as you specify at least one token value type while
           defining the token type you'll have to list all value types used for
           __class_token_def__ declarations in the token definition class,
           otherwise compilation errors will occur.
]
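
The following sketch illustrates this pairing (all namespace aliases and
variable names are assumptions for illustration only):

    // the token type lists unsigned int and std::string as value types
    typedef lex::lexertl_token<
        base_iterator_type, mpl::vector<unsigned int, std::string>
    > token_type;

    // token definitions must draw their value types from that list
    lex::token_def<unsigned int> constant;    // ok: listed above
    lex::token_def<std::string>  identifier;  // ok: listed above
    lex::token_def<double>       real;        // error: double is not listed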

[heading Examples of using __class_lexertl_token__]

Let's start with some examples. We refer to one of the __lex__ examples (for
the full source code of this example please see
[@../../example/lex/example4.cpp example4.cpp]).

[import ../example/lex/example4.cpp]

The first code snippet shows an excerpt of the token definition class: the
definition of a couple of token types. Some of the token types do not expose a
special token value (`if_`, `else_`, and `while_`). Their token value will
always hold the iterator range of the matched input sequence only. The token
definitions for the `identifier` and the integer `constant` are specialized
to expose an explicit token value type each: `std::string` and `unsigned int`.

[example4_token_def]

As the parsers generated by __qi__ are fully attributed, any __qi__ parser
component needs to expose a certain type as its parser attribute. Naturally,
the __class_token_def__ exposes the token value type as its parser attribute,
enabling a smooth integration with __qi__.

The next code snippet demonstrates how the required token value types are
specified while defining the token type to use. All of the token value types
used for at least one of the token definitions have to be re-iterated in the
token type definition as well.

[example4_token]

To prevent the token from having a token value at all, the special tag
`omitted` can be used: `token_def<omitted>` and
`lexertl_token<base_iterator_type, omitted>`.

[endsect]
24
doc/notes.qbk
Normal file
@@ -0,0 +1,24 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Notes]

[section Portability]
[endsect]

[section Porting from Spirit 1.8.x]
[endsect]

[section Style Guide]
[include notes/style_guide.qbk]
[endsect]

[section Techniques]
[endsect]

[endsect]
87
doc/notes/style_guide.qbk
Normal file
@@ -0,0 +1,87 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

At some point, especially when there are lots of semantic actions attached to
various points, the grammar tends to be quite difficult to follow. In order to
keep an easy-to-read, consistent and aesthetically pleasing look to the Spirit
code, the following coding style guide is advised.

This coding style is adapted and extended from the ANTLR/PCCTS style (Terence
Parr) and the [@http://groups.yahoo.com/group/boost/files/coding_guidelines.html
Boost coding guidelines] (David Abrahams and Nathan Myers) and is the
combined work of Joel de Guzman, Chris Uzdavinis and Hartmut Kaiser.

* Rule names use the standard C++ (Boost) convention. The rule name may be
  very long.
* The '=' is neatly indented 4 spaces below. As in Boost, use spaces instead
  of tabs.
* Breaking the operands into separate lines puts the semantic actions neatly
  to the right.
* A semicolon on the last line terminates the rule.
* The adjacent parts of a sequence should be indented so that everything
  belonging to one level sits at the same indentation level.

    program
        = program_heading [heading_action]
        >> block [block_action]
        >> '.'
        | another_sequence
        >> etc
        ;

* Prefer literals in the grammar to identifiers, e.g. `"program"` instead
  of `PROGRAM`, `'>='` instead of `GTE` and `'.'` instead of `DOT`. This makes
  it much easier to read. If this isn't possible (for instance where the used
  tokens must be identified through integers) capitalized identifiers should
  be used instead.
* Breaking the operands may not be needed for short expressions,
  e.g. `*(',' >> file_identifier)`, as long as the line does not
  exceed 80 characters.
* If a sequence fits on one line, put spaces inside the parentheses
  to clearly separate them from the rules.

    program_heading
        = no_case["program"]
        >> identifier
        >> '('
        >> file_identifier
        >> *( ',' >> file_identifier )
        >> ')'
        >> ';'
        ;

* Nesting directives: If a rule does not fit on one line (80 characters)
  it should be continued on the next line, indented by one level. The brackets
  of directives, semantic expressions (using Phoenix or LL lambda expressions)
  or parsers should be placed as follows.

    identifier
        = no_case
        [
            lexeme
            [
                alpha >> *(alnum | '_') [id_action]
            ]
        ]
        ;

* Nesting unary operators (e.g. Kleene star): Unary rule operators
  (Kleene star, `'!'`, `'+'` etc.) should be moved out one space before
  the corresponding indentation level, if this rule has a body or a
  sequence after it which does not fit on one line. This makes the
  formatting more consistent and moves the rule 'body' to the same
  indentation level as the rule itself, highlighting the unary operator.

    block
        = *(   label_declaration_part
           |   constant_definition_part
           |   type_definition_part
           |   variable_declaration_part
           |   procedure_and_function_declaration_part
           )
        >> statement_part
        ;
97
doc/outline.txt
Normal file
@@ -0,0 +1,97 @@
# Copyright (C) 2001-2008 Joel de Guzman
# Copyright (C) 2001-2008 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)


Preface
What's New
Introduction
Qi and Karma
    Tutorials
    Abstracts
        Parsing Expression Grammar
        Parsing and Generating
        Primitives
        Operators
        Attributes
        Semantic Actions
        Directives
        Rules
        Grammars
        Debugging
        Error Handling
        Parse Trees and ASTs
    Quick Reference
    Reference
        Concepts
            Parser
            Generator
            Parser Director
            Generator Director
        Char
        String
        Numeric
        Binary
        Directive
        Action
        Nonterminal
        Operator
        Stream
        Auxiliary
        Debug
Lex
    Introduction
    Tutorials
    Abstracts
        Parsing using a Lexer
        Lexer Primitives
        Lexer States
        Lexer Attributes
        Lexer Semantic Actions
    Quick Reference
    Reference
        Concepts
            Lexer
            Token
            TokenDef
            TokenSet
        Lexer Class
        Token Class
        TokenDef Class
        TokenSet Class
FAQ
Notes
    Portability
    Porting from Spirit 1.8.x
    Style Guide
    Techniques
Rationale
Acknowledgments
References

-----------------------------------------------------------------

Concepts Outline:
    Description
    Notation
    Valid Expressions
        Expression | Semantics | Return type | Complexity
    Type Requirements
        Expression | Requirements
    Invariants
    Models

Reference Page Outline:
    Description
    Header
    Synopsis
    Template parameters
    Model of
    Objects
    Expression Semantics
        Expression | Semantics | Return type | Complexity
    Example
217
doc/preface.qbk
Normal file
@@ -0,0 +1,217 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Preface]

[:['["Examples of designs that meet most of the criteria for
"goodness" (easy to understand, flexible, efficient) are a
recursive-descent parser, which is traditional procedural code. Another
example is the STL, which is a generic library of containers and algorithms
depending crucially on both traditional procedural code and on
parametric polymorphism.]] [*--Bjarne Stroustrup]]

[heading History]

[heading /80s/]

In the mid-80s, Joel wrote his first calculator in Pascal. It was
an unforgettable coding experience. He was amazed at how a mutually
recursive set of functions could model a grammar specification. In time,
the skills he acquired from that academic experience became very
practical. Periodically Joel was tasked to do some parsing. For
instance, whenever he needed to perform any form of I/O, even in
binary, he tried to approach the task somewhat formally by writing a
grammar using Pascal-like syntax diagrams and then writing a
corresponding recursive-descent parser. This worked very well.

[heading /90s/]

The arrival of the Internet and the World Wide Web magnified this
thousand-fold. At one point Joel had to write an HTML parser for a Web
browser project. He got a recursive-descent HTML parser working based
on the W3C formal specifications easily. He was certainly glad that
HTML had a formal grammar specification. Because of the influence of
the Internet, Joel then had to do more parsing. RFC specifications
were everywhere. SGML, HTML, XML, even email addresses and those
seemingly trivial URLs were all formally specified using small
EBNF-style grammar specifications. This made him wish for a tool similar
to big-time parser generators such as YACC and ANTLR, where a parser is
built automatically from a grammar specification. Yet, he wanted it to
be extremely small; small enough to fit in his pocket, yet scalable.

It must be able to practically parse anything from simple grammars such
as email addresses to moderately complex grammars such as XML and perhaps
some small to medium-sized scripting languages. Scalability is a prime
goal. You should be able to use it for small tasks such as parsing
command lines without incurring a heavy payload, as you do when you
are using YACC or PCCTS. Even now that it has evolved and matured to
become a multi-module library, true to its original intent, Spirit can
still be used for extreme micro-parsing tasks. You only pay for
the features that you need. The power of Spirit comes from its modularity
and extensibility. Instead of giving you a sledgehammer, it gives you
the right ingredients to create a sledgehammer easily.

The result was Spirit. Spirit was a personal project that was
conceived when Joel was doing R&D in Japan. Inspired by the GoF's
composite and interpreter patterns, he realized that he could model a
recursive-descent parser with hierarchical-object composition of
primitives (terminals) and composites (productions). The original
version was implemented with run-time polymorphic classes. A parser was
generated at run time by feeding in production rule strings such as:

    "prod ::= {'A' | 'B'} 'C';"

A compile function compiled the parser, dynamically creating a
hierarchy of objects and linking semantic actions on the fly. A very
early text can be found here: __early_spirit__.

[heading /2001 to 2006/]

Versions 1.0 to 1.8 were a complete rewrite of the original Spirit
parser using expression templates and static polymorphism, inspired by
the works of Todd Veldhuizen (__todd__exprtemplates__, C++ Report,
June 1995). Initially, the static-Spirit version was meant only to
replace the core of the original dynamic-Spirit. Dynamic-Spirit
needed a parser to implement itself anyway. The original employed a
hand-coded recursive-descent parser to parse the input grammar
specification strings. Incidentally, this was when Hartmut joined
the Spirit development.

After its initial "open-source" debut in May 2001, static-Spirit
became a success. At around November 2001, the Spirit website had an
activity percentile of 98%, making it the number one parser tool at
SourceForge at the time. Not bad for such a niche project as a
parser library. The "static" portion of Spirit was forgotten and
static-Spirit simply became Spirit. The library soon evolved to
acquire more dynamic features.

Spirit was formally accepted into __boost__ in October 2002. Boost is
a peer-reviewed, open collaborative development effort that is a
collection of free Open Source C++ libraries covering a wide range of
domains. The Boost Libraries have become widely known as an industry
standard for design and implementation quality, robustness, and
reusability.

[heading /2007/]

Over the years, especially after Spirit was accepted into Boost,
Spirit has served its purpose quite admirably. The focus of what we'll
now call [*/Classic-Spirit/] (versions prior to 2.0) was on
transduction parsing, where the input string is merely translated to an
output string. A lot of parsers are of the transduction type. When the
time came to add attributes to the parser library, it was done rather
in an ad-hoc manner, with the goal of being 100% backward compatible
with classic Spirit. Some parsers have attributes, some don't.

Spirit V2 is another major rewrite. Spirit V2 grammars are fully
attributed (see __attr_grammar__). All parser components have
attributes. To do this efficiently and elegantly, we had to use a
couple of infrastructure libraries. Some of them hadn't been written
yet at the time, some were quite new when Spirit debuted, and some
needed work. __mpl__ is an important infrastructure library, yet it is
not sufficient to implement Spirit V2. Another library had to be
written: __fusion__. Fusion sits between MPL and STL -- between compile
time and runtime -- mapping types to values. Fusion is a direct
descendant of both MPL and __boost_tuples__ (Fusion is now a
full-fledged __boost__ library). __phoenix__ also had to be beefed up
to support Spirit V2. The result is __phoenix2__. Last but not least,
Spirit V2 uses an __todd__exprtemplates__ library called
__boost_proto__.

[heading New Ideas: Spirit V2]

Just before the development of Spirit V2 began, Hartmut came across
the __string_template__ library, which is a part of the ANTLR parser
framework. It is a Java template engine (with ports for C# and Python)
for generating source code, web pages, emails, or any other formatted
text output. With it, he got the idea of using a formal notation
(a grammar) to describe the expected structure of an input character
sequence. The same grammar may be used to formalize the structure of a
corresponding output character sequence. This is possible because
parsing, most of the time, is implemented by comparing the input with
the patterns defined by the grammar. If we use the same patterns to
format a matching output, the generated sequence will follow the rules
of the grammar as well.

This insight led to the implementation of a grammar driven output generation
library compatible with the Spirit parser library. As it turned out, parsing
and generation are tightly connected and have very similar concepts. The
duality of these two sides of the same coin is ubiquitous, which
allowed us to build the parser library __qi__ and the generator library
__karma__ using the same component infrastructure.

The idea of creating a lexer library well integrated with the Spirit parsers is
not new. It has been discussed for almost the whole existence of
Classic-Spirit (pre-V2). Several attempts to integrate existing lexer
libraries and frameworks with Spirit have been made and served as a proof of
concept and usability (for example see __wave__: The Boost C/C++ Preprocessor
Library, and __slex__: a fully dynamic C++ lexer implemented with Spirit).
Based on these experiences we added __lex__, a fully integrated lexer library,
to the mix, allowing us to take advantage of the power of regular expressions
for token matching, removing pressure from the parser components, and
simplifying parser grammars. Again, Spirit's modular structure allowed us to
reuse the same underlying component library as for the parser and generator
libraries.


[heading How to use this manual]

Each major section (there are two: __sec_qi_and_karma__, and __sec_lex__) is
roughly divided into 3 parts:

# Tutorials: A step by step guide with heavily annotated code. These
  are meant to get the user acquainted with the library as quickly as
  possible. The objective is to build the confidence of the user in
  using the library through abundant examples and detailed instructions.
  Examples speak volumes.

# Abstracts: A high level summary of key topics. The objective is to
  give the user a high level view of the library, the key concepts,
  background and theories.

# Reference: Detailed formal technical reference. We start with a quick
  reference -- an easy to use table that maps into the reference proper.
  The reference proper starts with C++ __cpp_concepts__ followed by
  models of the concepts.

Some icons are used to mark certain topics indicative of their relevance.
These icons precede some text to indicate:

[table Icons

    [[Icon]             [Name]          [Meaning]]

    [[__note__]         [Note]          [Generally useful information (an
                                         aside that doesn't fit in the flow
                                         of the text)]]

    [[__tip__]          [Tip]           [Suggestion on how to do something
                                         (especially something that may not
                                         be obvious)]]

    [[__important__]    [Important]     [Important note on something to take
                                         particular notice of]]

    [[__caution__]      [Caution]       [Take special care with this - it may
                                         not be what you expect and may cause
                                         bad results]]

    [[__danger__]       [Danger]        [This is likely to cause serious
                                         trouble if ignored]]
]

This documentation is automatically generated by the Boost QuickBook
documentation tool. QuickBook can be found in the __boost_tools__.

[heading Support]

Please direct all questions to Spirit's mailing list. You can subscribe to the
__spirit_list__. The mailing list has a searchable archive. A search link to
this archive is provided on __spirit__'s home page. You may also read and post
messages to the mailing list through __spirit_general__ (thanks to __gmane__).
The news group mirrors the mailing list. Here is a link to the archives:
__mlist_archive__.

[endsect] [/ Preface]
52
doc/qi_and_karma.qbk
Normal file
@@ -0,0 +1,52 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Qi and Karma]

[include qi_and_karma/tutorials.qbk]

[section Abstracts]
[include qi_and_karma/peg.qbk]
[include qi_and_karma/parsing.qbk]
[include qi_and_karma/generating.qbk]
[include qi_and_karma/primitives.qbk]
[include qi_and_karma/operators.qbk]
[include qi_and_karma/attributes.qbk]
[include qi_and_karma/semantic_actions.qbk]
[include qi_and_karma/directives.qbk]
[include qi_and_karma/rules.qbk]
[include qi_and_karma/grammars.qbk]
[include qi_and_karma/debugging.qbk]
[include qi_and_karma/error_handling.qbk]
[include qi_and_karma/parse_trees_and_asts.qbk]
[endsect]

[/section Quick Reference]
[include qi_and_karma/quick_reference.qbk]
[/endsect]

[section Reference]
[section Concepts]
[include reference/qi_and_karma/parser.qbk]
[include reference/qi_and_karma/generator.qbk]
[endsect]
[include reference/qi_and_karma/char.qbk]
[include reference/qi_and_karma/string.qbk]
[include reference/qi_and_karma/numeric.qbk]
[include reference/qi_and_karma/binary.qbk]
[include reference/qi_and_karma/directive.qbk]
[include reference/qi_and_karma/action.qbk]
[include reference/qi_and_karma/nonterminal.qbk]
[include reference/qi_and_karma/operator.qbk]
[include reference/qi_and_karma/stream.qbk]
[include reference/qi_and_karma/auxiliary.qbk]
[include reference/qi_and_karma/debug.qbk]
[endsect]

[endsect]
10
doc/qi_and_karma/attributes.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Attributes]
[endsect]
10
doc/qi_and_karma/debugging.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Debugging]
[endsect]
10
doc/qi_and_karma/directives.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Directives]
[endsect]
10
doc/qi_and_karma/error_handling.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Error Handling]
[endsect]
24
doc/qi_and_karma/generating.qbk
Normal file
@@ -0,0 +1,24 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parsing and Generating]

[heading The API functions exposed by __qi__ ]

[heading The parse() function]

[heading The phrase_parse() function]

[heading The tokenize_and_parse() function]

[heading The tokenize_and_phrase_parse() function]

[heading The make_parser() function]

[endsect]
10
doc/qi_and_karma/grammars.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Grammars]
[endsect]
10
doc/qi_and_karma/operators.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Operators]
[endsect]
10
doc/qi_and_karma/parse_trees_and_asts.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parse Trees and ASTs]
[endsect]
44
doc/qi_and_karma/parsing.qbk
Normal file
@@ -0,0 +1,44 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parsing]

Central to the library is the parser. The parser does the actual
work of recognizing a linear input stream of data read sequentially
from start to end by the supplied iterators. The parser attempts to
match the input following a well-defined set of specifications known
as grammar rules. The parser returns a `bool` to report success or
failure. When successful, the parser calls a client-supplied semantic
action, if there is one. The semantic action extracts structural
information depending on the data passed by the parser and the
hierarchical context of the parser it is attached to.

Parsers come in different flavors. The Spirit library comes bundled with an
extensive set of pre-defined parsers that perform various parsing tasks from
the trivial to the complex. The parser, as a concept, has a public conceptual
interface contract. Following the contract, anyone can write a conforming
parser that will play along well with the library's predefined components. We
shall provide a blueprint detailing the conceptual interface of the parser
later.

Clients of the library generally do not need to write their own hand-coded
parsers at all. Spirit has an immense repertoire of pre-defined parsers
covering all aspects of syntax and semantic analysis. We shall examine this
repertoire of parsers in the following sections. In the rare case where a
specific functionality is not available, it is extremely easy to write a
user-defined parser. The ease in writing a parser entity is the main reason
for Spirit's extensibility.
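
To give a first feel for what this looks like in code, here is a minimal
sketch of invoking one of the pre-defined parsers (the convenience header
path is an assumption; the exact include structure evolved during Spirit
V2's development):

    #include <boost/spirit/include/qi.hpp>
    #include <string>

    namespace qi = boost::spirit::qi;

    bool parse_number(std::string const& input)
    {
        std::string::const_iterator first = input.begin();

        // try to match a real number; parse() returns true on success
        // and advances 'first' past the consumed characters
        return qi::parse(first, input.end(), qi::double_);
    }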

[heading The API functions exposed by __qi__ ]

[heading The parse() function]

[heading The phrase_parse() function]

[heading The tokenize_and_parse() function]

[heading The tokenize_and_phrase_parse() function]

[heading The make_parser() function]

[endsect]
10
doc/qi_and_karma/peg.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parsing Expression Grammar]
[endsect]
10
doc/qi_and_karma/primitives.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Primitives]
[endsect]
43
doc/qi_and_karma/quick_reference.qbk
Normal file
@@ -0,0 +1,43 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Quick Reference]

The following tables use some conventions to encode the attribute type exposed
by a component:

[variablelist
    [[`attribute_of(P)`]    [The component exposes the same attribute as the
                             component `P` used as part of the overall
                             construct]]
    [[`value_type(I)`]      [The component exposes the `value_type` of the
                             underlying iterator `I` as its attribute type]]
]

[table Character Parsers
    [[Component]    [Description]   [Attribute]]
    [[`char_`]      []              [`char`]]
    [[`wchar`]      []              [`wchar_t`]]
    [[`lit`]        []              [`unused`]]
    [[`wlit`]       []              [`unused`]]
    [[`'x'`]        []              [`unused`]]
    [[`L'x'`]       []              [`unused`]]
    [[`alnum`]      []              [`Char`]]
    [[`alpha`]      []              [`Char`]]
    [[`blank`]      []              [`Char`]]
    [[`cntrl`]      []              [`Char`]]
    [[`digit`]      []              [`Char`]]
    [[`graph`]      []              [`Char`]]
    [[`print`]      []              [`Char`]]
    [[`punct`]      []              [`Char`]]
    [[`space`]      []              [`Char`]]
    [[`xdigit`]     []              [`Char`]]
    [[`~P`]         []              [`attribute_of(P)`]]
]

[endsect]
10
doc/qi_and_karma/rules.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Rules]
[endsect]
10
doc/qi_and_karma/semantic_actions.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Semantic Actions]
[endsect]
10
doc/qi_and_karma/tutorials.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Tutorials]
[endsect]
10
doc/rationale.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Rationale]
[endsect]
10
doc/reference/lex/lexer.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer]
[endsect]
19
doc/reference/lex/lexer_class.qbk
Normal file
@@ -0,0 +1,19 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer Class]

[heading The lexertl_lexer Class Implementing the Dynamic Model]

[heading The lexertl_actor_lexer Class Implementing the Dynamic Model]

[heading The lexertl_static_lexer Class Implementing the Static Model]

[heading The lexertl_static_actor_lexer Class Implementing the Static Model]

[endsect]
10
doc/reference/lex/token.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Token]
[endsect]
10
doc/reference/lex/token_class.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Token Class]
[endsect]
10
doc/reference/lex/tokendef.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section TokenDef]
[endsect]
10
doc/reference/lex/tokendef_class.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section TokenDef Class]
[endsect]
10
doc/reference/lex/tokenset.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section TokenSet]
[endsect]
10
doc/reference/lex/tokenset_class.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section TokenSet Class]
[endsect]
10
doc/reference/qi_and_karma/action.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Action]
[endsect]
10
doc/reference/qi_and_karma/auxiliary.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Auxiliary]
[endsect]
10
doc/reference/qi_and_karma/binary.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Binary]
[endsect]
10
doc/reference/qi_and_karma/char.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Char]
[endsect]
10
doc/reference/qi_and_karma/debug.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Debug]
[endsect]
10
doc/reference/qi_and_karma/directive.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Directive]
[endsect]
10
doc/reference/qi_and_karma/generator.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Generator]
[endsect]
10
doc/reference/qi_and_karma/nonterminal.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Nonterminal]
[endsect]
10
doc/reference/qi_and_karma/numeric.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Numeric]
[endsect]
10
doc/reference/qi_and_karma/operator.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Operators]
[endsect]
43
doc/reference/qi_and_karma/parser.qbk
Normal file
@@ -0,0 +1,43 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parser]

[heading Description]

Description of Parser concept

[variablelist Notation
    [[`p`]      [A Parser]]
]

[heading Valid Expressions]

For any Parser the following expressions must be valid:

[table
    [[Expression]   [Semantics]             [Return type]   [Complexity]]
    [[`xxx`]        [Semantics of `xxx`]    [Parser]        [Constant]]
]

[heading Type Requirements]

[table
    [[Expression]   [Requirements]]
    [[`xxx`]        [Requirements for `xxx`]]
]

[heading Invariants]

For any Parser xxx the following invariants always hold:

[heading Models]

Links to models of Parser concept

[endsect]
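
As a concrete anchor for the concept skeleton above, here is a small sketch of
one model of Parser, the built-in `int_` parser. This is written against a
later Spirit 2 release; the names and parse API at this revision may differ.

    #include <boost/spirit/include/qi.hpp>
    #include <cassert>
    #include <string>

    int main()
    {
        namespace qi = boost::spirit::qi;

        std::string input("42");
        std::string::iterator first = input.begin();

        int value = 0;
        // qi::int_ models the Parser concept; its attribute type is int
        bool ok = qi::parse(first, input.end(), qi::int_, value);
        assert(ok && value == 42);
        return 0;
    }
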
10
doc/reference/qi_and_karma/stream.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Stream]
[endsect]
10
doc/reference/qi_and_karma/string.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section String]
[endsect]
91
doc/references.qbk
Normal file
@@ -0,0 +1,91 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section References]

[table
    [[ ]    [Authors]           [Title, Publisher/link, Date Published]]
    [[1.]   [Todd Veldhuizen]   [[@http://www.extreme.indiana.edu/%7Etveldhui/papers/Expression-Templates/exprtmpl.html
                                "Expression Templates"]. C++ Report, June 1995.]]
    [[2.]   [Peter Naur (ed.)]  [[@http://www.masswerk.at/algol60/report.htm
                                "Report on the Algorithmic Language ALGOL 60"]. CACM, May 1960.]]
    [[3.]   [ISO/IEC]           [[@http://www.cl.cam.ac.uk/%7Emgk25/iso-14977.pdf "ISO-EBNF"],
                                ISO/IEC 14977: 1996(E).]]
    [[4.]   [Richard J. Botting, Ph.D.] [[@http://www.csci.csusb.edu/dick/maths/intro_ebnf.html
                                "XBNF"] (citing Leu-Weiner, 1973).
                                California State University, San Bernardino, 1998.]]
    [[5.]   [James Coplien]     ["Curiously Recurring Template Pattern".
                                C++ Report, Feb. 1995.]]
    [[6.]   [Thierry Geraud and
             Alexandre Duret-Lutz] [[@http://www.coldewey.com/europlop2000/papers/geraud%2Bduret.zip
                                Generic Programming Redesign of Patterns]
                                Proceedings of the 5th European Conference on Pattern Languages
                                of Programs (EuroPLoP'2000) Irsee, Germany, July 2000.]]
    [[7.]   [Geoffrey Furnish]  [[@http://www.adtmag.com/joop/carticle.aspx?ID=627
                                "Disambiguated Glommable Expression Templates Reintroduced"]
                                C++ Report, May 2000.]]
    [[8.]   [Erich Gamma,
             Richard Helm,
             Ralph Johnson,
             and John Vlissides] [Design Patterns, Elements of Reusable Object-Oriented Software.
                                Addison-Wesley, 1995.]]
    [[9.]   [Alfred V. Aho,
             Ravi Sethi,
             Jeffrey D. Ullman] [Compilers: Principles, Techniques and Tools.
                                Addison-Wesley, June 1987.]]
    [[10.]  [Dick Grune and
             Ceriel Jacobs]     [[@http://www.cs.vu.nl/%7Edick/PTAPG.html
                                Parsing Techniques: A Practical Guide.]
                                Ellis Horwood Ltd.: West Sussex, England, 1990.
                                (electronic copy, 1998).]]
    [[11.]  [T. J. Parr,
             H. G. Dietz, and
             W. E. Cohen]       [[@http://citeseer.ist.psu.edu/6885.html
                                PCCTS Reference Manual (Version 1.00)].
                                School of Electrical Engineering, Purdue University,
                                West Lafayette, August 1991.]]
    [[12.]  [Adrian Johnstone and
             Elizabeth Scott]   [[@ftp://ftp.cs.rhul.ac.uk/pub/rdp
                                RDP, A Recursive Descent Compiler Compiler].
                                Technical Report CSD TR 97 25, Dept. of Computer Science,
                                Egham, Surrey, England, Dec. 20, 1997.]]
    [[13.]  [Adrian Johnstone]  [[@http://www.cs.rhul.ac.uk/research/languages/projects/lookahead_backtrack.shtml
                                Languages and Architectures,
                                Parser generators with backtrack or extended lookahead capability]
                                Department of Computer Science, Royal Holloway, University of London,
                                Egham, Surrey, England]]
    [[14.]  [Damian Conway]     [[@http://www.csse.monash.edu.au/%7Edamian/papers/#Embedded_Input_Parsing_for_C
                                Parsing with C++ Classes].
                                ACM SIGPLAN Notices, 29:1, 1994.]]
    [[15.]  [Joel de Guzman]    [[@http://spirit.sourceforge.net/distrib/spirit_1_8_5/libs/spirit/index.html
                                "Spirit Version 1.8"], 1998-2003.]]
    [[16.]  [S. Doaitse Swierstra and
             Luc Duponcheel]    [[@http://citeseer.ist.psu.edu/448665.html
                                Deterministic, Error-Correcting Combinator Parsers]
                                Dept. of Computer Science, Utrecht University P.O. Box 80.089,
                                3508 TB Utrecht, The Netherlands]]
    [[17.]  [Bjarne Stroustrup] [[@http://www.research.att.com/%7Ebs/whitespace98.pdf
                                Generalizing Overloading for C++2000]
                                Overload, Issue 25. April 1, 1998.]]
    [[18.]  [Dr. John Maddock]  [[@http://www.boost.org/libs/regex/index.html
                                Regex++ Documentation]
                                http://www.boost.org/libs/regex/index.htm]]
    [[19.]  [Anonymous,
             edited by Graham Hutton] [[@http://www.cs.nott.ac.uk/~gmh//faq.html
                                Frequently Asked Questions for comp.lang.functional].
                                University of Nottingham.]]
    [[20.]  [Hewlett-Packard]   [[@http://www.sgi.com/tech/stl/
                                Standard Template Library Programmer's Guide.], Hewlett-Packard Company, 1994]]
    [[21.]  [Boost Libraries]   [[@http://boost.org/libs/libraries.htm
                                Boost Libraries Documentation].]]
    [[22.]  [Brian McNamara and
             Yannis Smaragdakis] [[@http://www.cc.gatech.edu/~yannis/fc++/ FC++: Functional Programming in C++].]]
    [[23.]  [Todd Veldhuizen]   [[@ftp://ftp.cs.indiana.edu/pub/techreports/TR542.pdf Techniques for Scientific C++.]]]
]

[endsect]
143
doc/spirit2.qbk
Normal file
@@ -0,0 +1,143 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[article Spirit
    [quickbook 1.4]
    [version 2.0]
    [authors [de Guzman, Joel], [Kaiser, Hartmut]]
    [copyright 2001 2002 2003 2004 2005 2006 2007 2008 Joel de Guzman, Hartmut Kaiser]
    [purpose Parser and Generator Library]
    [license
        Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
        [@http://www.boost.org/LICENSE_1_0.txt])
    ]
]

[/ November 14, 2007 ]

[/ Some links ]

[def __spirit__ [@http://spirit.sourceforge.net Spirit]]
[def __phoenix__ [@http://boost.org/libs/spirit/phoenix/index.html Phoenix]]
[def __phoenix2__ [@http://spirit.sourceforge.net/dl_more/phoenix_v2/libs/spirit/phoenix/doc/html/index.html Phoenix2]]
[def __fusion__ [@http://spirit.sourceforge.net/dl_more/fusion_v2/libs/fusion/doc/html/index.html Fusion]]
[def __mpl__ [@http://www.boost.org/libs/mpl/index.html MPL]]
[def __boost_tuples__ [@http://www.boost.org/libs/tuple/index.html Boost.Tuples]]
[def __boost_proto__ -Boost.Proto-]
[def __boost__ [@http://www.boost.org/ Boost]]
[def __boost_tools__ [@http://www.boost.org/tools/index.html Boost Tools]]
[def __spirit_list__ [@https://lists.sourceforge.net/lists/listinfo/spirit-general Spirit Mailing List]]
[def __spirit_general__ [@news://news.gmane.org/gmane.comp.spirit.general Spirit General NNTP news portal]]
[def __gmane__ [@http://www.gmane.org Gmane]]
[def __mlist_archive__ [@http://news.gmane.org/gmane.comp.parsers.spirit.general]]

[def __early_spirit__ [@http://spirit.sourceforge.net/dl_docs/pre-spirit.htm pre-Spirit]]
[def __todd__exprtemplates__ [@http://ubiety.uwaterloo.ca/~tveldhui/papers/Expression-Templates/exprtmpl.html Expression Templates]]
[def __cpp_concepts__ [@http://en.wikipedia.org/wiki/C%2B%2B0x#Concept Concepts]]
[def __attr_grammar__ [@http://en.wikipedia.org/wiki/Attribute_grammar Attribute Grammar]]
[def __string_template__ [@http://www.stringtemplate.org/ StringTemplate]]
[def __lexertl__ [@http://www.benhanson.net/lexertl.html Lexertl]]
[def __wave__ [@http://www.boost.org/libs/wave/index.html Wave]]
[def __slex__ [@http://spirit.sourceforge.net/repository/applications/slex.zip SLex]]
[def __flex__ [@http://flex.sourceforge.net/ Flex]]
[def __re2c__ [@http://re2c.sourceforge.net/ re2c]]
[def __ragel__ [@http://www.cs.queensu.ca/~thurston/ragel/ Ragel]]

[def __boost_variant__ [@http://www.boost.org/doc/html/variant.html `boost::variant<>`]]
[def __boost_iterator_range__ [@http://www.boost.org/libs/range/doc/utility_class.html#iter_range `boost::iterator_range<>`]]


[def __qi__ /Spirit.Qi/]
[def __karma__ /Spirit.Karma/]
[def __lex__ /Spirit.Lex/]


[def __fixme__ *FIXME*]


[/ Sections ]

[def __sec_qi_and_karma__ [link spirit.qi_and_karma Qi and Karma]]
[def __sec_qi_karma_attributes__ [link spirit.qi_and_karma.abstracts.attributes Attributes]]

[def __sec_lex__ [link spirit.__lex__ Lex]]
[def __sec_lex_quickstart_1__ [link spirit.__lex__.__lex___tutorials.quickstart_1___a_word_counter_using___lex__ Lex Quickstart 1 - A word counter using __lex__]]
[def __sec_lex_quickstart_2__ [link spirit.__lex__.__lex___tutorials.quickstart_2___a_better_word_counter_using___lex__ Lex Quickstart 2 - A better word counter using __lex__]]
[def __sec_lex_quickstart_3__ [link spirit.__lex__.__lex___tutorials.quickstart_3___counting_words_using_a_parser Lex Quickstart 3 - Counting Words Using a Parser]]

[def __sec_lex_static_model__ [link spirit.__lex__.abstracts.the__static__lexer_model The /Static/ Model]]
[def __sec_lex_primitives__ [link spirit.__lex__.abstracts.lexer_primitives Lexer Primitives]]
[def __sec_lex_tokenvalues__ [link spirit.__lex__.abstracts.lexer_primitives.about_tokens_and_token_values About Tokens and Token Values]]
[def __sec_lex_attributes__ [link spirit.__lex__.abstracts.lexer_attributes Lexer Attributes]]

[def __sec_ref_lex_token__ [link spirit.__lex__.reference.concepts.token Token Reference]]
[def __sec_ref_lex_token_def__ [link spirit.__lex__.reference.concepts.tokendef TokenDef Reference]]

[/ References to API descriptions ]

[def __api_tokenize_and_parse__ [link spirit.qi_and_karma.abstracts.parsing_and_generating.the_tokenize_and_phrase_parse___function `tokenize_and_parse()`]]
[def __api_generate_static__ [link spirit.__lex__.abstracts.tokenizing_input_data.the_generate_static___function `generate_static()`]]


[/ References to classes ]

[def __class_token_def__ [link spirit.__lex__.reference.tokendef_class `token_def<>`]]

[def __class_lexertl_token__ [link spirit.__lex__.reference.token_class `lexertl_token<>`]]
[def __class_lexertl_lexer__ [link spirit.__lex__.reference.lexer_class.the_lexertl_lexer_class_implementing_the_dynamic_model `lexertl_lexer<>`]]
[def __class_lexertl_static_lexer__ [link spirit.__lex__.reference.lexer_class.the_lexertl_static_lexer_class_implementing_the_static_model `lexertl_static_lexer<>`]]


[/ Some images ]

[def __note__ [$../../../../doc/html/images/adm_note.png]]
[def __tip__ [$../../../../doc/html/images/adm_tip.png]]
[def __important__ [$../../../../doc/html/images/adm_important.png]]
[def __caution__ [$../../../../doc/html/images/adm_caution.png]]
[def __danger__ [$../../../../doc/html/images/adm_danger.png]]


[/ some templates]

[/ fig[ref title label]
   Image element with a title.

   ref   := Reference to the image file.
   title := The title to associate with this figure.
   label := the id to use to be able to reference this picture
]
[template fig[ref title label]'''
<figure id="'''[label]'''">
    <title>'''[title]'''</title>
    <inlinemediaobject>
    <imageobject>
        <imagedata fileref="'''[ref]'''"></imagedata>
    </imageobject>
    <textobject>
        <phrase role="alt">'''[title]'''</phrase>
    </textobject>
    </inlinemediaobject>
</figure>
''']


[/ Here we go ]

[include preface.qbk]
[include what_s_new.qbk]
[include introduction.qbk]
[include qi_and_karma.qbk]
[include lex.qbk]
[include faq.qbk]
[include notes.qbk]
[include rationale.qbk]
[include acknowledgments.qbk]
[include references.qbk]
10
doc/what_s_new.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section What's New]
[endsect]
12
example/karma/Jamfile
Normal file
@@ -0,0 +1,12 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2007 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
project spirit-karma-example ;

exe basic_facilities : basic_facilities.cpp ;
exe functor_facilities : functor_facilities.cpp ;
178
example/karma/basic_facilities.cpp
Normal file
@@ -0,0 +1,178 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// The main purpose of this example is to show the uniform and easy way of
// output formatting for different container types.
//
// Since the 'stream' primitive used below uses the streaming operator defined
// for the container value_type, you must make sure to have a corresponding
// operator<<() available for this contained data type. OTOH this means that
// the format descriptions used below will be usable for any contained type as
// long as this type has an associated streaming operator defined.

// use a larger value for the alignment field width (default is 10)
#define BOOST_KARMA_DEFAULT_FIELD_LENGTH 25

#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>

#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <algorithm>
#include <cstdlib>

#include <boost/range.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>

using namespace boost::spirit;
using namespace boost::spirit::ascii;
namespace karma = boost::spirit::karma;

///////////////////////////////////////////////////////////////////////////////
// Output the given containers in list format
// Note: the format description does not depend on the type of the sequence
//       nor does it depend on the type of the elements contained in the
//       sequence
///////////////////////////////////////////////////////////////////////////////
template <typename Container>
void output_container(std::ostream& os, Container const& c)
{
    // output the container as a space separated sequence
    os <<
        karma::format_delimited(
            *stream,                        // format description
            c,                              // data
            space                           // delimiter
        ) << std::endl << std::endl;

    os <<
        karma::format_delimited(
            '[' << *stream << ']',          // format description
            c,                              // data
            space                           // delimiter
        ) << std::endl << std::endl;

    // output the container as a comma separated list
    os <<
        karma::format(
            stream % ", ",                  // format description
            c                               // data
        ) << std::endl << std::endl;

    os <<
        karma::format(
            '[' << (stream % ", ") << ']',  // format description
            c                               // data
        ) << std::endl << std::endl;

    // output the container as a comma separated list of items enclosed in '()'
    os <<
        karma::format(
            ('(' << stream << ')') % ", ",  // format description
            c                               // data
        ) << std::endl << std::endl;

    os <<
        karma::format(
            '[' << (
                ('(' << stream << ')') % ", "
             ) << ']',                      // format description
            c                               // data
        ) << std::endl << std::endl;

    // output the container as an HTML list
    os <<
        karma::format_delimited(
            "<ol>" <<
                *verbatim["<li>" << stream << "</li>"]
            << "</ol>",                     // format description
            c,                              // data
            '\n'                            // delimiter
        ) << std::endl;

    // output the container as right aligned column
    os <<
        karma::format_delimited(
            *verbatim[
                "|" << right_align[stream] << "|"
            ],                              // format description
            c,                              // data
            '\n'                            // delimiter
        ) << std::endl;

    os << std::endl;
}

int main()
{
    ///////////////////////////////////////////////////////////////////////////
    // vector
    std::vector<int> v (8);
    std::generate(v.begin(), v.end(), std::rand); // randomly fill the vector

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "std::vector<int>" << std::endl;
    output_container(std::cout, v);

    ///////////////////////////////////////////////////////////////////////////
    // list
    std::list<char> l;
    l.push_back('A');
    l.push_back('B');
    l.push_back('C');

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "std::list<char>" << std::endl;
    output_container(std::cout, l);

    ///////////////////////////////////////////////////////////////////////////
    // C-style array
    int i[4] = { 3, 6, 9, 12 };

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "int i[]" << std::endl;
    output_container(std::cout, boost::make_iterator_range(i, i+4));

    ///////////////////////////////////////////////////////////////////////////
    // strings
    std::string str("Hello world!");

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "std::string" << std::endl;
    output_container(std::cout, str);

    ///////////////////////////////////////////////////////////////////////////
    // vector of boost::date objects
    // Note: any registered facets get used!
    using namespace boost::gregorian;
    std::vector<date> dates;
    dates.push_back(date(2005, Jun, 25));
    dates.push_back(date(2006, Jan, 13));
    dates.push_back(date(2007, May, 03));

    date_facet* facet(new date_facet("%A %B %d, %Y"));
    std::cout.imbue(std::locale(std::cout.getloc(), facet));

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "std::vector<boost::date>" << std::endl;
    output_container(std::cout, dates);

    ///////////////////////////////////////////////////////////////////////////
    // fusion tuples
    // this will work in the future
    // boost::fusion::vector<int, char, double> fv(42, 'a', 45.8);
    //
    // std::cout << "boost::fusion::vector<int, char, double>" << std::endl;
    // output_container(std::cout, fv);
    return 0;
}
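
The header comment of basic_facilities.cpp states that the 'stream' primitive
relies on an operator<<() for the contained value type. A minimal sketch of
what that looks like for a user-defined type, reusing the same format
description as the example above (the `coord` type and its formatting are
hypothetical, and the sketch assumes the qualified name `karma::stream` as in
later Spirit 2 releases):

    #include <boost/spirit/include/karma.hpp>
    #include <boost/spirit/include/karma_stream.hpp>
    #include <iostream>
    #include <vector>

    struct coord { int x, y; };

    // the streaming operator picked up by the karma 'stream' primitive
    std::ostream& operator<<(std::ostream& os, coord const& c)
    {
        return os << '(' << c.x << ',' << c.y << ')';
    }

    int main()
    {
        namespace karma = boost::spirit::karma;

        std::vector<coord> v;
        coord c1 = { 1, 2 }; v.push_back(c1);
        coord c2 = { 3, 4 }; v.push_back(c2);

        // same comma separated format description as used in the example
        std::cout << karma::format(karma::stream % ", ", v) << std::endl;
        return 0;
    }
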
202
example/karma/functor_facilities.cpp
Normal file
@@ -0,0 +1,202 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example demonstrates how to write functor-based generators for special
// purposes.

#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>

#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <algorithm>
#include <cstdlib>

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
// The functor generator 'counter' can be used for output annotation with some
// item counting information.
///////////////////////////////////////////////////////////////////////////////
struct counter_impl : boost::spirit::karma::functor_base
{
    template <typename OutputIterator, typename Context, typename Parameter>
    bool operator()(Parameter const&, Context& ctx, OutputIterator& sink) const
    {
        namespace karma = boost::spirit::karma;
        return karma::generate(sink, int_ << ": ", counter++);
    }

    counter_impl(int& counter_)
      : counter(counter_) {}

    int& counter;
};

inline boost::spirit::result_of::as_generator<counter_impl>::type
counter(int& counter_)
{
    using namespace boost::spirit::karma;
    return as_generator(counter_impl(counter_));
}

///////////////////////////////////////////////////////////////////////////////
// The functor generator 'confix' allows a simple syntax for generating
// output wrapped inside a pair of a prefix and a suffix.
///////////////////////////////////////////////////////////////////////////////
template <typename Expr>
struct confix_impl : public boost::spirit::karma::functor_base
{
    template <typename Context>
    struct apply
    {
        typedef boost::spirit::hold_any type;
    };

    template <typename OutputIterator, typename Context, typename Parameter>
    bool operator()(Parameter const& v, Context& ctx, OutputIterator& sink) const
    {
        namespace karma = boost::spirit::karma;
        return karma::generate(sink, open << xpr << close, v);
    }

    confix_impl(char const* open_, char const* close_, Expr const& xpr_)
      : open(open_), close(close_), xpr(xpr_) {}

    std::string open;
    std::string close;
    Expr xpr;
};

template <typename Expr>
inline typename boost::spirit::result_of::as_generator<confix_impl<Expr> >::type
confix(Expr const& xpr_, char const* open_ = "", char const* close_ = "")
{
    using namespace boost::spirit::karma;
    return as_generator(confix_impl<Expr>(open_, close_, xpr_));
}

///////////////////////////////////////////////////////////////////////////////
// The functor generator 'list' allows a simple syntax for generating
// list formatted output.
//
// This example uses phoenix::bind to allow omitting the second argument of
// the operator() and to swap the remaining two arguments.
///////////////////////////////////////////////////////////////////////////////
template <typename Expr>
struct list_impl : boost::spirit::karma::functor_base
{
    // this function will be called to generate the output
    template <typename OutputIterator, typename Parameter>
    bool operator()(OutputIterator& sink, Parameter const& v) const
    {
        namespace karma = boost::spirit::karma;
        return karma::generate(sink, xpr % delim, v);
    }

    list_impl(Expr const& xpr_, char const* delim_)
      : xpr(xpr_), delim(delim_) {}

    Expr xpr;
    std::string delim;
};

// Supply the expected parameter type explicitly
struct list_impl_mf
{
    // the expected parameter type of a functor has to be defined using an
    // embedded apply metafunction
    template <typename Context>
    struct apply
    {
        typedef boost::spirit::hold_any type;
    };
};

template <typename Expr>
inline list_impl<Expr>
list(Expr const& xpr, char const* delim)
{
    return list_impl<Expr>(xpr, delim);
}

///////////////////////////////////////////////////////////////////////////////
int main()
{
    namespace karma = boost::spirit::karma;
    using namespace boost::phoenix;
    using namespace boost::phoenix::arg_names;

    ///////////////////////////////////////////////////////////////////////////
    // Output the given containers in list format
    // We use a special functor generator here to annotate the output with
    // an integer counting the entries.
    ///////////////////////////////////////////////////////////////////////////
    std::vector<int> v (8);
    std::generate(v.begin(), v.end(), std::rand); // randomly fill the vector

    int counter1 = 1;
    std::cout <<
        karma::format(
            (counter(counter1) << int_) % ", ",   // format description
            v                                     // data
        ) << std::endl;

    // Here we initialize the counter to 100
    int counter2 = 100;
    std::cout <<
        karma::format(
            '[' << (
                (counter(counter2) << int_) % ", "
             ) << ']',                            // format description
            v                                     // data
        ) << std::endl;

    ///////////////////////////////////////////////////////////////////////////
    // list
    // The output format description used below adds special item formatting
    ///////////////////////////////////////////////////////////////////////////
    std::list<std::string> names;
    names.push_back("Spirit");
    names.push_back("Qi");
    names.push_back("Karma");

    // specifying a prefix item suffix scheme directly
    std::cout <<
        karma::format(
            ('{' << stream << '}') % ", ",        // format description
            names                                 // data
        ) << std::endl;

    // The confix generator nicely wraps the given expression with prefix and
    // suffix strings
    std::cout <<
        karma::format(
            confix(stream % ", ", "[", "]"),      // format description
            names                                 // data
        ) << std::endl;

    ///////////////////////////////////////////////////////////////////////////
    // Output the given container as a list
    // We use a separate metafunction list_impl_mf to specify the expected
    // parameter type of this functor generator.
    // We use phoenix::bind to omit the 2nd argument of the functor's
    // function operator and to change the sequence of the remaining two
    // arguments.
    ///////////////////////////////////////////////////////////////////////////
    std::string str("Hello world!");
    std::cout <<
        karma::format(
            karma::as_generator_mf<list_impl_mf>(bind(list(stream, ", "), _3, _1)),
            str
        ) << std::endl;

    return 0;
}
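
The comments in functor_facilities.cpp mention using phoenix::bind to drop one
argument and swap the remaining two. Here is a self-contained sketch of that
argument-reordering idea, independent of karma (the `subtract` function is
hypothetical, introduced only to show the reordering):

    #include <boost/spirit/include/phoenix_core.hpp>
    #include <boost/spirit/include/phoenix_bind.hpp>
    #include <iostream>

    int subtract(int a, int b) { return a - b; }

    int main()
    {
        using boost::phoenix::bind;
        using namespace boost::phoenix::arg_names;

        int a = 3, b = 10;

        // _2 and _1 swap the call arguments: this evaluates subtract(10, 3)
        std::cout << bind(&subtract, _2, _1)(a, b) << std::endl;  // prints 7
        return 0;
    }
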
119
example/karma/quick_start1.cpp
Normal file
@@ -0,0 +1,119 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// The main purpose of this example is to show how a single container type can
// be formatted using different output grammars.

#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>

#include <iostream>
#include <vector>
#include <algorithm>
#include <cstdlib>

using namespace boost::spirit;
using namespace boost::spirit::ascii;
namespace karma = boost::spirit::karma;

///////////////////////////////////////////////////////////////////////////////
int main()
{
    ///////////////////////////////////////////////////////////////////////////
    // vector
    std::vector<int> v (8);
    std::generate(v.begin(), v.end(), std::rand); // randomly fill the vector

    std::cout << "Output 8 integers from a std::vector<int>..." << std::endl;

    // output the container as a sequence without any separation
    std::cout << "...without any separation" << std::endl;
    std::cout <<
        karma::format(
            *int_,                        // format description
            v                             // data
        ) << std::endl << std::endl;

    // output the container as a space separated sequence
    std::cout << "...as space delimited list" << std::endl;
    std::cout <<
        karma::format_delimited(
            *int_,                        // format description
            v,                            // data
            space                         // delimiter
        ) << std::endl << std::endl;

    std::cout <<
        karma::format_delimited(
            '[' << *int_ << ']',          // format description
            v,                            // data
            space                         // delimiter
        ) << std::endl << std::endl;

    // output the container as a comma separated list
    std::cout << "...as comma separated list" << std::endl;
    std::cout <<
        karma::format(
            int_ % ", ",                  // format description
            v                             // data
        ) << std::endl << std::endl;

    std::cout <<
        karma::format(
            '[' << (int_ % ", ") << ']',  // format description
            v                             // data
        ) << std::endl << std::endl;

    // output the container as a comma separated list of doubles
    std::cout << "...as comma separated list of doubles" << std::endl;
    std::cout <<
        karma::format(
            double_ % ", ",               // format description
            v                             // data
        ) << std::endl << std::endl;

    // output the container as a comma separated list of items enclosed in '()'
    std::cout << "...as list of ints enclosed in '()'" << std::endl;
    std::cout <<
        karma::format(
            ('(' << int_ << ')') % ", ",  // format description
            v                             // data
        ) << std::endl << std::endl;

    std::cout <<
        karma::format(
            '[' << (
                ('(' << int_ << ')') % ", "
             ) << ']',                    // format description
            v                             // data
        ) << std::endl << std::endl;

    // output the container as an HTML list
    std::cout << "...as HTML bullet list" << std::endl;
    std::cout <<
        karma::format_delimited(
            "<ol>" <<
                // no delimiting within verbatim
                *verbatim[" <li>" << int_ << "</li>"]
            << "</ol>",                   // format description
            v,                            // data
            '\n'                          // delimiter
        ) << std::endl;

    // output the container as right aligned column
    std::cout << "...right aligned in a column" << std::endl;
    std::cout <<
        karma::format_delimited(
            *verbatim[
                "|" << right_align[int_] << "|"
            ],                            // format description
            v,                            // data
            '\n'                          // delimiter
        ) << std::endl;

    std::cout << std::endl;
    return 0;
}
22
example/lex/Jamfile
Normal file
@@ -0,0 +1,22 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2008 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================

project spirit-lexer-example ;

exe example1 : example1.cpp ;
exe example2 : example2.cpp ;
exe example3 : example3.cpp ;
exe example4 : example4.cpp ;
exe example5 : example5.cpp ;
exe example6 : example6.cpp ;
exe print_numbers : print_numbers.cpp ;
exe word_count : word_count.cpp ;
exe word_count_functor : word_count_functor.cpp ;
exe word_count_lexer : word_count_lexer.cpp ;
exe strip_comments : strip_comments.cpp ;
27
example/lex/example.hpp
Normal file
@@ -0,0 +1,27 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <fstream>
#include <string>
#include <cstdlib>      // for exit()

///////////////////////////////////////////////////////////////////////////////
// Helper function reading a file into a string
///////////////////////////////////////////////////////////////////////////////
inline std::string
read_from_file(char const* infile)
{
    std::ifstream instream(infile);
    if (!instream.is_open()) {
        std::cerr << "Couldn't open file: " << infile << std::endl;
        exit(-1);
    }
    instream.unsetf(std::ios::skipws);      // No white space skipping!
    return std::string(std::istreambuf_iterator<char>(instream.rdbuf()),
        std::istreambuf_iterator<char>());
}
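
A short usage sketch for the helper above ("input.txt" is a hypothetical file
name, not one shipped with the examples):

    #include <iostream>
    #include "example.hpp"

    int main()
    {
        // reads the whole file, with whitespace skipping disabled
        std::string contents(read_from_file("input.txt"));
        std::cout << "read " << contents.size() << " characters" << std::endl;
        return 0;
    }
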
136
example/lex/example1.cpp
Normal file
136
example/lex/example1.cpp
Normal file
@@ -0,0 +1,136 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//  Copyright (c) 2001-2007 Joel de Guzman
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  Simple lexer/parser to test the Spirit installation.
//
//  This example shows how to create a simple lexer recognizing 4 different
//  tokens, and how to use a single token definition as the skip parser during
//  the parsing. Additionally it demonstrates how to use one of the defined
//  tokens as a parser component in the grammar.
//
//  The grammar recognizes a simple input structure, for instance:
//
//        {
//            hello world, hello it is me
//        }
//
//  Any number of simple sentences (optionally comma separated) inside a pair
//  of curly braces will be matched.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example1_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens and associate them with the lexer
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        self = token_def<>(',') | '{' | '}' | identifier;

        // any token definition to be used as the skip parser during parsing
        // has to be associated with a separate lexer state (here 'WS')
        white_space = "[ \\t\\n]+";
        self("WS") = white_space;
    }

    token_def<> identifier, white_space;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example1_grammar
  : grammar_def<Iterator, in_state_skipper<token_def<> > >
{
    template <typename TokenDef>
    example1_grammar(TokenDef const& tok)
    {
        start = '{' >> *(tok.identifier >> -char_(',')) >> '}';
    }

    rule<Iterator, in_state_skipper<token_def<> > > start;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lexertl_token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // We use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example1_tokens<lexer_type> example1_tokens;

    // This is the iterator type exposed by the lexer
    typedef lexer<example1_tokens>::iterator_type iterator_type;

    // This is the type of the grammar to parse
    typedef example1_grammar<iterator_type> example1_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example1_tokens tokens;                         // Our token definition
    example1_grammar def (tokens);                  // Our grammar definition

    lexer<example1_tokens> lex(tokens);             // Our lexer
    grammar<example1_grammar> calc(def);            // Our parser

    std::string str (read_from_file("example1.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token_def defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    bool r = phrase_parse(iter, end, calc, in_state("WS")[tokens.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::string rest(iter, end);
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
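A note on the skipper used above: the whitespace token lives in its own lexer state ('WS'), and in_state("WS")[...] switches into that state only while input is being discarded. As a rough illustration of the same idea, here is a minimal two-state scanner in plain C++ (no Spirit; all names are hypothetical):

#include <cctype>
#include <cstddef>
#include <iostream>
#include <string>

// Hypothetical two-state scanner: the default state matches real tokens,
// a separate "WS" state is consulted only when the parser asks to skip.
struct mini_scanner
{
    std::string input;
    std::size_t pos;

    // the skipper: runs the "WS" rules, then hands control back
    void skip()
    {
        while (pos < input.size() && std::isspace((unsigned char)input[pos]))
            ++pos;                       // "WS" state consumes whitespace
    }

    // the default state: here, every remaining character is its own token
    char next_token()
    {
        skip();                          // switch to "WS", then back
        return pos < input.size() ? input[pos++] : '\0';
    }
};

int main()
{
    mini_scanner s{"{ hello , world }", 0};
    for (char c = s.next_token(); c != '\0'; c = s.next_token())
        std::cout << "token: " << c << "\n";
}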
169
example/lex/example2.cpp
Normal file
@@ -0,0 +1,169 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//  Copyright (c) 2001-2007 Joel de Guzman
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens and how to use this with a grammar. This example has a
//  heavily backtracking grammar, which makes it a candidate for lexer based
//  parsing (all tokens are scanned and generated only once, even if
//  backtracking is required), which speeds up the overall parsing process
//  considerably, outweighing the overhead needed for setting up the lexer.
//  Additionally it demonstrates how to use one of the defined tokens as a
//  parser component in the grammar.
//
//  The grammar recognizes a simple input structure: any number of English
//  simple sentences (statements, questions and commands) are recognized and
//  are counted separately.

// #define BOOST_SPIRIT_DEBUG
// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using boost::phoenix::ref;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example2_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // A 'word' is comprised of one or more letters and an optional
        // apostrophe. If it contains an apostrophe, there may only be one and
        // the apostrophe must be preceded and succeeded by at least 1 letter.
        // For example, "I'm" and "doesn't" meet the definition of 'word' we
        // define below.
        word = "[a-zA-Z]+('[a-zA-Z]+)?";

        // associate the tokens and the token set with the lexer
        self = token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word;
    }

    token_def<> word;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example2_grammar : grammar_def<Iterator>
{
    template <typename TokenDef>
    example2_grammar(TokenDef const& tok)
      : paragraphs(0), commands(0), questions(0), statements(0)
    {
        story
            =  +paragraph
            ;

        paragraph
            =  (   +(   command [ ++ref(commands) ]
                    |   question [ ++ref(questions) ]
                    |   statement [ ++ref(statements) ]
                    )
                >> *char_(' ') >> +char_('\n')
               )
               [ ++ref(paragraphs) ]
            ;

        command
            =  +(tok.word | ' ' | ',') >> '!'
            ;

        question
            =  +(tok.word | ' ' | ',') >> '?'
            ;

        statement
            =  +(tok.word | ' ' | ',') >> '.'
            ;

        BOOST_SPIRIT_DEBUG_NODE(story);
        BOOST_SPIRIT_DEBUG_NODE(paragraph);
        BOOST_SPIRIT_DEBUG_NODE(command);
        BOOST_SPIRIT_DEBUG_NODE(question);
        BOOST_SPIRIT_DEBUG_NODE(statement);
    }

    rule<Iterator> story, paragraph, command, question, statement;
    int paragraphs, commands, questions, statements;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lexertl_token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example2_tokens<lexer_type> example2_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example2_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example2_grammar<iterator_type> example2_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example2_tokens tokens;                         // Our token definition
    example2_grammar def (tokens);                  // Our grammar definition

    lexer<example2_tokens> lex(tokens);             // Our lexer
    grammar<example2_grammar> calc(def, def.story); // Our grammar

    std::string str (read_from_file("example2.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    bool r = parse(iter, end, calc);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "There were "
                  << def.commands << " commands, "
                  << def.questions << " questions, and "
                  << def.statements << " statements.\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
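The efficiency claim in the header comment, that tokens are scanned and generated only once even when the grammar backtracks, amounts to the parser rewinding over a stored token sequence instead of re-running the lexer over characters. A simplified sketch of that mechanism in plain C++ (hypothetical names, not the library's internals):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct token { char id; std::string text; };

// Grammar fragments differing only in their terminator; each consumes
// word tokens from the stored stream and then requires its terminator.
static bool phrase_then(std::vector<token> const& toks, std::size_t& i, char term)
{
    while (i < toks.size() && toks[i].id == 'w')
        ++i;                                    // consume word tokens
    return i < toks.size() && toks[i++].id == term;
}

static bool sentence(std::vector<token> const& toks, std::size_t& i)
{
    std::size_t save = i;                       // remember stream position
    if (phrase_then(toks, i, '!')) return true; // try: command
    i = save;                                   // backtrack: reset index only
    if (phrase_then(toks, i, '?')) return true; // try: question
    i = save;
    return phrase_then(toks, i, '.');           // try: statement
}

int main()
{
    // tokens produced once by a (hypothetical) lexer pass
    std::vector<token> toks{{'w', "is"}, {'w', "it"}, {'w', "me"}, {'?', "?"}};
    std::size_t i = 0;
    std::cout << (sentence(toks, i) ? "matched\n" : "no match\n");
}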
161
example/lex/example3.cpp
Normal file
@@ -0,0 +1,161 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//  Copyright (c) 2001-2007 Joel de Guzman
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens and how to use this with a grammar. This example has a
//  heavily backtracking grammar, which makes it a candidate for lexer based
//  parsing (all tokens are scanned and generated only once, even if
//  backtracking is required), which speeds up the overall parsing process
//  considerably, outweighing the overhead needed for setting up the lexer.
//
//  Additionally, this example demonstrates how to define a token set usable
//  as the skip parser during parsing, allowing several tokens to be ignored.
//
//  This example recognizes couplets, which are sequences of numbers enclosed
//  in matching pairs of parentheses. See the comments below for details
//  and examples.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
// #define BOOST_SPIRIT_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example3_tokens : lexer_def<Lexer>
{
    typedef typename Lexer::token_set token_set;

    template <typename Self>
    void def (Self& self)
    {
        // define the tokens to match
        ellipses = "\\.\\.\\.";
        number = "[0-9]+";

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        white_space
            =   token_def<>("[ \\t\\n]+")               // whitespace
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"   // C style comments
            ;

        // associate the tokens and the token set with the lexer
        self = ellipses | '(' | ')' | number;
        self("WS") = white_space;
    }

    // these tokens expose the iterator_range of the matched input sequence
    token_def<> ellipses, identifier, number;
    token_set white_space;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example3_grammar
  : grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
    {
        start
            =  +(couplet | tok.ellipses)
            ;

        // A couplet matches nested left and right parentheses.
        // For example:
        //   (1) (1 2) (1 2 3) ...
        //   ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        //   (((1))) ...
        couplet
            =  tok.number
            |  '(' >> +couplet >> ')'
            ;

        BOOST_SPIRIT_DEBUG_NODE(start);
        BOOST_SPIRIT_DEBUG_NODE(couplet);
    }

    typedef typename Lexer::token_set token_set;
    rule<Iterator, in_state_skipper<token_set> > start, couplet;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lexertl_token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example3_tokens<lexer_type> example3_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example3_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example3_grammar<iterator_type, lexer_type> example3_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example3_tokens tokens;                         // Our token definition
    example3_grammar def (tokens);                  // Our grammar definition

    lexer<example3_tokens> lex(tokens);             // Our lexer
    grammar<example3_grammar> calc(def);            // Our grammar

    std::string str (read_from_file("example3.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token_set defined above as the skip parser.
    std::string ws("WS");
    bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
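The couplet rule is directly recursive. For comparison, a hand-written recursive-descent recognizer for the same grammar, sketched over a whitespace-free input since skipping is the lexer state's job above, could look like this (plain C++, illustrative only):

#include <cctype>
#include <cstddef>
#include <iostream>
#include <string>

// couplet ::= number | '(' couplet+ ')'
static bool couplet(std::string const& s, std::size_t& i)
{
    if (i < s.size() && std::isdigit((unsigned char)s[i])) {
        while (i < s.size() && std::isdigit((unsigned char)s[i])) ++i;
        return true;                       // matched a number
    }
    if (i < s.size() && s[i] == '(') {
        ++i;
        if (!couplet(s, i)) return false;  // at least one nested couplet
        while (couplet(s, i)) {}           // then any number more
        return i < s.size() && s[i++] == ')';
    }
    return false;
}

int main()
{
    std::string input = "((1)(23))";       // no whitespace: the skipper's job
    std::size_t i = 0;
    bool ok = couplet(input, i) && i == input.size();
    std::cout << (ok ? "matched\n" : "no match\n");
}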
239
example/lex/example4.cpp
Normal file
@@ -0,0 +1,239 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//  Copyright (c) 2001-2007 Joel de Guzman
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens aimed at a simple language and how to use this lexer with
//  a grammar. It shows how to associate values to tokens and how to access the
//  token values from inside the grammar.
//
//  We use explicit token value types, making the corresponding token instances
//  convert the matched input into an instance of that type. The token value
//  is exposed as the parser attribute if this token is used as a
//  parser component somewhere in a grammar.
//
//  Additionally, this example demonstrates how to define a token set usable
//  as the skip parser during parsing, allowing several tokens to be ignored.
//
//  This example recognizes a very simple programming language having
//  assignment statements and if and while control structures. Look at the file
//  example4.input for an example.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

using boost::phoenix::val;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example4_tokens : lexer_def<Lexer>
{
    typedef typename Lexer::token_set token_set;

    template <typename Self>
    void def (Self& self)
    {
        // define the tokens to match
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";
        if_ = "if";
        else_ = "else";
        while_ = "while";

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        white_space
            =   token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;

        // associate the tokens and the token set with the lexer
        self = token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
        self += if_ | else_ | while_ | identifier;
        self("WS") = white_space;
    }

//[example4_token_def
    // these tokens expose the iterator_range of the matched input sequence
    token_def<> if_, else_, while_;

    // The following two tokens have an associated value type, 'identifier'
    // carries a string (the identifier name) and 'constant' carries the
    // matched integer value.
    //
    // Note: any token value type specified explicitly during a token_def<>
    //       declaration needs to be listed during token type definition as
    //       well (see the typedef for the token_type below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token values as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding tokens being copied around.
    token_def<std::string> identifier;
    token_def<unsigned int> constant;
//]

    // token set to be used as the skip parser
    token_set white_space;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example4_grammar
  : grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
    template <typename TokenDef>
    example4_grammar(TokenDef const& tok)
    {
        program
            =  +block
            ;

        block
            =  '{' >> *statement >> '}'
            ;

        statement
            =  assignment
            |  if_stmt
            |  while_stmt
            ;

        assignment
            =  (tok.identifier >> '=' >> expression >> ';')
               [
                   std::cout << val("assignment statement to: ") << _1 << "\n"
               ]
            ;

        if_stmt
            =  (   tok.if_ >> '(' >> expression >> ')' >> block
               >> -(tok.else_ >> block)
               )
               [
                   std::cout << val("if expression: ") << _2 << "\n"
               ]
            ;

        while_stmt
            =  (tok.while_ >> '(' >> expression >> ')' >> block)
               [
                   std::cout << val("while expression: ") << _2 << "\n"
               ]
            ;

        // since expression has a variant return type accommodating both
        // std::string and unsigned integer, both possible values may be
        // returned to the calling rule
        expression
            =  tok.identifier [ _val = _1 ]
            |  tok.constant [ _val = _1 ]
            ;
    }

    typedef typename Lexer::token_set token_set;
    typedef boost::variant<unsigned int, std::string> expression_type;

    rule<Iterator, in_state_skipper<token_set> > program, block, statement;
    rule<Iterator, in_state_skipper<token_set> > assignment, if_stmt;
    rule<Iterator, in_state_skipper<token_set> > while_stmt;

    // the expression is the only rule having a return value
    rule<Iterator, expression_type(), in_state_skipper<token_set> > expression;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

//[example4_token
    // This is the lexer token type to use. The second template parameter lists
    // all attribute types used for token_def's during token definition (see
    // calculator_tokens<> above). Here we use the predefined lexertl token
    // type, but any compatible token type may be used instead.
    //
    // If you don't list any token value types in the following declaration
    // (or just use the default token type: lexertl_token<base_iterator_type>)
    // it will compile and work just fine, just a bit less efficiently. This is
    // because the token value will be generated from the matched input
    // sequence every time it is requested. But as soon as you specify at
    // least one token value type you'll have to list all value types used
    // for token_def<> declarations in the token definition class above,
    // otherwise compilation errors will occur.
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<unsigned int, std::string>
    > token_type;
//]
    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example4_tokens<lexer_type> example4_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example4_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example4_grammar<iterator_type, lexer_type> example4_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example4_tokens tokens;                         // Our token definition
    example4_grammar def (tokens);                  // Our grammar definition

    lexer<example4_tokens> lex(tokens);             // Our lexer
    grammar<example4_grammar> calc(def, def.program); // Our grammar

    std::string str (read_from_file("example4.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token_set defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    bool r = phrase_parse(iter, end, calc, in_state("WS")[tokens.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
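The comment above about conversion happening once, on first access, describes a lazy-caching scheme. A much simplified model of such a token, not the library's actual implementation (plain C++17 for std::variant):

#include <iostream>
#include <string>
#include <variant>

// A token stores the matched characters; the typed value is computed
// from them only when first requested, then cached in the variant.
struct lazy_token
{
    std::string matched;                             // raw input range
    std::variant<std::monostate, unsigned int> cache;

    unsigned int value()
    {
        if (std::holds_alternative<std::monostate>(cache))
            cache = (unsigned int)std::stoul(matched);   // convert once
        return std::get<unsigned int>(cache);
    }
};

int main()
{
    lazy_token t{"42", {}};
    std::cout << t.value() << "\n";    // conversion happens here
    std::cout << t.value() << "\n";    // cached result reused
}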
283
example/lex/example5.cpp
Normal file
@@ -0,0 +1,283 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//  Copyright (c) 2001-2007 Joel de Guzman
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens aimed at a simple language and how to use this lexer with
//  a grammar. It shows how to associate values to tokens and how to access the
//  token values from inside the grammar.
//
//  Additionally, this example demonstrates how to define a token set usable
//  as the skip parser during parsing, allowing several tokens to be ignored.
//
//  The main purpose of this example is to show how inheritance can be used to
//  overload parts of a base grammar and add token definitions to a base lexer.
//
//  Further, it shows how you can use the 'omitted' attribute type specifier
//  for token definitions to force the token to have no attribute (expose an
//  unused attribute).
//
//  This example recognizes a very simple programming language having
//  assignment statements and if and while control structures. Look at the file
//  example5.input for an example.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

using boost::phoenix::val;

///////////////////////////////////////////////////////////////////////////////
//  Token definition base, defines all tokens for the base grammar below
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example5_base_tokens : lexer_def<Lexer>
{
    typedef typename Lexer::token_set token_set;

    template <typename Self>
    void def (Self& self)
    {
        // define the tokens to match
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";
        if_ = "if";
        while_ = "while";

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        white_space
            =   token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;

        // associate the tokens and the token set with the lexer
        self += token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
        self += if_ | while_ | identifier;
        self("WS") = white_space;
    }

    // these tokens have no value
    token_def<omitted> if_, while_;

    // The following two tokens have an associated value type, identifier
    // carries a string (the identifier name) and constant carries the matched
    // integer value.
    //
    // Note: any token value type explicitly specified during a token_def<>
    //       declaration needs to be listed during token type definition as
    //       well (see the typedef for the token_type below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token values as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding tokens being copied around.
    token_def<std::string> identifier;
    token_def<unsigned int> constant;

    // token set to be used as the skip parser
    token_set white_space;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition base, defines a basic language
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example5_base_grammar
  : grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
    template <typename TokenDef>
    example5_base_grammar(TokenDef const& tok)
    {
        program
            =  +block
            ;

        block
            =  '{' >> *statement >> '}'
            ;

        statement
            =  assignment
            |  if_stmt
            |  while_stmt
            ;

        assignment
            =  (tok.identifier >> '=' >> expression >> ';')
               [
                   std::cout << val("assignment statement to: ") << _1 << "\n"
               ]
            ;

        if_stmt
            =  (tok.if_ >> '(' >> expression >> ')' >> block)
               [
                   std::cout << val("if expression: ") << _1 << "\n"
               ]
            ;

        while_stmt
            =  (tok.while_ >> '(' >> expression >> ')' >> block)
               [
                   std::cout << val("while expression: ") << _1 << "\n"
               ]
            ;

        // since expression has a variant return type accommodating both
        // std::string and unsigned integer, both possible values may be
        // returned to the calling rule
        expression
            =  tok.identifier [ _val = _1 ]
            |  tok.constant [ _val = _1 ]
            ;
    }

    typedef
        grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
    base_type;
    typedef typename base_type::skipper_type skipper_type;

    rule<Iterator, skipper_type> program, block, statement;
    rule<Iterator, skipper_type> assignment, if_stmt;
    rule<Iterator, skipper_type> while_stmt;

    // the expression is the only rule having a return value
    typedef boost::variant<unsigned int, std::string> expression_type;
    rule<Iterator, expression_type(), skipper_type> expression;
};

///////////////////////////////////////////////////////////////////////////////
//  Token definition for derived lexer, defines additional tokens
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example5_tokens : example5_base_tokens<Lexer>
{
    typedef typename Lexer::token_set token_set;

    template <typename Self>
    void def (Self& self)
    {
        // define the additional token to match
        else_ = "else";

        // associate the new token with the lexer; note we add 'else' before
        // anything else to add it to the token set before the identifier
        // token, otherwise "else" would be matched as an identifier
        self = else_;

        // call the base class definition function
        example5_base_tokens<Lexer>::def(self);
    }

    // this token has no value
    token_def<omitted> else_;
};

///////////////////////////////////////////////////////////////////////////////
//  Derived grammar definition, defines a language extension
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example5_grammar : example5_base_grammar<Iterator, Lexer>
{
    template <typename TokenDef>
    example5_grammar(TokenDef const& tok)
      : example5_base_grammar<Iterator, Lexer>(tok)
    {
        // we alter the if_stmt only
        this->if_stmt
            =  this->if_stmt.copy() >> -(tok.else_ >> this->block)
            ;
    }
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the lexer token type to use. The second template parameter lists
    // all attribute types used for token_def's during token definition (see
    // calculator_tokens<> above). Here we use the predefined lexertl token
    // type, but any compatible token type may be used instead.
    //
    // If you don't list any token value types in the following declaration
    // (or just use the default token type: lexertl_token<base_iterator_type>)
    // it will compile and work just fine, just a bit less efficiently. This is
    // because the token value will be generated from the matched input
    // sequence every time it is requested. But as soon as you specify at
    // least one token value type you'll have to list all value types used
    // for token_def<> declarations in the token definition class above,
    // otherwise compilation errors will occur.
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<unsigned int, std::string>
    > token_type;

    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example5_tokens<lexer_type> example5_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example5_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example5_grammar<iterator_type, lexer_type> example5_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example5_tokens tokens;                         // Our token definition
    example5_grammar def (tokens);                  // Our grammar definition

    lexer<example5_tokens> lex(tokens);             // Our lexer
    grammar<example5_grammar> calc(def, def.program); // Our grammar

    std::string str (read_from_file("example5.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token_set defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    std::string ws("WS");
    bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
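The ordering remark in example5_tokens::def is the classic keyword-versus-identifier problem: whichever pattern is registered first wins for input such as "else". A tiny first-match-wins matcher makes the point (standard C++ with std::regex, illustrative only):

#include <iostream>
#include <regex>
#include <string>
#include <utility>
#include <vector>

int main()
{
    // first-match-wins, mirroring the lexer's token registration order
    std::vector<std::pair<std::string, std::regex>> rules = {
        {"else",       std::regex("else")},                  // keyword first
        {"identifier", std::regex("[a-zA-Z_][a-zA-Z0-9_]*")},
    };

    std::string input = "else";
    for (auto const& r : rules) {
        if (std::regex_match(input, r.second)) {
            std::cout << "matched as: " << r.first << "\n";  // prints "else"
            break;
        }
    }
}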
263
example/lex/example6.cpp
Normal file
@@ -0,0 +1,263 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//  Copyright (c) 2001-2007 Joel de Guzman
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens aimed at a simple language and how to use this lexer with
//  a grammar. It shows how to associate values to tokens and how to access the
//  token values from inside the grammar.
//
//  Additionally, this example demonstrates how to define a token set usable
//  as the skip parser during parsing, allowing several tokens to be ignored.
//
//  The example demonstrates how to use the add(...)(...) syntax to associate
//  token definitions with the lexer and how token ids can be used in the
//  parser to refer to a token, without having to directly reference its
//  definition.
//
//  This example recognizes a very simple programming language having
//  assignment statements and if and while control structures. Look at the file
//  example6.input for an example.
//
//  This example is essentially identical to example4.cpp. The only difference
//  is that we use the self.add() syntax to define tokens and to associate them
//  with the lexer.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

using boost::phoenix::val;

///////////////////////////////////////////////////////////////////////////////
//  Token id definitions
///////////////////////////////////////////////////////////////////////////////
enum token_ids
{
    ID_CONSTANT = 1000,
    ID_IF,
    ID_ELSE,
    ID_WHILE,
    ID_IDENTIFIER
};

///////////////////////////////////////////////////////////////////////////////
//  Token definitions
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example6_tokens : lexer_def<Lexer>
{
    typedef typename Lexer::token_set token_set;

    template <typename Self>
    void def (Self& self)
    {
        // define the tokens to match
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        white_space
            =   token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;

        // associate the tokens and the token set with the lexer
        self = token_def<>('(') | ')' | '{' | '}' | '=' | ';';

        // Token definitions can be added by using some special syntactic
        // construct as shown below.
        // Note that the token definitions added this way expose the iterator
        // pair pointing to the matched input stream as their attribute.
        self.add
            (constant, ID_CONSTANT)
            ("if", ID_IF)
            ("else", ID_ELSE)
            ("while", ID_WHILE)
            (identifier, ID_IDENTIFIER)
        ;

        // add whitespace tokens to another lexer state (here: "WS")
        self("WS") = white_space;
    }

    // The following two tokens have an associated value type, identifier
    // carries a string (the identifier name) and constant carries the matched
    // integer value.
    //
    // Note: any token value type explicitly specified during a token_def<>
    //       declaration needs to be listed during token type definition as
    //       well (see the typedef for the token_type below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token values as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding tokens being copied around.
    token_def<std::string> identifier;
    token_def<unsigned int> constant;

    // token set to be used as the skip parser
    token_set white_space;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example6_grammar
  : grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
    template <typename TokenDef>
    example6_grammar(TokenDef const& tok)
    {
        program
            =  +block
            ;

        block
            =  '{' >> *statement >> '}'
            ;

        statement
            =  assignment
            |  if_stmt
            |  while_stmt
            ;

        assignment
            =  (tok.identifier >> '=' >> expression >> ';')
               [
                   std::cout << val("assignment statement to: ")
                             << _1 << "\n"
               ]
            ;

        if_stmt
            =  (   token(ID_IF) >> '(' >> expression >> ')' >> block
               >> -(token(ID_ELSE) >> block)
               )
               [
                   std::cout << val("if expression: ")
                             << _2 << "\n"
               ]
            ;

        while_stmt
            =  (token(ID_WHILE) >> '(' >> expression >> ')' >> block)
               [
                   std::cout << val("while expression: ")
                             << _2 << "\n"
               ]
            ;

        // since expression has a variant return type accommodating both
        // std::string and unsigned integer, both possible values may be
        // returned to the calling rule
        expression
            =  tok.identifier [ _val = _1 ]
            |  tok.constant [ _val = _1 ]
            ;
    }

    typedef typename Lexer::token_set token_set;
    typedef boost::variant<unsigned int, std::string> expression_type;

    rule<Iterator, in_state_skipper<token_set> > program, block, statement;
    rule<Iterator, in_state_skipper<token_set> > assignment, if_stmt;
    rule<Iterator, in_state_skipper<token_set> > while_stmt;

    // the expression is the only rule having a return value
    rule<Iterator, expression_type(), in_state_skipper<token_set> > expression;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the lexer token type to use. The second template parameter lists
    // all attribute types used for token_def's during token definition (see
    // calculator_tokens<> above). Here we use the predefined lexertl token
    // type, but any compatible token type may be used instead.
    //
    // If you don't list any token value types in the following declaration
    // (or just use the default token type: lexertl_token<base_iterator_type>)
    // it will compile and work just fine, just a bit less efficiently. This is
    // because the token value will be generated from the matched input
    // sequence every time it is requested. But as soon as you specify at
    // least one token value type you'll have to list all value types used
    // for token_def<> declarations in the token definition class above,
    // otherwise compilation errors will occur.
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<unsigned int, std::string>
    > token_type;

    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example6_tokens<lexer_type> example6_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example6_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example6_grammar<iterator_type, lexer_type> example6_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example6_tokens tokens;                         // Our token definition
    example6_grammar def (tokens);                  // Our grammar definition

    lexer<example6_tokens> lex(tokens);             // Our lexer
    grammar<example6_grammar> calc(def, def.program); // Our grammar

    std::string str (read_from_file("example6.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token_def defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    std::string ws("WS");
    bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
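Referring to tokens by numeric id, as token(ID_IF) does above, decouples the grammar from the token definition object: the grammar only needs the agreed id values. In plain C++ the same decoupling is simply dispatch on an enum (illustrative sketch):

#include <iostream>

// ids shared between lexer and parser; values mirror the enum above
enum token_ids { ID_CONSTANT = 1000, ID_IF, ID_ELSE, ID_WHILE, ID_IDENTIFIER };

// A consumer written against ids alone: it never sees the regex
// or token_def object that produced the token.
static char const* describe(int id)
{
    switch (id) {
    case ID_CONSTANT:   return "constant";
    case ID_IF:         return "keyword 'if'";
    case ID_ELSE:       return "keyword 'else'";
    case ID_WHILE:      return "keyword 'while'";
    case ID_IDENTIFIER: return "identifier";
    default:            return "operator/punctuation";
    }
}

int main()
{
    std::cout << describe(ID_IF) << "\n";
}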
118
example/lex/print_numbers.cpp
Normal file
@@ -0,0 +1,118 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is the equivalent to the following lex program:
//
//      %{
//      #include <stdio.h>
//      %}
//      %%
//      [0-9]+    { printf("%s\n", yytext); }
//      .|\n      ;
//      %%
//      main()
//      {
//          yylex();
//      }
//
//  Its purpose is to print all the (integer) numbers found in a file

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//  lexer type.
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct print_numbers_tokens : lexer_def<Lexer>
{
    // define tokens and associate them with the lexer
    template <typename Self>
    void def (Self& self)
    {
        self = token_def<int>("[0-9]*") | ".|\n";
    }
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct print_numbers_grammar : grammar_def<Iterator>
{
    print_numbers_grammar()
    {
        start =  *(   token(lex::min_token_id) [ std::cout << _1 << "\n" ]
                  |   token(lex::min_token_id+1)
                  )
              ;
    }

    rule<Iterator> start;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // the token type to be used, 'int' is available as the type of the token
    // value and no lexer state is supported
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<int>, boost::mpl::false_
    > token_type;

    // lexer type
    typedef lexertl_lexer<token_type> lexer_type;

    // iterator type exposed by the lexer
    typedef
        lexer_iterator<print_numbers_tokens<lexer_type> >::type
    iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    print_numbers_tokens<lexer_type> print_tokens;  // Our token definition
    print_numbers_grammar<iterator_type> def;       // Our grammar definition

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    std::string str (read_from_file(1 == argc ? "print_numbers.input" : argv[1]));
    base_iterator_type first = str.begin();
    bool r = tokenize_and_parse(first, str.end(), make_lexer(print_tokens),
        make_parser(def));

    if (r) {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else {
        std::string rest(first, str.end());
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
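Since print_numbers.cpp mirrors a classic lex program, a standard-library-only rendering of the same job (C++11 std::regex) may be a useful point of comparison:

#include <iostream>
#include <regex>
#include <string>

int main()
{
    std::string text = "abc 12 de3f 4567";
    std::regex number("[0-9]+");

    // print every integer found, like the [0-9]+ rule in the lex program
    for (std::sregex_iterator it(text.begin(), text.end(), number), end;
         it != end; ++it)
    {
        std::cout << it->str() << "\n";
    }
}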
13
example/lex/static_lexer/Jamfile
Normal file
@@ -0,0 +1,13 @@
#==============================================================================
#   Copyright (c) 2001-2007 Joel de Guzman
#   Copyright (c) 2001-2008 Hartmut Kaiser
#
#   Distributed under the Boost Software License, Version 1.0. (See accompanying
#   file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================

project spirit-static-lexer-example ;

exe generate_tables : generate_tables.cpp ;
exe word_count_static : word_count_static.cpp ;

42
example/lex/static_lexer/word_count_generate.cpp
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// The purpose of this example is to show how it is possible to use a lexer
// token definition for two purposes:
//
//   . To generate C++ code implementing a static lexical analyzer capable
//     of recognizing all defined tokens (this file)
//   . To integrate the generated C++ lexer into the /Spirit/ framework
//     (see the file word_count_static.cpp)

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/lex/lexer/lexertl/lexertl_generate_static.hpp>

#include <fstream>

#include "word_count_tokens.hpp"

using namespace boost::spirit;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
//[wc_static_generate_main
int main(int argc, char* argv[])
{
    // create the lexer object instance needed to invoke the generator
    word_count_tokens<lexertl_lexer<> > word_count; // the token definition

    // open the output file the generated tokenizer function will be
    // written to
    std::ofstream out(argc < 2 ? "word_count_static.hpp" : argv[1]);

    // invoke the generator, passing the token definition, the output stream,
    // and the name prefix of the tokenizing function to be generated
    char const* function_name = (argc < 3 ? "" : argv[2]);
    return generate_static(make_lexer(word_count), out, function_name) ? 0 : -1;
}
//]
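The generator above and the static lexer below form a two-step build: running this program once writes word_count_static.hpp, which the second program then compiles against. A sketch of the programmatic flow (all names taken from the example above; the empty function-name prefix is what the example passes when no second argument is given):

    // Sketch: the same three calls main() makes, spelled out.
    word_count_tokens<lexertl_lexer<> > word_count;  // the token definition
    std::ofstream out("word_count_static.hpp");     // the generated header
    bool ok = generate_static(make_lexer(word_count), out, "");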
118
example/lex/static_lexer/word_count_static.cpp
Normal file
@@ -0,0 +1,118 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// The purpose of this example is to show how it is possible to use a lexer
// token definition for two purposes:
//
//   . To generate C++ code implementing a static lexical analyzer capable
//     of recognizing all defined tokens
//   . To integrate the generated C++ lexer into the /Spirit/ framework.
//

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE

#include <boost/spirit/include/qi.hpp>
//[wc_static_include
#include <boost/spirit/include/lex_lexer_static_lexertl.hpp>
//]
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

#include <iostream>
#include <string>

#include "../example.hpp"
#include "word_count_tokens.hpp"    // token definition

#include "word_count_static.hpp"    // generated tokenizer

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wc_static_grammar
// This is an ordinary grammar definition following the rules defined by
// Spirit.Qi. There is nothing specific about it, except that it gets the token
// definition class instance passed to the constructor to allow accessing the
// embedded token_def<> instances.
template <typename Iterator>
struct word_count_grammar : grammar_def<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : c(0), w(0), l(0)
    {
        using boost::spirit::arg_names::_1;
        using boost::phoenix::ref;
        using boost::phoenix::size;

        // associate the defined tokens with the lexer, at the same time
        // defining the actions to be executed
        start = *(   tok.word [++ref(w), ref(c) += size(_1)]
                 |   char_('\n') [++ref(l), ++ref(c)]
                 |   token(IDANY) [++ref(c)]
                 )
             ;
    }

    std::size_t c, w, l;    // counters for characters, words, and lines
    rule<Iterator> start;
};
//]

///////////////////////////////////////////////////////////////////////////////
//[wc_static_main
int main(int argc, char* argv[])
{
    // Define the token type to be used: 'std::string' is available as the
    // type of the token value.
    typedef lexertl_token<
        char const*, boost::mpl::vector<std::string>
    > token_type;

    // Define the lexer type to be used as the base class for our token
    // definition.
    //
    // This is the only place where the code is different from an equivalent
    // dynamic lexical analyzer. We use the `lexertl_static_lexer<>` instead of
    // the `lexertl_lexer<>` as the base class for our token definition type.
    //
    typedef lexertl_static_lexer<token_type> lexer_type;

    // Define the iterator type exposed by the lexer.
    typedef lexer_iterator<word_count_tokens<lexer_type> >::type iterator_type;

    // Now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process.
    word_count_tokens<lexer_type> word_count;           // Our token definition
    word_count_grammar<iterator_type> def (word_count); // Our grammar definition

    // Read the file into memory.
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    bool r = tokenize_and_parse(first, last, make_lexer(word_count),
        make_parser(def));

    if (r) {    // success
        std::cout << "lines: " << def.l << ", words: " << def.w
                  << ", characters: " << def.c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
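As the comments above stress, switching between the dynamic and the static analyzer is a one-typedef change. Side by side (both type names appear in the examples of this commit; pick exactly one):

    typedef lexertl_lexer<token_type>        lexer_type;   // dynamic: tables built at runtime
    //typedef lexertl_static_lexer<token_type> lexer_type; // static: uses word_count_static.hpp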
111
example/lex/static_lexer/word_count_static.hpp
Normal file
@@ -0,0 +1,111 @@
// Copyright (c) 2008 Ben Hanson
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// Auto-generated by boost::lexer
#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_Feb_13_2008_12_01_20)
#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_Feb_13_2008_12_01_20

#include <boost/detail/iterator.hpp>
#include <boost/spirit/support/detail/lexer/char_traits.hpp>

// the generated table of state names and the tokenizer have to be
// defined in the boost::spirit::lex::static_ namespace
namespace boost { namespace spirit { namespace lex { namespace static_ {

// this table defines the names of the lexer states
char const* const lexer_state_names[1] =
{
    "INITIAL",
};

template<typename Iterator>
std::size_t next_token (std::size_t &start_state_, Iterator const& start_,
    Iterator &start_token_, Iterator const& end_)
{
    enum {end_state_index, id_index, state_index, bol_index, eol_index,
        dead_state_index, dfa_offset};
    static const std::size_t npos = static_cast<std::size_t>(~0);
    static const std::size_t lookup_[256] = {8, 8, 8, 8, 8, 8, 8, 8,
        8, 7, 6, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        7, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8};
    static const std::size_t dfa_alphabet_ = 9;
    static const std::size_t dfa_[45] = {0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 3,
        4, 2, 1, 65536, 0, 0, 0, 0,
        0, 0, 2, 1, 65537, 0, 0, 0,
        0, 0, 0, 0, 1, 65538, 0, 0,
        0, 0, 0, 0, 0};

    if (start_token_ == end_) return 0;

    const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
    Iterator curr_ = start_token_;
    bool end_state_ = *ptr_ != 0;
    std::size_t id_ = *(ptr_ + id_index);
    Iterator end_token_ = start_token_;

    while (curr_ != end_)
    {
        std::size_t const state_ =
            ptr_[lookup_[static_cast<unsigned char>
                (*curr_++)]];

        if (state_ == 0) break;

        ptr_ = &dfa_[state_ * dfa_alphabet_];

        if (*ptr_)
        {
            end_state_ = true;
            id_ = *(ptr_ + id_index);
            end_token_ = curr_;
        }
    }

    if (end_state_)
    {
        // return longest match
        start_token_ = end_token_;
    }
    else
    {
        id_ = npos;
    }

    return id_;
}

}}}} // namespace boost::spirit::lex::static_

#endif
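To make the generated tables concrete, here is a hand-walked trace of next_token() on the input "ab\n", derived from the lookup_ and dfa_ arrays above (the reading of id 65536 as the word token is an assumption based on lex::min_token_id being 0x10000):

    // lookup_ maps a character to its equivalence class: '\n' -> 6,
    // ' ' and '\t' -> 7, everything else -> 8; row r of the DFA starts at
    // dfa_[r * dfa_alphabet_], i.e. dfa_[r * 9]; row 0 is the dead state.
    //
    //   start      : ptr_ = dfa_ + 9 (row 1), not an end state
    //   'a' (cls 8): row1[8] = 2 -> row 2; dfa_[18] == 1, so accepting, id 65536
    //   'b' (cls 8): row2[8] = 2 -> stay in row 2; match extended to "ab"
    //   '\n'(cls 6): row2[6] = 0 -> dead transition, the loop breaks
    //
    // longest match is "ab"; start_token_ is advanced past it and 65536 is
    // returned (presumably the word token).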
40
example/lex/static_lexer/word_count_tokens.hpp
Normal file
@@ -0,0 +1,40 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#if !defined(SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM)
#define SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM

///////////////////////////////////////////////////////////////////////////////
// Token definition: We keep the base class for the token definition as a
//                   template parameter to allow this class to be used for
//                   both the code generation and the lexical analysis
///////////////////////////////////////////////////////////////////////////////
//[wc_static_tokenids
enum tokenids
{
    IDANY = boost::spirit::lex::min_token_id + 1
};
//]

//[wc_static_tokendef
// This token definition class can be used without any change for all three
// possible use cases: a dynamic lexical analyzer, a code generator, and a
// static lexical analyzer.
template <typename BaseLexer>
struct word_count_tokens : boost::spirit::lex::lexer_def<BaseLexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens and associate them with the lexer
        word = "[^ \t\n]+";
        self = word | '\n' | token_def<>(".", IDANY);
    }

    boost::spirit::lex::token_def<std::string> word;
};
//]

#endif
164
example/lex/strip_comments.cpp
Normal file
@@ -0,0 +1,164 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is equivalent to the following lex program:
//
//     %{
//     /* INITIAL is the default start state.  COMMENT is our new  */
//     /* state where we remove comments.                          */
//     %}
//
//     %s COMMENT
//     %%
//     <INITIAL>"//".*    ;
//     <INITIAL>"/*"      BEGIN COMMENT;
//     <INITIAL>.         ECHO;
//     <INITIAL>[\n]      ECHO;
//     <COMMENT>"*/"      BEGIN INITIAL;
//     <COMMENT>.         ;
//     <COMMENT>[\n]      ;
//     %%
//
//     main()
//     {
//       yylex();
//     }
//
// Its purpose is to strip comments out of C code.
//
// Additionally this example demonstrates the use of lexer states to structure
// the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
//                   lexer type.
///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
    IDANY = lex::min_token_id + 10
};

template <typename Lexer>
struct strip_comments_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens and associate them with the lexer
        cppcomment = "//.*\n";
        ccomment = "/\\*";
        endcomment = "\\*/";

        // The following tokens are associated with the default lexer state
        // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
        // strictly optional.
        self.add
            (cppcomment)    // no explicit token id is associated
            (ccomment)
            (".", IDANY)    // IDANY is the token id associated with this token
                            // definition
        ;

        // The following tokens are associated with the lexer state "COMMENT".
        // We switch lexer states from inside the parsing process using the
        // in_state("COMMENT")[] parser component as shown below.
        self("COMMENT").add
            (endcomment)
            (".", IDANY)
        ;
    }

    token_def<> cppcomment, ccomment, endcomment;
};

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct strip_comments_grammar : grammar_def<Iterator>
{
    template <typename TokenDef>
    strip_comments_grammar(TokenDef const& tok)
    {
        // The in_state("COMMENT")[...] parser component switches the lexer
        // state to be 'COMMENT' during the matching of the embedded parser.
        start = *(   tok.ccomment
                     >> in_state("COMMENT")
                        [
                            // the lexer is in the 'COMMENT' state during
                            // matching of the following parser components
                            *token(IDANY) >> tok.endcomment
                        ]
                 |   tok.cppcomment
                 |   token(IDANY)
                 )
             ;
    }

    rule<Iterator> start;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // lexer type
    typedef lexertl_lexer<lexertl_token<base_iterator_type> > lexer_type;

    // iterator type exposed by the lexer
    typedef
        lexer_iterator<strip_comments_tokens<lexer_type> >::type
    iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    strip_comments_tokens<lexer_type> strip_comments;           // Our token definition
    strip_comments_grammar<iterator_type> def (strip_comments); // Our grammar definition

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
    base_iterator_type first = str.begin();
    bool r = tokenize_and_parse(first, str.end(), make_lexer(strip_comments),
        make_parser(def));

    if (r) {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else {
        std::string rest(first, str.end());
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
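A worked trace of what the grammar above does with a small input (the input line is hypothetical; the token names refer to the definitions above):

    // input:   int a; /* note */ int b;   // done
    //
    // default state:  IDANY... ("int a; ")      consumed by token(IDANY)
    //                 ccomment ("/*")           enters in_state("COMMENT")[...]
    // COMMENT state:  IDANY... (" note ")       consumed by *token(IDANY)
    //                 endcomment ("*/")         ends the embedded parser; the
    //                                           lexer reverts to "INITIAL"
    // default state:  IDANY... (" int b;   ")   consumed by token(IDANY)
    //                 cppcomment ("// done\n")  matched as one token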
121
example/lex/strip_comments_lexer.cpp
Normal file
@@ -0,0 +1,121 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is equivalent to the following lex program:
//
//     %{
//     /* INITIAL is the default start state.  COMMENT is our new  */
//     /* state where we remove comments.                          */
//     %}
//
//     %s COMMENT
//     %%
//     <INITIAL>"//".*    ;
//     <INITIAL>"/*"      BEGIN COMMENT;
//     <INITIAL>.         ECHO;
//     <INITIAL>[\n]      ECHO;
//     <COMMENT>"*/"      BEGIN INITIAL;
//     <COMMENT>.         ;
//     <COMMENT>[\n]      ;
//     %%
//
//     main()
//     {
//       yylex();
//     }
//
// Its purpose is to strip comments out of C code.
//
// Additionally this example demonstrates the use of lexer states to structure
// the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/lex/lexer/lexer_actions.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_core.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
//                   lexer type.
///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
    IDANY = lex::min_token_id + 10,
    IDEOL = lex::min_token_id + 11
};

template <typename Lexer>
struct strip_comments_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens and associate them with the lexer
        cppcomment = "//[^\n]*";
        ccomment = "/\\*";
        endcomment = "\\*/";
        any = ".";
        eol = "\n";

        // The following tokens are associated with the default lexer state
        // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
        // strictly optional.
        self = cppcomment
             | ccomment [ set_state("COMMENT") ]
             | eol [ echo_input(std::cout) ]
             | any [ echo_input(std::cout) ]
             ;

        // The following tokens are associated with the lexer state 'COMMENT'.
        self("COMMENT")
            = endcomment [ set_state("INITIAL") ]
            | eol
            | any
            ;
    }

    token_def<> cppcomment, ccomment, endcomment, any, eol;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // lexer type
    typedef lexertl_actor_lexer<lexertl_token<base_iterator_type> > lexer_type;

    // now we use the types defined above to create the lexer object
    // instance needed to invoke the lexical analysis
    strip_comments_tokens<lexer_type> strip_comments;   // Our token definition

    // Lexical analysis is done based on the token stream, not the character
    // stream read from the input.
    std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
    base_iterator_type first = str.begin();
    bool r = tokenize(first, str.end(), make_lexer(strip_comments));

    if (!r) {
        std::string rest(first, str.end());
        std::cerr << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
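Because all of the work happens in the lexer semantic actions here, the observable behaviour is easy to state: every `any` and `eol` token matched in the INITIAL state is echoed, while the comment delimiters and everything matched in the COMMENT state are swallowed. A sketch with a hypothetical input line:

    // input :  int a; /* note */ int b;
    // output:  int a;  int b;
    //
    // "/*" switches to state "COMMENT" without echoing; " note " and "*/" are
    // matched in that state and produce no output; "*/" switches back to
    // "INITIAL", after which echoing resumes.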
172
example/lex/word_count.cpp
Normal file
@@ -0,0 +1,172 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is equivalent to the following lex program:
/*
//[wcp_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    word [^ \t\n]+
    eol  \n
    %%
    {word} { ++w; c += yyleng; }
    {eol}  { ++c; ++l; }
    .      { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
// Its purpose is to do the word count function of the wc command in UNIX. It
// prints the number of lines, words and characters in a file.
//
// The example additionally demonstrates how to use the add_pattern(...)(...)
// syntax to define lexer patterns. These patterns are essentially parameter-
// less 'macros' for regular expressions, simplifying their definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE

//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"

//[wcp_namespaces
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
//                   lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
enum tokenids
{
    IDANY = lex::min_token_id + 10
};
//]

//[wcp_token_definition
template <typename Lexer>
struct word_count_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define patterns (lexer macros) to be used during token definition
        // below
        self.add_pattern
            ("WORD", "[^ \t\n]+")
        ;

        // define tokens and associate them with the lexer
        word = "{WORD}";    // reference the pattern 'WORD' as defined above

        // this lexer will recognize 3 token types: words, newlines, and
        // everything else
        self.add
            (word)          // no token id is needed here
            ('\n')          // characters are usable as tokens as well
            (".", IDANY)
        ;
    }

    token_def<std::string> word;
};
//]

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wcp_grammar_definition
template <typename Iterator>
struct word_count_grammar : grammar_def<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : c(0), w(0), l(0)
    {
        using boost::phoenix::ref;
        using boost::phoenix::size;

        // As documented in the Spirit.Qi documentation, any placeholders
        // (_1 et al.) used in semantic actions inside a grammar need to be
        // imported from the namespace boost::spirit::arg_names, not from
        // the corresponding namespace in Phoenix.
        using boost::spirit::arg_names::_1;

        start = *(   tok.word [++ref(w), ref(c) += size(_1)]
                 |   char_('\n') [++ref(c), ++ref(l)]
                 |   token(IDANY) [++ref(c)]
                 )
             ;
    }

    std::size_t c, w, l;
    rule<Iterator> start;
};
//]

///////////////////////////////////////////////////////////////////////////////
//[wcp_main
int main(int argc, char* argv[])
{
    /*< define the token type to be used: `std::string` is available as the
         type of the token value
    >*/ typedef lexertl_token<
            char const*, boost::mpl::vector<std::string>
        > token_type;

    /*< define the lexer type to use implementing the state machine
    >*/ typedef lexertl_lexer<token_type> lexer_type;

    /*< define the iterator type exposed by the lexer type
    >*/ typedef lexer_iterator<word_count_tokens<lexer_type> >::type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    word_count_tokens<lexer_type> word_count;           // Our token definition
    word_count_grammar<iterator_type> def (word_count); // Our grammar definition

    // read the file into memory
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

    // Parsing is done based on the token stream, not the character
    // stream read from the input. The function `tokenize_and_parse()` wraps
    // the passed iterator range `[first, last)` by the lexical analyzer and
    // uses its exposed iterators to parse the token stream.
    bool r = tokenize_and_parse(first, last, make_lexer(word_count),
        make_parser(def));

    if (r) {
        std::cout << "lines: " << def.l << ", words: " << def.w
                  << ", characters: " << def.c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
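The three semantic actions above implement exactly the wc arithmetic; a worked example on a tiny input (counts derived from the actions, input string hypothetical):

    // input: "hello world\n"
    //   tok.word "hello" : ++w, c += 5   -> w=1, c=5
    //   token(IDANY) ' ' : ++c           -> c=6
    //   tok.word "world" : ++w, c += 5   -> w=2, c=11
    //   char_('\n')      : ++c, ++l      -> c=12, l=1
    // result: lines: 1, words: 2, characters: 12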
184
example/lex/word_count_functor.cpp
Normal file
@@ -0,0 +1,184 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is equivalent to the following flex program:
/*
//[wcf_flex_version
    %{
        #define ID_WORD 1000
        #define ID_EOL  1001
        #define ID_CHAR 1002
        int c = 0, w = 0, l = 0;
    %}
    %%
    [^ \t\n]+ { return ID_WORD; }
    \n        { return ID_EOL; }
    .         { return ID_CHAR; }
    %%
    bool count(int tok)
    {
        switch (tok) {
        case ID_WORD: ++w; c += yyleng; break;
        case ID_EOL:  ++l; ++c; break;
        case ID_CHAR: ++c; break;
        default:
            return false;
        }
        return true;
    }
    void main()
    {
        int tok = EOF;
        do {
            tok = yylex();
            if (!count(tok))
                break;
        } while (EOF != tok);
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
// Its purpose is to do the word count function of the wc command in UNIX. It
// prints the number of lines, words and characters in a file.
//
// This example shows how to use the tokenize() function together with a
// simple functor, which gets executed whenever a token is matched in the
// input sequence.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

//[wcf_includes
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"

//[wcf_namespaces
using namespace boost::spirit;
using namespace boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
// Token id definitions
///////////////////////////////////////////////////////////////////////////////
//[wcf_token_ids
enum token_ids
{
    ID_WORD = 1000,
    ID_EOL,
    ID_CHAR
};
//]

//[wcf_token_definition
/*` The template `word_count_tokens` defines three different tokens:
    `ID_WORD`, `ID_EOL`, and `ID_CHAR`, representing a word (anything except
    whitespace or a newline), a newline character, and any other character
    (`ID_WORD`, `ID_EOL`, and `ID_CHAR` are enum values representing the token
    ids, but could be anything else convertible to an integer as well).
    The direct base class of any token definition class needs to be the
    template `lexer_def<>`, where the corresponding template parameter (here:
    `lexertl_lexer<BaseIterator>`) defines which underlying lexer engine has
    to be used to provide the required state machine functionality. In this
    example we use the lexertl based lexer engine as the underlying lexer type.
*/
template <typename Lexer>
struct word_count_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens (the regular expression to match and the corresponding
        // token id) and add them to the lexer
        self.add
            ("[^ \t\n]+", ID_WORD) // words (anything except ' ', '\t' or '\n')
            ("\n", ID_EOL)         // newline characters
            (".", ID_CHAR)         // anything else is a plain character
        ;
    }
};
//]

//[wcf_functor
/*` In this example the struct 'counter' is used as a functor counting the
    characters, words and lines in the analyzed input sequence by identifying
    the matched tokens as passed from the /Spirit.Lex/ library.
*/
struct counter
{
    //<- this is an implementation detail and doesn't show up in the documentation
    typedef bool result_type;

    //->
    // the function operator gets called for each of the matched tokens
    // c, l, w are references to the counters used to keep track of the numbers
    template <typename Token>
    bool operator()(Token const& t, std::size_t& c, std::size_t& w, std::size_t& l) const
    {
        switch (t.id()) {
        case ID_WORD:       // matched a word
            // since we're using a default token type in this example, every
            // token instance contains an `iterator_range<BaseIterator>` as its
            // token value, pointing to the matched character sequence in the
            // input
            ++w; c += t.value().size();
            break;
        case ID_EOL:        // matched a newline character
            ++l; ++c;
            break;
        case ID_CHAR:       // matched something else
            ++c;
            break;
        }
        return true;        // always continue to tokenize
    }
};
//]

///////////////////////////////////////////////////////////////////////////////
//[wcf_main
/*` The main function simply loads the given file into memory (as a
    `std::string`), instantiates an instance of the token definition template
    using the correct iterator type (`word_count_tokens<char const*>`),
    and finally calls `lex::tokenize`, passing an instance of the counter
    functor defined above. The return value of `lex::tokenize` will be `true`
    if the whole input sequence has been successfully tokenized, and `false`
    otherwise.
*/
int main(int argc, char* argv[])
{
    // these variables are used to count characters, words and lines
    std::size_t c = 0, w = 0, l = 0;

    // read input from the given file
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));

    // create the token definition instance needed to invoke the lexical analyzer
    word_count_tokens<lexertl_lexer<> > word_count_functor;

    // tokenize the given string, the bound functor gets invoked for each of
    // the matched tokens
    char const* first = str.c_str();
    char const* last = &first[str.size()];
    bool r = lex::tokenize(first, last, make_lexer(word_count_functor),
        boost::bind(counter(), _1, boost::ref(c), boost::ref(w), boost::ref(l)));

    // print results
    if (r) {
        std::cout << "lines: " << l << ", words: " << w
                  << ", characters: " << c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cout << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
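Because `counter` only touches `t.id()` and `t.value().size()`, it can be exercised without any lexer at all. A minimal standalone check (the mock_token type is hypothetical and exists only for this sketch):

    #include <cassert>
    #include <string>

    // mimics just the two members of a Spirit.Lex token that counter uses
    struct mock_token
    {
        mock_token(int id, std::string const& value) : id_(id), value_(value) {}
        int id() const { return id_; }
        std::string const& value() const { return value_; }

        int id_;
        std::string value_;
    };

    void test_counter()
    {
        std::size_t c = 0, w = 0, l = 0;
        counter count;
        count(mock_token(ID_WORD, "hello"), c, w, l);   // w=1, c=5
        count(mock_token(ID_EOL, "\n"), c, w, l);       // l=1, c=6
        assert(c == 6 && w == 1 && l == 1);
    }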
1571
example/lex/word_count_functor_flex.cpp
Normal file
File diff suppressed because it is too large
138
example/lex/word_count_lexer.cpp
Normal file
@@ -0,0 +1,138 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is equivalent to the following lex program:
/*
//[wcl_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    %%
    [^ \t\n]+ { ++w; c += yyleng; }
    \n        { ++c; ++l; }
    .         { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
// Its purpose is to do the word count function of the wc command in UNIX. It
// prints the number of lines, words and characters in a file.
//
// This example shows how to use semantic actions associated with token
// definitions to directly attach actions to tokens. These get executed
// whenever the corresponding token is matched in the input sequence. Note
// how this example implements all functionality directly in the lexer
// definition without any need for a parser.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

//[wcl_includes
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_algorithm.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"

//[wcl_namespaces
using namespace boost::spirit;
using namespace boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
//                   lexer type.
//
// Note that the token definition type is used with the 'lexertl_actor_lexer'
// template, which is necessary to be able to use lexer semantic actions.
///////////////////////////////////////////////////////////////////////////////
//[wcl_token_definition
template <typename Lexer>
struct word_count_tokens : lexer_def<Lexer>
{
    word_count_tokens()
      : c(0), w(0), l(0),
        word("[^ \t\n]+"), eol("\n"), any(".")    // define tokens
    {}

    template <typename Self>
    void def (Self& self)
    {
        using boost::phoenix::ref;
        using boost::phoenix::distance;

        // Note that all placeholders used in lexer semantic actions in
        // conjunction with functors created based on Phoenix2 need to be from
        // the namespace boost::phoenix::arg_names (not spirit::arg_names).
        // Using the wrong placeholders leads to subtle compilation errors
        // which are difficult to trace back to their cause.
        using boost::phoenix::arg_names::_1;

        // associate tokens with the lexer
        self = word [++ref(w), ref(c) += distance(_1)]
             | eol  [++ref(c), ++ref(l)]
             | any  [++ref(c)]
             ;
    }

    std::size_t c, w, l;
    token_def<> word, eol, any;
};
//]

///////////////////////////////////////////////////////////////////////////////
//[wcl_main
int main(int argc, char* argv[])
{
    // read input from the given file
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));

    // Specifying 'omitted' as the token value type generates a token class
    // not holding any token value at all (not even the iterator_range of the
    // matched input sequence), therefore optimizing the token, the lexer, and
    // possibly the parser implementation as much as possible.
    //
    // Specifying mpl::false_ as the 3rd template parameter generates a token
    // type and an iterator, both holding no lexer state, allowing for even
    // more aggressive optimizations.
    //
    // As a result the token instances contain the token ids as their only
    // data member.
    typedef lexertl_token<char const*, omitted, boost::mpl::false_> token_type;

    // lexer type
    typedef lexertl_actor_lexer<token_type> lexer_type;

    // create the lexer object instance needed to invoke the lexical analysis
    word_count_tokens<lexer_type> word_count_lexer;

    // tokenize the given string, all generated tokens are discarded
    char const* first = str.c_str();
    char const* last = &first[str.size()];
    bool r = tokenize(first, last, make_lexer(word_count_lexer));

    if (r) {
        std::cout << "lines: " << word_count_lexer.l
                  << ", words: " << word_count_lexer.w
                  << ", characters: " << word_count_lexer.c
                  << "\n";
    }
    else {
        std::string rest(first, last);
        std::cout << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
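One subtlety worth noting across these word-count variants: in the Qi grammar versions the token value is a std::string, so the character count uses phoenix::size(_1), whereas in the lexer semantic actions above _1 refers to the matched iterator range, so phoenix::distance(_1) is used instead. Both lines below are taken from the examples in this commit:

    // parser semantic action (word_count.cpp): token value is a std::string
    //     tok.word [++ref(w), ref(c) += size(_1)]
    // lexer semantic action (this file): _1 is the matched iterator range
    //     word [++ref(w), ref(c) += distance(_1)]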
46
example/qi/Jamfile
Normal file
@@ -0,0 +1,46 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
project spirit-qi-example ;

exe sum : sum.cpp ;
exe complex_number : complex_number.cpp ;
exe employee : employee.cpp ;
exe roman : roman.cpp ;
exe mini_xml1 : mini_xml1.cpp ;
exe mini_xml2 : mini_xml2.cpp ;
exe num_list : num_list.cpp ;
exe num_list2 : num_list2.cpp ;
exe num_list3 : num_list3.cpp ;

exe calc1 : calc1.cpp ;
exe calc2 : calc2.cpp ;
exe calc3 : calc3.cpp ;
exe calc4 : calc4.cpp ;
exe calc5 : calc5.cpp ;

exe calc6 :
    calc6/calc6.cpp
    calc6/calc6a.cpp
    calc6/calc6b.cpp
    calc6/calc6c.cpp
    ;

exe calc7 :
    calc7/calc7.cpp
    calc7/calc7a.cpp
    calc7/calc7b.cpp
    calc7/calc7c.cpp
    ;

exe mini_c :
    mini_c/mini_c.cpp
    mini_c/mini_ca.cpp
    mini_c/mini_cb.cpp
    mini_c/mini_cc.cpp
    mini_c/mini_cd.cpp
    ;
104
example/qi/calc1.cpp
Normal file
@@ -0,0 +1,104 @@
/*=============================================================================
    Copyright (c) 2001-2007 Joel de Guzman

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
//  Plain calculator example demonstrating the grammar. The parser is a
//  syntax checker only and does not do any semantic evaluation.
//
//  [ JDG May 10, 2002 ]    spirit1
//  [ JDG March 4, 2007 ]   spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;

///////////////////////////////////////////////////////////////////////////////
//  Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, space_type>
{
    calculator()
    {
        expression =
            term
            >> *(   ('+' >> term)
                |   ('-' >> term)
                )
            ;

        term =
            factor
            >> *(   ('*' >> factor)
                |   ('/' >> factor)
                )
            ;

        factor =
            uint_
            |   '(' >> expression >> ')'
            |   ('-' >> factor)
            |   ('+' >> factor)
            ;
    }

    rule<Iterator, space_type> expression, term, factor;
};

///////////////////////////////////////////////////////////////////////////////
//  Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef calculator<iterator_type> calculator;

    calculator def;                                 // Our grammar definition
    grammar<calculator> calc(def, def.expression);  // Our grammar

    std::string str;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        bool r = phrase_parse(iter, end, calc, space);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "-------------------------\n";
        }
        else
        {
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \"" << rest << "\"\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
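The interactive loop above can be folded into a small helper for non-interactive checking; a sketch reusing only names from calc1.cpp (the `parses` helper itself is hypothetical):

    // returns true only if the whole input is a valid expression
    bool parses(std::string const& input)
    {
        typedef std::string::const_iterator iterator_type;
        calculator<iterator_type> def;
        grammar<calculator<iterator_type> > calc(def, def.expression);

        iterator_type iter = input.begin();
        iterator_type end = input.end();
        return phrase_parse(iter, end, calc, space) && iter == end;
    }

    // parses("1 + 2 * (3 - 4)")  -> expected: true
    // parses("1 +")              -> expected: false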
123
example/qi/calc2.cpp
Normal file
@@ -0,0 +1,123 @@
/*=============================================================================
    Copyright (c) 2001-2007 Joel de Guzman

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
//  A calculator example demonstrating the grammar and semantic actions
//  using phoenix to "bind" plain functions. The parser prints code suitable
//  for a stack based virtual machine.
//
//  [ JDG May 10, 2002 ]    spirit1
//  [ JDG March 4, 2007 ]   spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>

#include <iostream>
#include <string>

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;

using boost::phoenix::bind;

///////////////////////////////////////////////////////////////////////////////
//  Semantic actions
///////////////////////////////////////////////////////////////////////////////
namespace
{
    void do_int(int n)  { std::cout << "push " << n << std::endl; }
    void do_add()       { std::cout << "add\n"; }
    void do_subt()      { std::cout << "subtract\n"; }
    void do_mult()      { std::cout << "mult\n"; }
    void do_div()       { std::cout << "divide\n"; }
    void do_neg()       { std::cout << "negate\n"; }
}

///////////////////////////////////////////////////////////////////////////////
//  Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, space_type>
{
    calculator()
    {
        expression =
            term
            >> *(   ('+' >> term [bind(&do_add)])
                |   ('-' >> term [bind(&do_subt)])
                )
            ;

        term =
            factor
            >> *(   ('*' >> factor [bind(&do_mult)])
                |   ('/' >> factor [bind(&do_div)])
                )
            ;

        factor =
            uint_ [bind(&do_int, _1)]
            |   '(' >> expression >> ')'
            |   ('-' >> factor [bind(&do_neg)])
            |   ('+' >> factor)
            ;
    }

    rule<Iterator, space_type> expression, term, factor;
};

///////////////////////////////////////////////////////////////////////////////
//  Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef calculator<iterator_type> calculator;

    calculator def;                                 // Our grammar definition
    grammar<calculator> calc(def, def.expression);  // Our grammar

    std::string str;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        bool r = phrase_parse(iter, end, calc, space);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "-------------------------\n";
        }
        else
        {
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \"" << rest << "\"\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
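Since the semantic actions only print, the grammar above is easiest to understand through its output; a worked trace (input hypothetical, output derived from the do_* actions and the grammar's evaluation order):

    // input:  1 + 2 * 3
    // output:
    //     push 1
    //     push 2
    //     push 3
    //     mult
    //     add
    //
    // term(1) fires do_int(1); the '+' alternative then parses term(2 * 3),
    // which pushes 2 and 3 and fires do_mult before do_add runs: postfix
    // order, exactly what a stack machine expects.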
110
example/qi/calc3.cpp
Normal file
@@ -0,0 +1,110 @@
/*=============================================================================
|
||||||
|
Copyright (c) 2001-2007 Joel de Guzman
|
||||||
|
|
||||||
|
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||||
|
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||||
|
=============================================================================*/
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// A calculator example demonstrating the grammar and semantic actions
|
||||||
|
// using phoenix to do the actual expression evaluation. The parser is
|
||||||
|
// essentially an "interpreter" that evaluates expressions on the fly.
|
||||||
|
//
|
||||||
|
// [ JDG June 29, 2002 ] spirit1
|
||||||
|
// [ JDG March 5, 2007 ] spirit2
|
||||||
|
//
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
#include <boost/spirit/include/qi.hpp>
|
||||||
|
#include <boost/spirit/include/phoenix_operator.hpp>
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
using namespace boost::spirit;
|
||||||
|
using namespace boost::spirit::qi;
|
||||||
|
using namespace boost::spirit::ascii;
|
||||||
|
using namespace boost::spirit::arg_names;
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Our calculator grammar
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
template <typename Iterator>
|
||||||
|
struct calculator : grammar_def<Iterator, int(), space_type>
|
||||||
|
{
|
||||||
|
calculator()
|
||||||
|
{
|
||||||
|
expression =
|
||||||
|
term [_val = _1]
|
||||||
|
>> *( ('+' >> term [_val += _1])
|
||||||
|
| ('-' >> term [_val -= _1])
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
term =
|
||||||
|
factor [_val = _1]
|
||||||
|
>> *( ('*' >> factor [_val *= _1])
|
||||||
|
| ('/' >> factor [_val /= _1])
|
||||||
|
)
|
||||||
|
;
|
||||||
|
|
||||||
|
factor =
|
||||||
|
uint_ [_val = _1]
|
||||||
|
| '(' >> expression [_val = _1] >> ')'
|
||||||
|
| ('-' >> factor [_val = -_1])
|
||||||
|
| ('+' >> factor [_val = _1])
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
rule<Iterator, int(), space_type> expression, term, factor;
|
||||||
|
};
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Main program
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
int
|
||||||
|
main()
|
||||||
|
{
|
||||||
|
std::cout << "/////////////////////////////////////////////////////////\n\n";
|
||||||
|
std::cout << "Expression parser...\n\n";
|
||||||
|
std::cout << "/////////////////////////////////////////////////////////\n\n";
|
||||||
|
std::cout << "Type an expression...or [q or Q] to quit\n\n";
|
||||||
|
|
||||||
|
typedef std::string::const_iterator iterator_type;
|
||||||
|
typedef calculator<iterator_type> calculator;
|
||||||
|
|
||||||
|
calculator def; // Our grammar definition
|
||||||
|
grammar<calculator> calc(def, def.expression); // Our grammar
|
||||||
|
|
||||||
|
std::string str;
|
||||||
|
int result;
|
||||||
|
while (std::getline(std::cin, str))
|
||||||
|
{
|
||||||
|
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
|
||||||
|
break;
|
||||||
|
|
||||||
|
std::string::const_iterator iter = str.begin();
|
||||||
|
std::string::const_iterator end = str.end();
|
||||||
|
bool r = phrase_parse(iter, end, calc, result, space);
|
||||||
|
|
||||||
|
if (r && iter == end)
|
||||||
|
{
|
||||||
|
std::cout << "-------------------------\n";
|
||||||
|
std::cout << "Parsing succeeded\n";
|
||||||
|
std::cout << "result = " << result << std::endl;
|
||||||
|
std::cout << "-------------------------\n";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::string rest(iter, end);
|
||||||
|
std::cout << "-------------------------\n";
|
||||||
|
std::cout << "Parsing failed\n";
|
||||||
|
std::cout << "stopped at: \": " << rest << "\"\n";
|
||||||
|
std::cout << "-------------------------\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Bye... :-) \n\n";
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
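The semantic actions in the grammar above ([_val = _1], [_val += _1] and so on) are lazy Phoenix expressions: each one builds a function object that only runs when the attached parser matches, with _val bound to the rule's attribute and _1 to the sub-parser's. A minimal sketch of the underlying mechanism using Phoenix's generic arg1/arg2 placeholders, assuming the phoenix_core forwarding header exists alongside the phoenix_operator.hpp header already included above:

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>

int main()
{
    using boost::phoenix::arg_names::arg1;
    using boost::phoenix::arg_names::arg2;

    // (arg1 += arg2) executes nothing here; it merely builds a function
    // object, just as [_val += _1] above only fires when 'term' matches.
    int val = 40;
    int inc = 2;
    (arg1 += arg2)(val, inc);

    std::cout << val << "\n";   // prints 42
    return 0;
}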
201
example/qi/calc3_lexer.cpp
Normal file
@@ -0,0 +1,201 @@
/*=============================================================================
    Copyright (c) 2001-2007 Joel de Guzman
    Copyright (c) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
//  A calculator example demonstrating the grammar and semantic actions
//  using phoenix to do the actual expression evaluation. The parser is
//  essentially an "interpreter" that evaluates expressions on the fly.
//
//  Additionally, this example shows how to build and use a lexer based on
//  Ben Hanson's Lexertl (http://www.benhanson.net/lexertl.html). This way the
//  parser matches the grammar against the tokens generated by the lexer
//  component and not against the input character stream.
//
//  Even if the benefits of using a lexer for this small calculator grammar
//  may not outweigh the corresponding overhead, we provide this example
//  because it allows one to concentrate on the essentials without having to
//  understand the semantics first.
//
//  [ JDG June 29, 2002 ]   spirit1
//  [ JDG March 5, 2007 ]   spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <string>

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;

///////////////////////////////////////////////////////////////////////////////
//  Our token definition
//  This class is used to define all the tokens to be recognized by the lexer.
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct calculator_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // unsigned integer token definition
        ui = "[1-9][0-9]*";

        // whitespace token definitions
        ws = "[ \\t\\f\\v]+";
        c_comment = "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/";

        // build token set
        skipper = ws | c_comment;   // += is allowed as well

        // associate the tokens and the token set with the lexer
        // default lexer state
        self = token_def<>('+') | '-' | '*' | '/' | '(' | ')';
        self += ui;                 // still default state

        // The token_set 'skipper' gets assigned to a separate lexer state,
        // which allows it to be used separately from the main tokenization
        // (it is used as the skipper parser below).
        self("SKIPPER") = skipper;  // lexer state "SKIPPER"
    }

    // These are the tokens to be recognized by the lexer.
    token_def<unsigned int> ui;     // matched tokens will have an unsigned int
    token_def<> ws, c_comment;      // attribute will not be used

    // This is the only token set explicitly defined by this lexer because it
    // needs to be accessible from the outside (used as skip parser below).
    typename Lexer::token_set skipper;
};

///////////////////////////////////////////////////////////////////////////////
//  Our calculator grammar
//
//  The difference from the original example (calc3.cpp) is that we are
//  specifying a second template parameter referring to the lexer. Further,
//  we use the token set defined above as the skip parser.
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct calculator : grammar_def<Iterator, int(), typename Lexer::token_set>
{
    template <typename TokenDef>
    calculator(TokenDef const& tok)
    {
        // grammar
        expression =
            term                        [_val = _1]
            >> *(   ('+' >> term        [_val += _1])
                |   ('-' >> term        [_val -= _1])
                )
            ;

        term =
            factor                      [_val = _1]
            >> *(   ('*' >> factor      [_val *= _1])
                |   ('/' >> factor      [_val /= _1])
                )
            ;

        factor =
            tok.ui                      [_val = _1]
            |   '(' >> expression       [_val = _1] >> ')'
            |   ('-' >> factor          [_val = -_1])
            |   ('+' >> factor          [_val = _1])
            ;
    }

    rule<Iterator, int(), typename Lexer::token_set> expression, term, factor;
};

///////////////////////////////////////////////////////////////////////////////
//  Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    // iterator type used to expose the underlying input stream
    typedef std::string::const_iterator base_iterator_type;

    // This is the lexer token type to use. The second template parameter
    // lists all attribute types used for token_def's during token definition
    // (see calculator_tokens<> above). Here we use the predefined lexertl
    // token type, but any compatible token type may be used.
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<unsigned int>
    > token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<base_iterator_type, token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef calculator_tokens<lexer_type> calculator_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<calculator_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef calculator<iterator_type, lexer_type> calculator;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    calculator_tokens tokens;                       // Our token definition
    calculator def (tokens);                        // Our grammar definition

    lexer<calculator_tokens> lex(tokens);           // Our lexer
    grammar<calculator> calc(def, def.expression);  // Our grammar

    // get input line by line and feed the parser to evaluate the expressions
    // read in from the input
    std::string str;
    int result;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        // At this point we generate the iterator pair used to expose the
        // tokenized input stream.
        iterator_type iter = lex.begin(str.begin(), str.end());
        iterator_type end = lex.end();

        // Parsing is done based on the token stream, not the character
        // stream read from the input.
        // Note how we use the token_set defined above as the skip parser.
        bool r = phrase_parse(iter, end, calc, result, tokens.skipper);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "result = " << result << std::endl;
            std::cout << "-------------------------\n";
        }
        else
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
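The c_comment definition above is the classic regular expression for C-style comments; the ([^/*][^*]*\*+)* group is what lets it cope with runs of '*' inside the comment body. As a quick sanity check of the pattern outside the lexer, a self-contained sketch assuming Boost.Regex is available (lexertl accepts the same basic syntax here, modulo the escaping required inside a C string literal):

#include <boost/regex.hpp>
#include <iostream>

int main()
{
    // Same pattern string as the c_comment token definition above.
    boost::regex c_comment("\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/");

    std::cout << boost::regex_match("/* simple */", c_comment) << "\n";   // 1
    std::cout << boost::regex_match("/* a ** b */", c_comment) << "\n";   // 1
    std::cout << boost::regex_match("/* not closed", c_comment) << "\n";  // 0
    return 0;
}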
126
example/qi/calc4.cpp
Normal file
@@ -0,0 +1,126 @@
/*=============================================================================
    Copyright (c) 2001-2007 Joel de Guzman

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
//  This time, we'll incorporate error handling and reporting.
//
//  [ JDG June 29, 2002 ]   spirit1
//  [ JDG March 5, 2007 ]   spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>

#include <iostream>
#include <string>

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;

using boost::phoenix::val;
using boost::phoenix::construct;

///////////////////////////////////////////////////////////////////////////////
//  Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, int(), space_type>
{
    calculator()
    {
        expression =
            term                        [_val = _1]
            >> *(   ('+' > term         [_val += _1])
                |   ('-' > term         [_val -= _1])
                )
            ;

        term =
            factor                      [_val = _1]
            >> *(   ('*' > factor       [_val *= _1])
                |   ('/' > factor       [_val /= _1])
                )
            ;

        factor =
            uint_                       [_val = _1]
            |   '(' > expression        [_val = _1] > ')'
            |   ('-' > factor           [_val = -_1])
            |   ('+' > factor           [_val = _1])
            ;

        expression.name("expression");
        term.name("term");
        factor.name("factor");

        on_error<fail>
        (
            expression
          , std::cout
                << val("Error! Expecting ")
                << _4                               // what failed?
                << val(" here: \"")
                << construct<std::string>(_3, _2)   // iterators to error-pos, end
                << val("\"")
                << std::endl
        );
    }

    rule<Iterator, int(), space_type> expression, term, factor;
};

///////////////////////////////////////////////////////////////////////////////
//  Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef calculator<iterator_type> calculator;

    calculator def;                                 // Our grammar definition
    grammar<calculator> calc(def, def.expression);  // Our grammar

    std::string str;
    int result;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        bool r = phrase_parse(iter, end, calc, result, space);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "result = " << result << std::endl;
            std::cout << "-------------------------\n";
        }
        else
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
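calc4.cpp replaces most sequence operators ('>>') with expectation operators ('>'): once a '+' or '(' has matched, the following term or expression and closing ')' are mandatory, so a mismatch raises a hard error instead of silently backtracking, and on_error<fail> formats the report from the error position (_3), the end iterator (_2) and the failed component (_4). A minimal sketch of the same mechanism caught by hand instead of through on_error<>, under the assumption that the qi expectation_failure exception exposes first/last/what_ members as documented in later Boost releases:

#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>

int main()
{
    using namespace boost::spirit;
    using namespace boost::spirit::qi;

    std::string input = "(1+2";    // missing the closing ')'
    std::string::const_iterator first = input.begin();
    std::string::const_iterator last = input.end();

    try
    {
        // With ">>" throughout, the parse would simply fail and backtrack;
        // with ">" the parser commits after "(1+2" and throws on the
        // missing ')'.
        parse(first, last, '(' >> int_ >> '+' >> int_ > ')');
    }
    catch (expectation_failure<std::string::const_iterator> const& e)
    {
        std::cout << "Error! Expecting " << e.what_
                  << " here: \"" << std::string(e.first, e.last) << "\"\n";
    }
    return 0;
}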
Some files were not shown because too many files have changed in this diff.