
spirit2 ! :)

[SVN r44360]
This commit is contained in:
Joel de Guzman
2008-04-13 03:02:30 +00:00
parent bfb86cb157
commit 2431a80d8a
308 changed files with 34985 additions and 1 deletion


@@ -11,7 +11,7 @@
// or https://sf.net/mailarchive/forum.php?thread_id=2692308&forum_id=1595
// for a description of the bug being tested for by this program
//
// the problem should be solved with version 1.3 of phoenix/closures.hpp
// the problem should be solved with version 1.3 of phoenix/closures.hpp>
#if defined(BOOST_SPIRIT_DEBUG) && defined(__GNUC__) && defined(__WIN32__)
// It seems that MinGW has some problems with threads and iostream ?

doc/Jamfile (new file, 27 lines)

@@ -0,0 +1,27 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2007 Hartmut Kaiser
#
# Use, modification and distribution is subject to the Boost Software
# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
project spirit/doc ;
import boostbook : boostbook ;
using quickbook : quickbook ;
boostbook spirit2
:
spirit2.qbk
:
<xsl:param>boost.root=../../../..
<xsl:param>boost.libraries=../../../libraries.htm
<xsl:param>html.stylesheet=../../../../doc/html/boostbook.css
<xsl:param>chunk.section.depth=5
<xsl:param>chunk.first.sections=1
<xsl:param>toc.section.depth=4
<xsl:param>toc.max.depth=4
<xsl:param>generate.section.toc.level=4
;


@@ -0,0 +1,46 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section XXX]
[heading Description]
Description of XXX concept
[variablelist Notation
[[`xxx`] [An XXX]]
]
[heading Valid Expressions]
(For any Forward Sequence the following expressions must be valid:)
In addition to the requirements defined in _XXX-Basic_concept_, for any
XXX the following must be met:
[table
[[Expression] [Semantics] [Return type] [Complexity]]
[[`xxx`] [Semantics of `xxx`] [XXX] [Constant]]
]
[heading Type Requirements]
[table
[[Expression] [Requirements]]
[[`xxx`] [Requirements for `xxx`]]
]
[heading Invariants]
For any XXX xxx the following invariants always hold:
[heading Models]
Links to models of XXX concept
[endsect]


@@ -0,0 +1,56 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section XXX]
[heading Description]
Description of XXX
[heading Header]
    #include <boost/spirit/xxx.hpp>
[heading Synopsis]
    template <typename T>
    struct XXX;
[heading Template parameters]
[table
[[Parameter] [Description] [Default]]
[[`T`] [What is T] []]
]
[heading Model of]
Link to concept
[heading Objects]
Objects provided by the library
[variablelist Notation
[[`xxx`] [An XXX]]
]
Semantics of an expression is defined only where it differs from, or is not
defined in _concept-of_XXX_.
[table
[[Expression] [Semantics] [Return type] [Complexity]]
[[`xxx`] [Semantics of `xxx`] [XXX] [Constant]]
]
[heading Example]
Real example code. Use Quickbook import mechanism to link to actual
working code snippets here.
[endsect]

doc/acknowledgments.qbk (new file, 147 lines)

@@ -0,0 +1,147 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Acknowledgments]
This version of Spirit is a complete rewrite of the /classic/ Spirit, to which
many people have contributed (see below). But there are a couple of people
who already managed to help significantly during this rewrite. We would like to
express our special acknowledgement to:
[*Eric Niebler] for writing Boost.Proto, without which this rewrite wouldn't
have been possible, and for helping with examples, advice, and suggestions on
how to use Boost.Proto in the best possible way.
[*Ben Hanson] for providing us with an early version of his Lexertl library,
which has been proposed for inclusion in Boost (as Boost.Lexer); at the time
of this writing the Boost review for this library is still pending.
__fixme__: Add more people
[heading Acknowledgements from the Spirit V1 /classic/ Documentation]
Special thanks for working on Spirit /classic/ to:
[*Dan Nuffer] for his work on lexers, parse trees, ASTs, XML parsers, the
multi-pass iterator as well as administering Spirit's site, editing,
maintaining the CVS and doing the releases plus a zillion other chores that
were almost taken for granted.
[*Hartmut Kaiser] for his work on the C parser, the work on the C/C++
preprocessor, utility parsers, the original port to Intel 5.0, various work on
Phoenix, porting to v1.5, the meta-parsers, the grouping-parsers, extensive
testing and painstaking attention to details.
[*Martin Wille] who improved grammar multi-thread safety, contributed the eol_p
parser, the dynamic parsers and documentation, and took an active role in
almost every aspect from brainstorming and design to coding. And, as always,
he helps keep the regression tests for g++ on Linux as green as ever :-).
[*Martijn W. Van Der Lee], our Web site administrator, for contributing the
RFC821 parser.
[*Giovanni Bajo] for last minute tweaks of Spirit 1.8.0 for CodeWarrior 8.3.
Actually, I'm ashamed Giovanni was not in this list already. He's done a lot
since Spirit 1.5, the first Boost.Spirit release. He was instrumental in
porting the Spirit iterator code to the new Boost Iterators Library
(version 2). He also did various bug fixes and wrote some tests here and there.
[*Juan Carlos Arevalo-Baeza (JCAB)] for his work on the C++ parser, the position
iterator, ports to v1.5 and keeping the mailing list discussions alive and
kicking.
[*Vaclav Vesely], lots of stuff, the no\_actions directive, various patches and
fixes, the distinct parsers, the lazy parser, some phoenix tweaks and add-ons
(e.g. new\_). Also, [*Stefan Slapeta] and wife for editing Vaclav's distinct
parser doc.
[*Raghavendra Satish] for doing the original v1.3 port to VC++ and his work on
Phoenix.
[*Noah Stein] for following up and helping Ragav on the VC++ ports.
[*Hakki Dogusan], for his original v1.0 Pascal parser.
[*John (EBo) David] for his work on the VM and for watching over my shoulder as
I code, giving the impression of distance eXtreme programming.
[*Chris Uzdavinis] for feeding in comments and valuable suggestions as well as
editing the documentation.
[*Carsten Stoll], for his work on dynamic parsers.
[*Andy Elvey] and his conifer parser.
[*Bruce Florman], who did the original v1.0 port to VC++.
[*Jeff Westfahl] for porting the loop parsers to v1.5 and contributing the file
iterator.
[*Peter Simons] for the RFC date parser example and tutorial plus helping out
with some nitty-gritty details.
[*Markus Sch'''&ouml;'''pflin] for suggesting the end_p parser and lots of other
nifty things and his active presence in the mailing list.
[*Doug Gregor] for mentoring and his ability to see things that others don't.
[*David Abrahams] for giving Joel a job that allows him to still work on Spirit,
plus countless advice and help on C++ and specifically template
metaprogramming.
[*Aleksey Gurtovoy] for his MPL library from which we stole many metaprogramming
tricks especially for less conforming compilers such as Borland and VC6/7.
[*Gustavo Guerra] for his last minute review of Spirit and constant feedback,
plus patches here and there (e.g. proposing the new dot behavior of the real
numerics parsers).
[*Nicola Musatti], [*Paul Snively], [*Alisdair Meredith] and [*Hugo Duncan] for
testing and sending in various patches.
[*Steve Rowe] for his splendid work on the TSTs that will soon be taken into
Spirit.
[*Jonathan de Halleux] for his work on actors.
[*Angus Leeming] for last minute editing work on the 1.8.0 release
documentation, his work on Phoenix and his active presence in the Spirit
mailing list.
[*Joao Abecasis] for his active presence in the Spirit mailing list, providing
user support, participating in the discussions and so on.
[*Guillaume Melquiond] for a last minute patch to multi_pass for 1.8.1.
[*Peder Holt] for his porting work on Phoenix, Fusion and Spirit to VC6.
To Joel's wife Mariel, who did the graphics in this document.
My, there's a lot in this list! And it's a continuing list. We add people to
this list all the time. We hope we did not forget anyone. If we missed
someone you know who has helped in any way, please inform us.
Special thanks also to people who gave feedback and valuable comments,
particularly members of Boost and Spirit mailing lists. This includes all those
who participated in the review:
[*John Maddock], our review manager, [*Aleksey Gurtovoy], [*Andre Hentz],
[*Beman Dawes], [*Carl Daniel], [*Christopher Currie], [*Dan Gohman],
[*Dan Nuffer], [*Daryle Walker], [*David Abrahams], [*David B. Held],
[*Dirk Gerrits], [*Douglas Gregor], [*Hartmut Kaiser], [*Iain K. Hanson],
[*Juan Carlos Arevalo-Baeza], [*Larry Evans], [*Martin Wille],
[*Mattias Flodin], [*Noah Stein], [*Nuno Lucas], [*Peter Dimov],
[*Peter Simons], [*Petr Kocmid], [*Ross Smith], [*Scott Kirkwood],
[*Steve Cleary], [*Thorsten Ottosen], [*Tom Wenisch], [*Vladimir Prus]
Finally thanks to SourceForge for hosting the Spirit project and Boost: a C++
community comprised of extremely talented library authors who participate in
the discussion and peer review of well crafted C++ libraries.
[endsect]

doc/faq.qbk (new file, 10 lines)

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section FAQ]
[endsect]

Binary image file added (78 KiB, not shown)
doc/html/images/Thumbs.db (new binary file, not shown)
Binary image file added (58 KiB, not shown)

doc/introduction.qbk (new file, 10 lines)

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Introduction]
[endsect]

doc/lex.qbk (new file, 50 lines)

@@ -0,0 +1,50 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section __lex__]
[include lex/introduction.qbk]
[section __lex__ Tutorials]
[include lex/lexer_tutorials.qbk]
[include lex/lexer_quickstart1.qbk]
[include lex/lexer_quickstart2.qbk]
[include lex/lexer_quickstart3.qbk]
[endsect]
[section Abstracts]
[section Lexer Primitives]
[include lex/lexer_primitives.qbk]
[include lex/tokens_values.qbk]
[include lex/token_definition.qbk]
[endsect]
[include lex/tokenizing.qbk]
[include lex/lexer_semantic_actions.qbk]
[include lex/lexer_static_model.qbk]
[include lex/parsing_using_a_lexer.qbk]
[include lex/lexer_attributes.qbk]
[include lex/lexer_states.qbk]
[endsect]
[section Quick Reference]
[endsect]
[section Reference]
[section Concepts]
[include reference/lex/lexer.qbk]
[include reference/lex/token.qbk]
[include reference/lex/tokendef.qbk]
[include reference/lex/tokenset.qbk]
[endsect]
[include reference/lex/lexer_class.qbk]
[include reference/lex/token_class.qbk]
[include reference/lex/tokendef_class.qbk]
[include reference/lex/tokenset_class.qbk]
[endsect]
[endsect]

doc/lex/introduction.qbk (new file, 137 lines)

@@ -0,0 +1,137 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Introduction to __lex__]
Lexical scanning is the process of analyzing the stream of input characters and
separating it into strings called tokens, which are typically separated by whitespace.
Most compiler texts start here, and devote several chapters to discussing
various ways to build scanners. __lex__ is a library built to take care of the
complexities of creating a lexer for your grammar (in this documentation we
will use the terms 'lexical analyzer', 'lexer' and 'scanner' interchangeably).
All it needs to create a lexer is the set of patterns describing the
different tokens you want to recognize in the input. To make this a bit more
formal, here are some definitions:
* A token is a sequence of consecutive characters having a collective meaning.
Tokens may have attributes specific to the token type, carrying additional
information about the matched character sequence.
* A pattern is a rule expressed as a regular expression and describing how a
particular token can be formed. For example, [^\[A-Za-z\]\[A-Za-z_0-9\]*] is
a pattern for a rule matching C++ identifiers.
* Characters between tokens are called whitespace; these include spaces, tabs,
newlines, and formfeeds. Many people also count comments as whitespace,
though since some tools such as lint look at comments, this conflation is not
perfect.
[heading Why Use a Separate Lexer]
Typically, lexical scanning is done in a separate module from the parser,
feeding the parser with a stream of input tokens only. Now, theoretically it is
not necessary to do this separation. In the end there is only one set of
syntactical rules defining the language, so in theory we could write the whole
parser in one module. In fact, __qi__ allows you to write parsers without using
a lexer, parsing the input character stream directly, and for the most part this
is the way __spirit__ has been used since its invention.
However, the separation has both practical and theoretical bases and proves to
be very useful in practical applications. In 1956, Noam Chomsky defined the
"Chomsky Hierarchy" of grammars:
* Type 0: Unrestricted grammars (e.g., natural languages)
* Type 1: Context-Sensitive grammars
* Type 2: Context-Free grammars
* Type 3: Regular grammars
The complexity of these grammars increases from regular grammars being the
simplest to unrestricted grammars being the most complex. Similarly, the
complexity of the recognizers for these grammars increases. Although a few
features of some programming languages (such as C++) are Type 1, fortunately
for the most part programming languages can be described using only Types 3
and 2. The neat part about these two types is that they are well known and the
ways to parse them are well understood. It has been shown that any regular
grammar can be parsed using a state machine (finite automaton). Similarly,
context-free grammars can always be parsed using a push-down automaton
(essentially a state machine augmented by a stack).
In real programming languages and practical grammars the parts that can be
handled as regular expressions tend to be the lower-level parts, such as the
definition of an identifier or of an integer value:
    letter     := [a-zA-Z]
    digit      := [0-9]
    identifier := letter [ letter | digit ]*
    integer    := digit*
Higher-level parts of practical grammars tend to be more complex and can't be
implemented using plain regular expressions anymore. We need to store
information on the built-in hardware stack while recursing the grammar
hierarchy, which in fact is the preferred approach used for top-down
parsing. Since it takes a different kind of abstract machine to parse the two
types of grammars, it proved to be efficient to split the lexical scanner off
into a separate module which is built around the idea of a state machine. The
goal here is to use the simplest parsing technique needed for the job.
Another, more practical reason for separating the scanner from the parser is
the need for backtracking during parsing. The input data is a stream of
characters, which is often thought to be processed left to right without any
backtracking. Unfortunately, in practice most of the time that isn't possible.
Almost every language has certain keywords such as IF, FOR, and WHILE. The
decision whether a certain character sequence actually comprises a keyword or
just an identifier can often be made only after seeing the first delimiter
/after/ it. This already is a limited form of backtracking, since we need to
store the string long enough to be able to make the decision. The same is true
for more coarse-grained language features such as nested IF/ELSE statements,
where the decision about which IF the last ELSE statement belongs to can be
made only after seeing the whole construct.
So the structure of a conventional compiler often involves splitting up the
functions of the lower-level and higher-level parsing. The lexical scanner
deals with things at the character level, collecting characters into strings,
converting character sequences into different representations such as integers, etc.,
and passing them along to the parser proper as indivisible tokens. It's also
considered normal to let the scanner do additional jobs, such as identifying
keywords, storing identifiers in tables, etc.
Now, __spirit__ follows this structure, where __lex__ can be used to implement
state machine based recognizers, while __qi__ can be used to build recognizers
for context-free grammars. Since both modules are seamlessly integrated with
each other and with the C++ target language it is even possible to use the
provided functionality to build more complex grammar recognizers.
[heading Advantages of using __lex__]
The advantage of using __lex__ to create the lexical analyzer over using more
traditional tools such as __flex__ is its carefully crafted integration with
the __spirit__ library and the C++ host language. You don't need any external
tools to generate the code, your lexer will be perfectly integrated with the
rest of your program, making it possible to freely access any context
information and data structure. Since the C++ compiler sees all the code it
will generate optimal code no matter what configuration options have been chosen
by the user. __lex__ gives you all the features you could get from a similar
__flex__ program without the need to leave C++ as a host language:
* the definition of tokens is done using regular expressions (patterns)
* the token definitions can refer to special substitution strings (pattern
macros), simplifying pattern definitions
* the generated lexical scanner may have multiple start states
* it is possible to attach code to any of the token definitions; this code gets
executed whenever the corresponding token pattern has been matched
Even though it is possible to use __lex__ to generate C++ code representing
the lexical analyzer (we will refer to that as the /static/ model, described in
more detail in the section __sec_lex_static_model__) - a model
very similar to the way __flex__ operates - we will mainly focus on the
opposite, the /dynamic/ model. You can directly integrate the token definitions
into your C++ program, building the lexical analyzer dynamically at runtime. The
dynamic model is something not supported by __flex__ or other lexical scanner
generators (such as __re2c__, __ragel__, etc.). But it is very flexible and
allows you to speed up the development of your application.
[endsect]


@@ -0,0 +1,12 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Lexer Attributes]
[endsect]


@@ -0,0 +1,15 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Lexer Primitives]
[/ Describe the primitive lexer constructs, such as token_def, token_set? ]
[/ Describe the primitive lexer constructs usable in parsers, such as
in_state[], set_state(), token(), etc. ]
[endsect]


@@ -0,0 +1,97 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Quickstart 1 - A word counter using __lex__]
__lex__ is very modular, which follows the general building principle of the
__spirit__ libraries. You never pay for features you don't use. It is nicely
integrated with the other parts of __spirit__ but nevertheless can be used
separately to build standalone lexical analyzers.
The first quick start example describes a standalone application:
counting characters, words and lines in a file, very similar to what the
well-known Unix command `wc` does (for the full example code see here:
[@../../example/lex/word_count_functor.cpp word_count_functor.cpp]).
[import ../example/lex/word_count_functor.cpp]
[heading Prerequisites]
The only required `#include` specific to /Spirit.Lex/ follows. It is a wrapper
for all necessary definitions to use /Spirit.Lex/ in a standalone fashion, and
on top of the __lexertl__ library. Additionally we `#include` two of the Boost
headers to define `boost::bind()` and `boost::ref()`.
[wcf_includes]
To make all the code below more readable we introduce the following namespaces.
[wcf_namespaces]
[heading Defining Tokens]
The most important step while creating a lexer using __lex__ is to define the
tokens to be recognized in the input sequence. This is normally done by
defining the regular expressions describing the matching character sequences,
and optionally their corresponding token ids. Additionally the defined tokens
need to be associated with an instance of a lexer object as provided by the
library. The following code snippet shows how this can be done using __lex__.
[wcf_token_definition]
[heading Doing the Useful Work]
We will use a setup where we want the __lex__ library to invoke a given
function after any of the generated tokens is recognized. For this reason
we need to implement a functor taking at least the generated token as an
argument and returning a boolean value that allows stopping the tokenization
process. The default token type used in this example carries a token value of
the type `iterator_range<BaseIterator>` pointing to the matched range in the
underlying input sequence.
[wcf_functor]
All that's left is to write some boilerplate code helping to tie together the
pieces described so far. To simplify this example we call the `lex::tokenize()`
function implemented in __lex__ (for a more detailed description of this
function see here: __fixme__), even though we could have written a loop to
iterate over the lexer iterators [`first`, `last`) as well.
[heading Pulling Everything Together]
[wcf_main]
[heading Comparing __lex__ with __flex__]
This example was deliberately chosen to be as similar as possible to the
equivalent __flex__ program (see below), which isn't too different from what
has to be written when using __lex__.
[note Interestingly enough, performance comparisons of lexical analyzers
written using __lex__ with equivalent programs generated by
__flex__ show that both have comparable execution speeds!
Generally, thanks to the highly optimized __lexertl__ library and
due to its carefully designed integration with __spirit__ the
abstraction penalty to be paid for using __lex__ is negligible.
]
The remaining examples in this tutorial will use more sophisticated features
of __lex__, mainly to allow further simplification of the code to be written,
while maintaining the similarity with corresponding features of __flex__.
__lex__ has been designed to be as similar as possible to __flex__, which
is why this documentation will provide the corresponding __flex__ code for the
shown __lex__ examples almost everywhere. Consequently, here is the __flex__
code corresponding to the example shown above.
[wcf_flex_version]
[endsect]


@@ -0,0 +1,133 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Quickstart 2 - A better word counter using __lex__]
People who know __flex__ will probably complain that the example from the
section __sec_lex_quickstart_1__ is overly complex and not
written to leverage the possibilities provided by this tool. In particular the
previous example did not directly use the lexer actions to count the lines,
words and characters. So the example provided in this step of the tutorial will
show how to use semantic actions in __lex__. Even though it still
only allows counting text elements, it introduces other new concepts and
configuration options along the way (for the full example code
see here: [@../../example/lex/word_count_lexer.cpp word_count_lexer.cpp]).
[import ../example/lex/word_count_lexer.cpp]
[heading Prerequisites]
In addition to the only required `#include` specific to /Spirit.Lex/ this
example needs to include a couple of header files from the __phoenix2__
library. This example shows how to attach functors to token definitions, which
could be done using any type of C++ technique resulting in a callable object.
Using __phoenix2__ for this task simplifies things and avoids adding
dependencies on other libraries (__phoenix2__ is already in use for
__spirit__ anyway).
[wcl_includes]
To make all the code below more readable we introduce the following namespaces.
[wcl_namespaces]
To give a preview of what to expect from this example, here is the flex program
which has been used as the starting point. The useful code is directly included
inside the actions associated with each of the token definitions.
[wcl_flex_version]
[heading Semantic Actions in __lex__]
__lex__ uses a very similar way of associating actions with the token
definitions (which should look familiar to anybody knowledgeable about
__spirit__ as well): specifying the operations to execute inside a pair of
`[]` brackets. In order to be able to attach semantic actions to token
definitions, an instance of a `token_def<>` is defined for each of them.
[wcl_token_definition]
The semantics of the shown code is as follows. The code inside the `[]`
brackets will be executed whenever the corresponding token has been matched by
the lexical analyzer. This is very similar to __flex__, where the action code
associated with a token definition gets executed after the recognition of a
matching input sequence. The code above uses functors constructed using
__phoenix2__, but it is possible to insert any C++ functor as long as it
exposes the interface:
    void f (Range r, std::size_t id, Context& ctx, bool& matched);
[variablelist where:
[[`Range r`] [This is a `boost::iterator_range` holding two
iterators pointing to the matched range in the
underlying input sequence. The type of the
held iterators is the same as specified while
defining the type of the `lexertl_lexer<...>`
(its first template parameter).]]
[[`std::size_t id`] [This is the token id for the matched token.]]
[[`Context& ctx`] [This is a reference to a lexer specific,
unspecified type, providing the context for the
current lexer state. It can be used to access
different internal data items and is needed for
lexer state control from inside a semantic
action.]]
[[`bool& matched`] [This boolean value is pre-initialized to `true`.
If the functor sets it to `false` the lexer
stops calling any semantic actions attached to
this token and behaves as if the token had not
been matched in the first place.]]
]
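As a purely illustrative sketch (not taken from the library's examples; the
functor name and the counter it updates are made up here, only the call
signature comes from the description above), a hand-written functor satisfying
this interface could look like this:

    #include <cstddef>
    #include <iterator>

    // illustrative only: a lexer semantic action functor which adds the
    // length of every matched token to an externally owned counter
    struct count_chars
    {
        explicit count_chars(std::size_t& count) : count(count) {}

        template <typename Range, typename Context>
        void operator()(Range r, std::size_t /*id*/, Context& /*ctx*/,
            bool& /*matched*/) const
        {
            // r holds the pair of iterators delimiting the matched input
            count += std::distance(r.begin(), r.end());
        }

        std::size_t& count;
    };

An instance of such a functor would be attached to a token definition inside
the `[]` brackets shown above, just like the __phoenix2__ based actions.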
Even if it is possible to write your own functor implementations, the preferred
way of defining lexer semantic actions is to use __phoenix2__. In this case you
can access the three parameters described in the table above by using the
predefined __phoenix2__ placeholders: `_1` for the iterator range, `_2` for the
token id, `_3` for the reference to the lexer state, and `_4` for the reference
to the boolean value signaling the outcome of the semantic action.
[important All placeholders (`_1`, `_2`, etc.) used in /lexer/ semantic
actions in conjunction with functors created based on __phoenix2__
need to be imported from the `namespace boost::phoenix::arg_names`
(and *not* `namespace boost::spirit::arg_names`, which is
different from using placeholders in __qi__ or __karma__).
Using the wrong placeholders leads to subtle compilation errors
which are difficult to trace back to their cause.
]
[heading Associating Token Definitions with the Lexer]
If you compare this with the code from __sec_lex_quickstart_1__ with regard to
the way token definitions are associated with the lexer, you will notice
a different syntax being used here. While in the previous example we have been
using the `self.add()` style of the API, here we directly assign the token
definitions to `self`, combining the different token definitions using the `|`
operator. Here is the code snippet again:
    self = word [++ref(w), ref(c) += distance(_1)]
         | eol  [++ref(c), ++ref(l)]
         | any  [++ref(c)]
         ;
This way we have a very powerful and natural way of building the lexical
analyzer. If translated into English this may be read as: The lexical analyzer
will recognize ('`=`') tokens as defined by any of ('`|`') the token
definitions `word`, `eol`, and `any`.
A second difference from the previous example is that we do not explicitly
specify any token ids to use for the separate tokens. Using semantic actions to
trigger some useful work freed us from the need to define these. To ensure
every token gets assigned an id the __lex__ library internally assigns unique
numbers to the token definitions, starting with the constant defined by
`boost::spirit::lex::min_token_id`.
[endsect]


@@ -0,0 +1,151 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Quickstart 3 - Counting Words Using a Parser]
The whole purpose of integrating __lex__ as part of the __spirit__ library was
to add a library that allows merging lexical analysis with the parsing
process as defined by a __spirit__ grammar. __spirit__ parsers read their input
from an input sequence accessed by iterators. So naturally, we chose iterators
to be used as the interface between the lexer and the parser. A second goal of
the lexer/parser integration was to enable the usage of possibly different
lexical analyzer libraries. The utilization of iterators seemed to be the
right choice from this standpoint as well, mainly because these can be used as
an abstraction layer hiding implementation specifics of the used lexer
library. The [link spirit.lex.flowcontrol picture] below shows the common
flow control implemented while parsing combined with lexical analysis.
[fig ./images/FlowOfControl.png..The common flow control implemented while parsing combined with lexical analysis..spirit.lex.flowcontrol]
Another problem related to the integration of the lexical analyzer with the
parser was to find a way for the defined tokens to be blended syntactically
with the grammar definition syntax of __spirit__. For tokens defined as
instances of the `token_def<>` class the most natural way of integration was
to allow their direct use as parser components. Semantically these parser
components succeed in matching their input whenever the corresponding token type
has been matched by the lexer. This quick start example will demonstrate this
(and more) by counting words again, simply by adding up the numbers inside
of semantic actions of a parser (for the full example code see here:
[@../../example/lex/word_count.cpp word_count.cpp]).
[import ../example/lex/word_count.cpp]
[heading Prerequisites]
This example uses two of the __spirit__ library components: __lex__ and __qi__;
consequently, we have to `#include` the corresponding header files. Again, we
need to include a couple of header files from the __phoenix2__ library. This
example shows how to attach functors to parser components, which
could be done using any type of C++ technique resulting in a callable object.
Using __phoenix2__ for this task simplifies things and avoids adding
dependencies on other libraries (__phoenix2__ is already in use for
__spirit__ anyway).
[wcp_includes]
To make all the code below more readable we introduce the following namespaces.
[wcp_namespaces]
[heading Defining Tokens]
If compared to the two previous quick start examples (__sec_lex_quickstart_1__
and __sec_lex_quickstart_2__) the token definition class for this example does
not reveal any surprises. However, it uses lexer token definition macros to
simplify the composition of the regular expressions, which will be described in
more detail in the section __fixme__. Generally, any token definition is usable
without modification either for a standalone lexical analyzer or in conjunction
with a parser.
[wcp_token_definition]
[heading Using Token Definition Instances as Parsers]
While the integration of lexer and parser in the control flow is achieved by
using special iterators wrapping the lexical analyzer, we still need a means of
expressing in the grammar what tokens to match and where. The token definition
class above uses three different ways of defining a token:
* Using an instance of a `token_def<>`, which is handy whenever you need to
specify a token attribute (for more information about lexer related
attributes please look here: __sec_lex_attributes__).
* Using a single character as the token, in this case the character represents
itself as a token, where the token id is the ASCII character value.
* Using a regular expression represented as a string, where the token id needs
to be specified explicitly to make the token accessible from the grammar
level.
All three token definition methods require a different form of grammar
integration. But as you can see from the following code snippet, each of these
methods is straightforward and blends the corresponding token instance
naturally with the surrounding __qi__ grammar syntax.
[table
[[Token definition] [Parser integration]]
[[`token_def<>`] [The `token_def<>` instance is directly usable as a
parser component. Parsing of this component will
succeed if the regular expression used to define
this has been matched successfully.]]
[[single character] [The single character is directly usable in the
grammar, under certain circumstances it needs to be
wrapped by a `char_()` parser component, though.
Parsing of this component will succeed if the
single character has been matched.]]
[[explicit token id] [To use an explicit token id in a __qi__ grammar you
are required to wrap it with the special `token()`
parser component. Parsing of this component will
succeed if the current token has the same token
id as specified in the expression `token(<id>)`.]]
]
The grammar definition below uses each of the three types demonstrating their
usage.
[wcp_grammar_definition]
As already described (see: __sec_qi_karma_attributes__), the __qi__ parser
library builds upon a set of fully attributed parser components.
Consequently, all the token definitions support this attribute model as
well. The most natural way of implementing this was to use the token values as
the attributes exposed by the parser component corresponding to the token
definition (you can read more about this topic here: __sec_lex_tokenvalues__).
The example above takes advantage of the full integration of the token values
as the `token_def<>`'s parser attributes: the `word` token definition is
declared as a `token_def<std::string>`, making every instance of a `word` token
carry the string representation of the matched input sequence as its value.
The semantic action attached to `tok.word` receives this string (represented by
the `_1` placeholder) and uses it to calculate the number of matched
characters: `ref(c) += size(_1)`.
[important All placeholders (`_1`, `_2`, etc.) used in /parser/ semantic
actions in conjunction with functors created based on __phoenix2__
need to be imported from the `namespace boost::spirit::arg_names`
(and *not* `namespace boost::phoenix::arg_names`, which is
different from using placeholders in __lex__).
Using the wrong placeholders leads to subtle compilation errors
which are difficult to trace back to their cause.
]
[heading Pulling Everything Together]
The main function needs to implement a bit more logic now as we have to
initialize and start not only the lexical analysis but the parsing process as
well. The three type definitions (`typedef` statements) simplify the creation
of the lexical analyzer and the grammar. After reading the contents of the
given file into memory it calls the function __api_tokenize_and_parse__ to
initialize the lexical analysis and parsing processes.
[wcp_main]
[endsect]


@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Lexer Semantic Actions]
[endsect]

doc/lex/lexer_states.qbk (new file, 21 lines)

@@ -0,0 +1,21 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Lexer States]
[heading Controlling the Lexer State from Lexer Semantic Actions]
[heading Controlling the Lexer State from Parser Semantic Actions]
[heading Using a Lexer State for the Skip Parser]
[endsect]


@@ -0,0 +1,119 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section The /Static/ Lexer Model]
So far the documentation of __lex__ has mostly described the features of
the /dynamic/ model, where the tables needed for lexical analysis are generated
from the regular expressions at runtime. The big advantage of the dynamic model
is its flexibility, and its integration with the __spirit__ library and the C++
host language. Its big disadvantage is the need to spend additional runtime to
generate the tables, which might especially be a limitation for larger lexical
analyzers. The /static/ model strives to build upon the smooth integration with
__spirit__ and C++, and reuses large parts of the __lex__ library as described
so far, while overcoming the additional runtime requirements by using
pre-generated tables and tokenizer routines. To make the code generation as
simple as possible, it is possible to reuse the token definition types developed
using the /dynamic/ model without any changes. As will be shown in this
section, building a code generator based on an existing token definition type
is a matter of writing 3 lines of code.
Assuming you already built a dynamic lexer for your problem, there are two more
steps needed to create a static lexical analyzer using __lex__:
# generating the C++ code for the static analyzer (including the tokenization
function and corresponding tables), and
# modifying the dynamic lexical analyzer to use the generated code.
Both steps are described in more detail in the two sections below (for the full
source code used in this example see the code here:
[@../../example/lex/static_lexer/word_count_tokens.hpp the common token definition],
[@../../example/lex/static_lexer/word_count_generate.cpp the code generator],
[@../../example/lex/static_lexer/word_count_static.hpp the generated code], and
[@../../example/lex/static_lexer/word_count_static.cpp the static lexical analyzer]).
[import ../example/lex/static_lexer/word_count_tokens.hpp]
[import ../example/lex/static_lexer/word_count_static.cpp]
[import ../example/lex/static_lexer/word_count_generate.cpp]
But first we provide the code snippets needed to understand the further
descriptions. Both the definition of the used token identifiers and the
definition of the token definition class in this example are put into a
separate header file to make them available to the code generator and the
static lexical analyzer.
[wc_static_tokenids]
The important point here is that the token definition class is no different
from a similar class to be used for a dynamic lexical analyzer. The library
has been designed in a way that all components (dynamic lexical analyzer, code
generator, and static lexical analyzer) can reuse the very same token definition
syntax.
[wc_static_tokendef]
The only thing changing between the three different use cases is the template
parameter used to instantiate a concrete token definition. For the dynamic
model and the code generator you will probably use the __class_lexertl_lexer__
template, whereas for the static model you will use the
__class_lexertl_static_lexer__ type as the template parameter.
This example not only shows how to build a static lexer, but it additionally
demonstrates how such a lexer can be used for parsing in conjunction with a
__qi__ grammar. For completeness we provide the simple grammar used in this
example. As you can see, this grammar does not have any dependencies on the
static lexical analyzer, and for this reason it is not different from a grammar
used either without a lexer or using a dynamic lexical analyzer as described
before.
[wc_static_grammar]
[heading Generating the Static Analyzer]
The first additional step to perform in order to create a static lexical
analyzer is to create a small standalone program for creating the lexer tables
and the corresponding tokenization function. For this purpose the __lex__
library exposes a special API - the function __api_generate_static__. It
implements the whole code generator; no further code is needed. All it
takes to invoke this function is to supply a token definition instance, an
output stream to generate the code to, and an optional string to be used
as a prefix for the name of the generated function. All in all, just a couple
of lines of code.
[wc_static_generate_main]
The shown code generator will generate output which should be stored in a file
for later inclusion into the static lexical analyzer as shown in the next
topic (the full generated code can be viewed
[@../../example/lex/static_lexer/word_count_static.hpp here]).
[heading Modifying the Dynamic Analyzer]
The second required step to convert an existing dynamic lexer into a static one
is to change your main program in two places. First, you need to change the
type of the used lexer (that is the template parameter used while instantiating
your token definition class). While in the dynamic model we have been using the
__class_lexertl_lexer__ template, we now need to change that to the
__class_lexertl_static_lexer__ type. The second change is tightly related to
the first one and involves correcting the corresponding `#include` statement to:
[wc_static_include]
Otherwise the main program is not different from an equivalent program using
the dynamic model. This feature makes it really easy for instance to develop
the lexer in dynamic mode and to switch to the static mode after the code has
been stabilized. The simple generator application shown above enables the
integration of the code generator into any existing build process. The
following code snippet provides the overall main function, highlighting
the code to be changed.
[wc_static_main]
[endsect]


@@ -0,0 +1,59 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section __lex__ Tutorials Overview]
The __lex__ library implements several components on top of possibly different
lexer generator libraries. It exposes a pair of iterators, which, when
dereferenced, return a stream of tokens generated from the underlying character
stream. The generated tokens are based on the token definitions supplied by the
user.
Currently, __lex__ is built on top of Ben Hanson's excellent __lexertl__
library (which is a proposed Boost library). __lexertl__ provides the necessary
functionality to build state
machines based on a set of supplied regular expressions. But __lex__ is not
restricted to being used with __lexertl__. We expect it to be usable in
conjunction with any other lexical scanner generator library; all that needs
to be implemented is a set of wrapper objects exposing a well-defined
interface as described in this documentation.
[note For the sake of clarity all examples in this documentation assume
__lex__ to be used on top of __lexertl__.]
Building a lexer using __lex__ is highly configurable, and most of this
configuration has to be done at compile time. Almost all of the configurable
parameters have generally useful default values, though, which means that
starting a project is easy and straightforward. Here is a (non-exhaustive) list
of features you can tweak to adjust the generated lexer instance to your actual
needs:
* Select and customize the token type to be generated by the lexer instance.
* Select and customize the token value types the generated token instances will
be able to hold.
* Select the iterator type of the underlying input stream, which will be used
as the source for the character stream to tokenize.
* Customize the iterator type returned by the lexer to enable debug support,
special handling of certain input sequences, etc.
* Select the /dynamic/ or the /static/ runtime model for the lexical
analyzer.
Special care has been taken during the development of the library that
optimal code will be generated regardless of the configuration options
selected.
The series of tutorial examples in this section will guide you through some
common use cases, helping you to understand the big picture. The first two quick
start examples (__sec_lex_quickstart_1__ and __sec_lex_quickstart_2__)
introduce the __lex__ library while building two standalone applications, which
are not connected to and do not depend on any other part of __spirit__. The section
__sec_lex_quickstart_3__ demonstrates how to use a lexer in conjunction with a
parser (where the parser is, of course, built using __qi__).
[endsect]


@@ -0,0 +1,15 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Parsing using a Lexer]
[/ write about integration of lexer component with __qi__]
[/ write about iterator interface exposed by a __lex__ lexer]
[endsect]


@@ -0,0 +1,11 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Ways to define Tokens]
[endsect]

doc/lex/tokenizing.qbk (new file, 15 lines)

@@ -0,0 +1,15 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Tokenizing Input Data]
[heading The tokenize() function]
[heading The generate_static() function]
[endsect]

doc/lex/tokens_values.qbk (new file, 207 lines)

@@ -0,0 +1,207 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section About Tokens and Token Values]
As already discussed, lexical scanning is the process of analyzing the stream
of input characters and separating it into strings called tokens, most of the
time separated by whitespace. The different token types recognized by a lexical
analyzer often get assigned unique integer token identifiers (token ids). These
token ids are normally used by the parser to identify the current token without
having to look at the matched string again. The __lex__ library is not
different with respect to this, as it uses the token ids as the main means of
identification of the different token types defined for a particular lexical
analyzer. However, it is different from commonly used lexical analyzers in the
sense that it returns (references to) instances of a (user defined) token class
to the user. The only real limitation posed on this token class is consequently
that it has to carry at least the token id of the token it represents. For more
information about the interface a user defined token type has to expose please
look at the __sec_ref_lex_token__ reference. The library provides a default
token type based on the __lexertl__ library which should be sufficient in most
use cases: the __class_lexertl_token__ type. This section focuses on the
description of general features a token class may implement and how this
integrates with the other parts of the __lex__ library.
[heading The Anatomy of a Token]
It is very important to understand the difference between a token definition
(represented by the __class_token_def__ template) and a token itself (for
instance represented by the __class_lexertl_token__ template).
The token definition is used to describe the main features of a particular
token type, especially:
* to simplify the definition of a token type using a regular expression pattern
applied while matching this token type,
* to associate a token type with a particular lexer state,
* to optionally assign a token id to a token type,
* to optionally associate some code to execute whenever an instance of this
token type has been matched,
* and to optionally specify the attribute type of the token value.
The token itself is a data structure returned by the lexer iterators.
Dereferencing a lexer iterator returns a reference to the last matched token
instance. It encapsulates the part of the underlying input sequence matched by
the regular expression used during the definition of this token type.
Incrementing the lexer iterator invokes the lexical analyzer to
match the next token by advancing the underlying input stream. The token data
structure contains at least the token id of the matched token type,
allowing the matched character sequence to be identified. Optionally, the token
instance may contain a token value and/or the lexer state this token instance
was matched in. The following [link spirit.lex.tokenstructure figure] shows the
schematic structure of a token.
[fig ./images/TokenStructure.png..The structure of a token..spirit.lex.tokenstructure]
The token value and the token state may be omitted for optimization reasons,
preventing the token from carrying more data than actually required. This
configuration can be achieved by supplying appropriate template parameters
for the __class_lexertl_token__ template while defining the token type.
The lexer iterator returns the same token type for each of the different
matched token definitions. To accommodate the possibly different token
/value/ types exposed by the various token types (token definitions), the
general type of the token value is a __boost_variant__. As a minimum (for the
default configuration) this token value variant will be configured to always
hold a __boost_iterator_range__ containing the pair of iterators pointing to
the matched input sequence for this token instance.
[note If the lexical analyzer is used in conjunction with a __qi__ parser, the
stored __boost_iterator_range__ token value will be converted to the
requested token type (parser attribute) exactly once. This happens at the
time of the first access to the token value requiring the
corresponding type conversion. The converted token value will be stored
in the __boost_variant__ replacing the initially stored iterator range.
This avoids converting the input sequence to the token value more than
once, thus optimizing the integration of the lexer with __qi__, even
during parser backtracking.
]
Here is the template prototype of the __class_lexertl_token__ template:
    template <
        typename Iterator = char const*,
        typename AttributeTypes = mpl::vector0<>,
        typename HasState = mpl::true_
    >
    struct lexertl_token;
[variablelist where:
[[Iterator] [This is the type of the iterator used to access the
underlying input stream. It defaults to a plain
`char const*`.]]
[[AttributeTypes] [This is either an mpl sequence containing all
attribute types used for the token definitions or the
type `omitted`. If the mpl sequence is empty (which is
the default), all token instances will store a
`boost::iterator_range<Iterator>` pointing to the start
and the end of the matched section in the input stream.
If the type is `omitted`, the generated tokens will
contain no token value (attribute) at all.]]
[[HasState] [This is either `mpl::true_` or `mpl::false_`, controlling
                        whether the generated token instances will contain the
                        lexer state they were generated in. The default is
                        `mpl::true_`, so all token instances will contain the
                        lexer state.]]
]
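For illustration, the following sketch shows what an instantiation of this
template could look like. It is not taken from the library's examples; the
`base_iterator_type` typedef and the chosen value types are assumptions made up
for this snippet (the real code is shown in the examples further below).

    // sketch: a token type whose instances may carry either an unsigned int
    // or a std::string as their token value (in addition to the default
    // iterator range), and which records the lexer state it was matched in
    typedef char const* base_iterator_type;

    typedef lexertl_token<
        base_iterator_type, mpl::vector<unsigned int, std::string>
    > token_type;
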
Normally, during its construction, a token instance always holds the
__boost_iterator_range__ as its token value (except if it has been defined
using the `omitted` token value type). This iterator range is then
converted in place to the requested token value type (attribute) when it is
requested for the first time.
[heading The Physiognomy of a Token Definition]
The token definitions (represented by the __class_token_def__ template) are
normally used as part of the definition of the lexical analyzer. At the same
time a token definition instance may be used as a parser component in __qi__.
The template prototype of this class is shown here:
template<
typename Attribute = unused_type,
typename Char = char
>
class token_def;
[variablelist where:
[[Attribute] [This is the type of the token value (attribute)
supported by token instances representing this token
type. This attribute type is exposed to the __qi__
library, whenever this token definition is used as a
parser component. The default attribute type is
`unused_type`, which means the token instance holds a
__boost_iterator_range__ pointing to the start
and the end of the matched section in the input stream.
If the attribute is `omitted`, the token instance will
expose no token value at all. Any other type will be
used directly as the token value type.]]
[[Char] [This is the value type of the iterator for the
underlying input sequence. It defaults to `char`.]]
]
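To give these parameters some context, a couple of token definitions might be
declared as in the following sketch. The names and patterns are modeled after
the example discussed below and are for illustration only.

    // sketch: token definitions with and without an explicit token value type
    token_def<> if_, else_, while_;        // value is the matched iterator range
    token_def<std::string> identifier;     // value exposed as a std::string
    token_def<unsigned int> constant;      // value exposed as an unsigned int

    // the patterns (regular expressions) associated with the token types
    if_ = "if";
    else_ = "else";
    while_ = "while";
    identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
    constant = "[0-9]+";
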
The semantics of the template parameters for the token type and the token
definition type are very similar and interdependent. As a rule of thumb, you can
think of the token definition type as the means of specifying everything
related to a single specific token type (such as `identifier` or `integer`).
On the other hand, the token type is used to define the general properties of all
token instances generated by the __lex__ library.
[important If you don't list any token value types in the token type definition
           declaration (resulting in the usage of the default __boost_iterator_range__
           token type), everything will compile and work just fine, just a bit
           less efficiently. This is because the token value will be converted
           from the matched input sequence every time it is requested.
           But as soon as you specify at least one token value type while
           defining the token type, you'll have to list all value types used
           for __class_token_def__ declarations in the token type as well,
           otherwise compilation errors will occur.
]
[heading Examples of using __class_lexertl_token__]
Let's start with some examples. We refer to one of the __lex__ examples (for
the full source code of this example please see
[@../../example/lex/example4.cpp example4.cpp]).
[import ../example/lex/example4.cpp]
The first code snippet shows an excerpt of the token definition class, the
definition of a couple of token types. Some of the token types do not expose a
special token value (`if_`, `else_`, and `while_`). Their token value will
always hold the iterator range of the matched input sequence only. The token
definitions for the `identifier` and the integer `constant` are specialized
to expose an explicit token value type each: `std::string` and `unsigned int`.
[example4_token_def]
As the parsers generated by __qi__ are fully attributed, any __qi__ parser
component needs to expose a certain type as its parser attribute. Naturally,
the __class_token_def__ exposes the token value type as its parser attribute,
enabling a smooth integration with __qi__.
The next code snippet demonstrates how the required token value types are
specified while defining the token type to use. All of the token value types
used for at least one of the token definitions have to be re-iterated for the
token type definition as well.
[example4_token]
To prevent the token from having a token value at all, the special tag `omitted` can
be used: `token_def<omitted>` and `lexertl_token<base_iterator_type, omitted>`.
[endsect]

24
doc/notes.qbk Normal file
View File

@@ -0,0 +1,24 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Notes]
[section Portability]
[endsect]
[section Porting from Spirit 1.8.x]
[endsect]
[section Style Guide]
[include notes/style_guide.qbk]
[endsect]
[section Techniques]
[endsect]
[endsect]

87
doc/notes/style_guide.qbk Normal file
View File

@@ -0,0 +1,87 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
At some point, especially when there are lots of semantic actions attached to
various points, the grammar tends to be quite difficult to follow. In order to
keep an easy-to-read, consistent and aesthetically pleasing look to the Spirit
code, the following coding style guide is advised.
This coding style is adapted and extended from the ANTLR/PCCTS style (Terence
Parr) and [@http://groups.yahoo.com/group/boost/files/coding_guidelines.html
Boost coding guidelines] (David Abrahams and Nathan Myers) and is the
combined work of Joel de Guzman, Chris Uzdavinis and Hartmut Kaiser.
* Rule names use std C++ (Boost) convention. The rule name may be very long.
* The '=' is neatly indented 4 spaces below. Like in Boost, use spaces instead
of tabs.
* Breaking the operands into separate lines puts the semantic actions neatly
to the right.
* Semicolon at the last line terminates the rule.
* The adjacent parts of a sequence should be indented so that everything
  belonging to one level sits at the same indentation level.
program
= program_heading [heading_action]
>> block [block_action]
>> '.'
| another_sequence
>> etc
;
* Prefer literals in the grammar instead of identifiers, e.g. `"program"` instead
of `PROGRAM`, `'>='` instead of `GTE` and `'.'` instead of `DOT`. This makes it much
easier to read. If this isn't possible (for instance where the used tokens
must be identified through integers) capitalized identifiers should be used
instead.
* Breaking the operands may not be needed for short expressions.
e.g. `*(',' >> file_identifier)` as long as the line does not
exceed 80 characters.
* If a sequence fits on one line, put spaces inside the parentheses
to clearly separate them from the rules.
program_heading
= no_case["program"]
>> identifier
>> '('
>> file_identifier
>> *( ',' >> file_identifier )
>> ')'
>> ';'
;
* Nesting directives: If a rule does not fit on one line (80 characters)
it should be continued on the next line indented by one level. The brackets
of directives, semantic expressions (using Phoenix or LL lambda expressions)
or parsers should be placed as follows.
identifier
= no_case
[
lexeme
[
alpha >> *(alnum | '_') [id_action]
]
]
;
* Nesting unary operators (e.g. Kleene star): Unary rule operators
(Kleene star, `'!'`, `'+'` etc.) should be moved out one space before
the corresponding indentation level, if this rule has a body or a
sequence after it, which does not fit on one line. This makes the
formatting more consistent and moves the rule 'body' to the same
indentation level as the rule itself, highlighting the unary operator.
block
= *( label_declaration_part
| constant_definition_part
| type_definition_part
| variable_declaration_part
| procedure_and_function_declaration_part
)
>> statement_part
;

97
doc/outline.txt Normal file
View File

@@ -0,0 +1,97 @@
# Copyright (C) 2001-2008 Joel de Guzman
# Copyright (C) 2001-2008 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Preface
What's New
Introduction
Qi and Karma
Tutorials
Abstracts
Parsing Expression Grammar
Parsing and Generating
Primitives
Operators
Attributes
Semantic Actions
Directives
Rules
Grammars
Debugging
Error Handling
Parse Trees and ASTs
Quick Reference
Reference
Concepts
Parser
Generator
Parser Director
Generator Director
Char
String
Numeric
Binary
Directive
Action
Nonterminal
Operator
Stream
Auxiliary
Debug
Lex
Introduction
Tutorials
Abstracts
Parsing using a Lexer
Lexer Primitives
Lexer States
Lexer Attributes
Lexer Semantic Actions
Quick Reference
Reference
Concepts
Lexer
Token
TokenDef
TokenSet
Lexer Class
Token Class
TokenDef Class
TokenSet Class
FAQ
Notes
Portability
Porting from Spirit 1.8.x
Style Guide
Techniques
Rationale
Acknowledgments
References
-----------------------------------------------------------------
Concepts Outline:
Description
Notation
Valid Expressions
Expression | Semantics | Return type | Complexity
Type Requirements
Expression | Requirements
Invariants
Models
Reference Page Outline:
Description
Header
Synopsis
Template parameters
Model of
Objects
Expression Semantics
Expression | Semantics | Return type | Complexity
Example

217
doc/preface.qbk Normal file
View File

@@ -0,0 +1,217 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Preface]
[:['["Examples of designs that meet most of the criteria for
"goodness" (easy to understand, flexible, efficient) are a recursive-
descent parser, which is traditional procedural code. Another example
is the STL, which is a generic library of containers and algorithms
depending crucially on both traditional procedural code and on
parametric polymorphism.]] [*--Bjarne Stroustrup]]
[heading History]
[heading /80s/]
In the mid 80s, Joel wrote his first calculator in Pascal. It was
an unforgettable coding experience. He was amazed at how a mutually
recursive set of functions could model a grammar specification. In time,
the skills he acquired from that academic experience became very
practical. Periodically Joel was tasked to do some parsing. For
instance, whenever he needed to perform any form of I/O, even in
binary, he tried to approach the task somewhat formally by writing a
grammar using Pascal-like syntax diagrams and then writing a
corresponding recursive-descent parser. This worked very well.
[heading /90s/]
The arrival of the Internet and the World Wide Web magnified this
thousand-fold. At one point Joel had to write an HTML parser for a Web
browser project. He got a recursive-descent HTML parser working based
on the W3C formal specifications easily. He was certainly glad that
HTML had a formal grammar specification. Because of the influence of
the Internet, Joel then had to do more parsing. RFC specifications
were everywhere. SGML, HTML, XML, even email addresses and those
seemingly trivial URLs were all formally specified using small
EBNF-style grammar specifications. This made him wish for a tool similar to
big-time parser generators such as YACC and ANTLR, where a parser is
built automatically from a grammar specification. Yet, he wanted it to
be extremely small; small enough to fit in his pocket, yet scalable.
It must be able to practically parse simple grammars such as email
addresses to moderately complex grammars such as XML and perhaps some
small to medium-sized scripting languages. Scalability is a prime
goal. You should be able to use it for small tasks such as parsing
command lines without incurring a heavy payload, as you do when you
are using YACC or PCCTS. Even now that it has evolved and matured to
become a multi-module library, true to its original intent, Spirit can
still be used for extreme micro-parsing tasks. You only pay for
features that you need. The power of Spirit comes from its modularity
and extensibility. Instead of giving you a sledgehammer, it gives you
the right ingredients to create a sledgehammer easily.
The result was Spirit. Spirit was a personal project that was
conceived when Joel was doing R&D in Japan. Inspired by the GoF's
composite and interpreter patterns, he realized that he could model a
recursive-descent parser with hierarchical-object composition of
primitives (terminals) and composites (productions). The original
version was implemented with run-time polymorphic classes. A parser is
generated at run time by feeding in production rule strings such as:
"prod ::= {'A' | 'B'} 'C';"
A compile function compiled the parser, dynamically creating a
hierarchy of objects and linking semantic actions on the fly. A very
early text can be found here: __early_spirit__.
[heading /2001 to 2006/]
Version 1.0 to 1.8 was a complete rewrite of the original Spirit
parser using expression templates and static polymorphism, inspired by
the works of Todd Veldhuizen (__todd__exprtemplates__, C++ Report,
June 1995). Initially, the static-Spirit version was meant only to
replace the core of the original dynamic-Spirit. Dynamic-spirit
needed a parser to implement itself anyway. The original employed a
hand-coded recursive-descent parser to parse the input grammar
specification strings. Incidentally, it was around this time that Hartmut
joined the Spirit development.
After its initial "open-source" debut in May 2001, static-Spirit
became a success. At around November 2001, the Spirit website had an
activity percentile of 98%, making it the number one parser tool at
Source Forge at the time. Not bad for such a niche project as a
parser library. The "static" portion of Spirit was forgotten and
static-Spirit simply became Spirit. The library soon evolved to
acquire more dynamic features.
Spirit was formally accepted into __boost__ in October 2002. Boost is
a peer-reviewed, open collaborative development effort that is a
collection of free Open Source C++ libraries covering a wide range of
domains. The Boost Libraries have become widely known as an industry
standard for design and implementation quality, robustness, and
reusability.
[heading /2007/]
Over the years, especially after Spirit was accepted into Boost,
Spirit has served its purpose quite admirably. The focus of what we'll
now call [*/Classic-Spirit/] (versions prior to 2.0) was on
transduction parsing where the input string is merely translated to an
output string. A lot of parsers are of the transduction type. When the
time came to add attributes to the parser library, it was done rather
in an ad-hoc manner, with the goal of being 100% backward compatible with
classic Spirit. Some parsers have attributes, some don't.
Spirit V2 is another major rewrite. Spirit V2 grammars are fully
attributed (see __attr_grammar__). All parser components have
attributes. To do this efficiently and elegantly, we had to use a
couple of infrastructure libraries. Some of them hadn't been written
yet at the time, some were quite new when Spirit debuted, and some
needed work. __mpl__ is an important infrastructure library, yet is
not sufficient to implement Spirit V2. Another library had to be
written: __fusion__. Fusion sits between MPL and STL -- between compile
time and runtime -- mapping types to values. Fusion is a direct
descendant of both MPL and __boost_tuples__ (Fusion is now a full
fledged __boost__ library). __phoenix__ also had to be beefed up to
support Spirit V2. The result is __phoenix2__. Last but not least,
Spirit V2 uses an __todd__exprtemplates__ library called
__boost_proto__.
[heading New Ideas: Spirit V2]
Just before the development of Spirit V2 began, Hartmut came across
the __string_template__ library which is a part of the ANTLR parser
framework. It is a Java template engine (with ports for C# and Python)
for generating source code, web pages, emails, or any other formatted
text output. With it, he got the idea of using a formal notation
(a grammar) to describe the expected structure of an input character
sequence. The same grammar may be used to formalize the structure of a
corresponding output character sequence. This is possible because
parsing, most of the time, is implemented by comparing the input with
the patterns defined by the grammar. If we use the same patterns to
format a matching output, the generated sequence will follow the rules
of the grammar as well.
This insight led to the implementation of a grammar driven output generation
library compatible with the Spirit parser library. As it turned out, parsing
and generation are tightly connected and have very similar concepts. The
duality of these two sides of the same coin is ubiquitous, which
allowed us to build the parser library __qi__ and the generator library
__karma__ using the same component infrastructure.
The idea of creating a lexer library well integrated with the Spirit parsers is
not new. It has been discussed for almost the entire lifetime of
Classic-Spirit (pre V2). Several attempts to integrate existing lexer
libraries and frameworks with Spirit have been made and served as a proof of
concept and usability (for example see __wave__: The Boost C/C++ Preprocessor
Library, and __slex__: a fully dynamic C++ lexer implemented with Spirit).
Based on these experiences we added __lex__: a fully integrated lexer library
to the mix, making it possible to leverage the power of regular expressions for
token matching, removing pressure from the parser components, simplifying
parser grammars. Again, Spirit's modular structure allowed us to reuse the same
underlying component library as for the parser and generator libraries.
[heading How to use this manual]
Each major section (there are two: __sec_qi_and_karma__, and __sec_lex__) is
roughly divided into 3 parts:
# Tutorials: A step by step guide with heavily annotated code. These
are meant to get the user acquainted with the library as quickly as
possible. The objective is to build the user's confidence in using the
library through abundant examples and detailed instructions.
Examples speak volumes.
# Abstracts: A high level summary of key topics. The objective is to
give the user a high level view of the library, the key concepts,
background and theories.
# Reference: Detailed formal technical reference. We start with a quick
reference -- an easy to use table that maps into the reference proper.
The reference proper starts with C++ __cpp_concepts__ followed by
models of the concepts.
Some icons are used to mark certain topics indicative of their relevance.
These icons precede some text to indicate:
[table Icons
[[Icon] [Name] [Meaning]]
[[__note__] [Note] [Generally useful information (an aside that
doesn't fit in the flow of the text)]]
[[__tip__] [Tip] [Suggestion on how to do something
(especially something that may not be obvious)]]
[[__important__] [Important] [Important note on something to take
particular notice of]]
[[__caution__] [Caution] [Take special care with this - it may
not be what you expect and may cause bad
results]]
[[__danger__] [Danger] [This is likely to cause serious
trouble if ignored]]
]
This documentation is automatically generated by the Boost QuickBook documentation
tool. QuickBook can be found in the __boost_tools__.
[heading Support]
Please direct all questions to Spirit's mailing list. You can subscribe to the
__spirit_list__. The mailing list has a searchable archive. A search link to
this archive is provided in __spirit__'s home page. You may also read and post
messages to the mailing list through __spirit_general__ (thanks to __gmane__).
The news group mirrors the mailing list. Here is a link to the archives:
__mlist_archive__.
[endsect] [/ Preface]

52
doc/qi_and_karma.qbk Normal file
View File

@@ -0,0 +1,52 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Qi and Karma]
[include qi_and_karma/tutorials.qbk]
[section Abstracts]
[include qi_and_karma/peg.qbk]
[include qi_and_karma/parsing.qbk]
[include qi_and_karma/generating.qbk]
[include qi_and_karma/primitives.qbk]
[include qi_and_karma/operators.qbk]
[include qi_and_karma/attributes.qbk]
[include qi_and_karma/semantic_actions.qbk]
[include qi_and_karma/directives.qbk]
[include qi_and_karma/rules.qbk]
[include qi_and_karma/grammars.qbk]
[include qi_and_karma/debugging.qbk]
[include qi_and_karma/error_handling.qbk]
[include qi_and_karma/parse_trees_and_asts.qbk]
[endsect]
[/section Quick Reference]
[include qi_and_karma/quick_reference.qbk]
[/endsect]
[section Reference]
[section Concepts]
[include reference/qi_and_karma/parser.qbk]
[include reference/qi_and_karma/generator.qbk]
[endsect]
[include reference/qi_and_karma/char.qbk]
[include reference/qi_and_karma/string.qbk]
[include reference/qi_and_karma/numeric.qbk]
[include reference/qi_and_karma/binary.qbk]
[include reference/qi_and_karma/directive.qbk]
[include reference/qi_and_karma/action.qbk]
[include reference/qi_and_karma/nonterminal.qbk]
[include reference/qi_and_karma/operator.qbk]
[include reference/qi_and_karma/stream.qbk]
[include reference/qi_and_karma/auxiliary.qbk]
[include reference/qi_and_karma/debug.qbk]
[endsect]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Attributes]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Debugging]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Directives]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Error Handling]
[endsect]

View File

@@ -0,0 +1,24 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Parsing and Generating]
[heading The API functions exposed by __qi__ ]
[heading The parse() function]
[heading The phrase_parse() function]
[heading The tokenize_and_parse() function]
[heading The tokenize_and_phrase_parse() function]
[heading The make_parser() function]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Grammars]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Operators]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Parse Trees and ASTs]
[endsect]

View File

@@ -0,0 +1,44 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Parsing]
Central to the library is the parser. The parser does the actual
work of recognizing a linear input stream of data read sequentially
from start to end by the supplied iterators. The parser attempts to
match the input following a well-defined set of specifications known
as grammar rules. The parser returns a `bool` to report the success or
failure. When successful, the parser calls a client-supplied semantic
action, if there is one. The semantic action extracts structural
information depending on the data passed by the parser and the
hierarchical context of the parser it is attached to.
Parsers come in different flavors. The Spirit library comes bundled with an
extensive set of pre-defined parsers that perform various parsing tasks from
the trivial to the complex. The parser, as a concept, has a public conceptual
interface contract. Following the contract, anyone can write a conforming
parser that will play along well with the library's predefined components. We
shall provide a blueprint detailing the conceptual interface of the parser
later.

Clients of the library generally do not need to write their own hand-coded
parsers at all. Spirit has an immense repertoire of pre-defined parsers
covering all aspects of syntax and semantic analysis. We shall examine this
repertoire of parsers in the following sections. In the rare case where a
specific functionality is not available, it is extremely easy to write a
user-defined parser. The ease of writing new parser entities is the main
reason for Spirit's extensibility.
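Ahead of the API descriptions below, a minimal sketch of driving a parser from
user code might look like the following. The include path and namespace alias
follow the usual Spirit conventions; the concrete expression being matched is
made up purely for illustration.

    #include <boost/spirit/include/qi.hpp>
    #include <iostream>
    #include <string>

    namespace qi = boost::spirit::qi;

    int main()
    {
        std::string input("123,456");
        std::string::const_iterator first = input.begin();
        std::string::const_iterator last = input.end();

        // try to match an integer, a comma and another integer
        bool r = qi::parse(first, last, qi::int_ >> ',' >> qi::int_);

        std::cout << (r && first == last ? "full match" : "no full match")
                  << std::endl;
        return 0;
    }
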
[heading The API functions exposed by __qi__ ]
[heading The parse() function]
[heading The phrase_parse() function]
[heading The tokenize_and_parse() function]
[heading The tokenize_and_phrase_parse() function]
[heading The make_parser() function]
[endsect]

10
doc/qi_and_karma/peg.qbk Normal file
View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Parsing Expression Grammar]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Primitives]
[endsect]

View File

@@ -0,0 +1,43 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Quick Reference]
The following tables use some conventions to encode the attribute type exposed
by a component:
[variablelist
[[`attribute_of(P)`] [The component exposes the same attribute as the
component 'P' used as part of the overall
construct]]
[[`value_type(I)`] [The component exposes the value_type of the
underlying iterator 'I' as its attribute type]]
]
[table Character Parsers
[[Component] [Description] [Attribute]]
[[`char_`] [] [`char`]]
[[`wchar`] [] [`wchar_t`]]
[[`lit`] [] [`unused`]]
[[`wlit`] [] [`unused`]]
[[`'x'`] [] [`unused`]]
[[`L'x'`] [] [`unused`]]
[[`alnum`] [] [`Char`]]
[[`alpha`] [] [`Char`]]
[[`blank`] [] [`Char`]]
[[`cntrl`] [] [`Char`]]
[[`digit`] [] [`Char`]]
[[`graph`] [] [`Char`]]
[[`print`] [] [`Char`]]
[[`punct`] [] [`Char`]]
[[`space`] [] [`Char`]]
[[`xdigit`] [] [`Char`]]
[[`~P`] [] [`attribute_of(P)`]]
]
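As a usage illustration for the character parsers listed above (a sketch only;
the namespace aliases and the chosen pattern are assumptions made for this
snippet):

    // match a letter followed by any number of letters, digits or underscores
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    std::string input("counter_1");
    std::string::const_iterator first = input.begin();
    bool r = qi::parse(first, input.end(),
        ascii::alpha >> *(ascii::alnum | ascii::char_('_')));
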
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Rules]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Semantic Actions]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Tutorials]
[endsect]

10
doc/rationale.qbk Normal file
View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Rationale]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Lexer]
[endsect]

View File

@@ -0,0 +1,19 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Lexer Class]
[heading The lexertl_lexer Class Implementing the Dynamic Model]
[heading The lexertl_actor_lexer Class Implementing the Dynamic Model]
[heading The lexertl_static_lexer Class Implementing the Static Model]
[heading The lexertl_static_actor_lexer Class Implementing the Static Model]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Token]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Token Class]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section TokenDef]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section TokenDef Class]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section TokenSet]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section TokenSet Class]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Action]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Auxiliary]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Binary]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Char]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Debug]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Directive]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Generator]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Nonterminal]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Numeric]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Operators]
[endsect]

View File

@@ -0,0 +1,43 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Parser]
[heading Description]
Description of Parser concept
[variablelist Notation
[[`p`] [A Parser]]
]
[heading Valid Expressions]
For any Parser the following expressions must be valid:
[table
[[Expression] [Semantics] [Return type] [Complexity]]
[[`xxx`] [Semantics of `xxx`] [Parser] [Constant]]
]
[heading Type Requirements]
[table
[[Expression] [Requirements]]
[[`xxx`] [Requirements for `xxx`]]
]
[heading Invariants]
For any Parser xxx the following invariants always hold:
[heading Models]
Links to models of Parser concept
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section Stream]
[endsect]

View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section String]
[endsect]

91
doc/references.qbk Normal file
View File

@@ -0,0 +1,91 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section References]
[table
[[ ] [Authors] [Title, Publisher/link, Date Published]]
[[1.] [Todd Veldhuizen] [[@http://www.extreme.indiana.edu/%7Etveldhui/papers/Expression-Templates/exprtmpl.html
"Expression Templates"]. C++ Report, June 1995.]]
[[2.] [Peter Naur (ed.)] [[@http://www.masswerk.at/algol60/report.htm
"Report on the Algorithmic Language ALGOL 60"]. CACM, May 1960.]]
[[3.] [ISO/IEC] [[@http://www.cl.cam.ac.uk/%7Emgk25/iso-14977.pdf "ISO-EBNF"],
ISO/IEC 14977: 1996(E).]]
[[4.] [Richard J.Botting, Ph.D.] [[@http://www.csci.csusb.edu/dick/maths/intro_ebnf.html
"XBNF"] (citing Leu-Weiner, 1973).
California State University, San Bernardino, 1998.]]
[[5.] [James Coplien.] ["Curiously Recurring Template Pattern".
C++ Report, Feb. 1995.]]
[[6.] [Thierry Geraud and
Alexandre Duret-Lutz] [[@http://www.coldewey.com/europlop2000/papers/geraud%2Bduret.zip
Generic Programming Redesign of Patterns]
Proceedings of the 5th European Conference on Pattern Languages
of Programs(EuroPLoP'2000) Irsee, Germany, July 2000.]]
[[7.] [Geoffrey Furnish] [[@http://www.adtmag.com/joop/carticle.aspx?ID=627
"Disambiguated Glommable Expression Templates Reintroduced"]
C++ Report, May 2000]]
[[8.] [Erich Gamma,
Richard Helm,
Ralph Johnson,
and John Vlissides] [Design Patterns, Elements of Reusable Object-Oriented Software.
Addison-Wesley, 1995.]]
[[9.] [Alfred V. Aho,
Ravi Sethi,
Jeffrey D. Ullman] [Compilers: Principles, Techniques, and Tools.
Addison-Wesley, June 1987.]]
[[10.] [Dick Grune and
Ceriel Jacobs] [[@http://www.cs.vu.nl/%7Edick/PTAPG.html
Parsing Techniques: A Practical Guide.]
Ellis Horwood Ltd.: West Sussex, England, 1990.
(electronic copy, 1998).]]
[[11.] [T. J. Parr,
H. G. Dietz, and
W. E. Cohen] [[@http://citeseer.ist.psu.edu/6885.html
PCCTS Reference Manual (Version 1.00)].
School of Electrical Engineering, Purdue University,
West Lafayette, August 1991.]]
[[12.] [Adrian Johnstone and
Elizabeth Scott.] [[@ftp://ftp.cs.rhul.ac.uk/pub/rdp
RDP, A Recursive Descent Compiler Compiler].
Technical Report CSD TR 97 25, Dept. of Computer Science,
Egham, Surrey, England, Dec. 20, 1997.]]
[[13.] [Adrian Johnstone] [[@http://www.cs.rhul.ac.uk/research/languages/projects/lookahead_backtrack.shtml
Languages and Architectures,
Parser generators with backtrack or extended lookahead capability]
Department of Computer Science, Royal Holloway, University of London,
Egham, Surrey, England]]
[[14.] [Damian Conway] [[@http://www.csse.monash.edu.au/%7Edamian/papers/#Embedded_Input_Parsing_for_C
Parsing with C++ Classes].
ACM SIGPLAN Notices, 29:1, 1994.]]
[[15.] [Joel de Guzman] [[@http://spirit.sourceforge.net/distrib/spirit_1_8_5/libs/spirit/index.html
"Spirit Version 1.8"], 1998-2003.]]
[[16.] [S. Doaitse Swierstra and
Luc Duponcheel] [[@http://citeseer.ist.psu.edu/448665.html
Deterministic, Error-Correcting Combinator Parsers]
Dept. of Computer Science, Utrecht University P.O.Box 80.089,
3508 TB Utrecht, The Netherlands]]
[[17.] [Bjarne Stroustrup] [[@http://www.research.att.com/%7Ebs/whitespace98.pdf
Generalizing Overloading for C++2000]
Overload, Issue 25. April 1, 1998.]]
[[18.] [Dr. John Maddock] [[@http://www.boost.org/libs/regex/index.html
Regex++ Documentation]
http://www.boost.org/libs/regex/index.htm]]
[[19.] [Anonymous
Edited by Graham Hutton] [[@http://www.cs.nott.ac.uk/~gmh//faq.html
Frequently Asked Questions for comp.lang.functional].
Edited by Graham Hutton, University of Nottingham.]]
[[20.] [Hewlett-Packard] [[@http://www.sgi.com/tech/stl/
Standard Template Library Programmer's Guide.], Hewlett-Packard Company, 1994]]
[[21.] [Boost Libraries] [[@http://boost.org/libs/libraries.htm
Boost Libraries Documentation].]]
[[22.] [Brian McNamara and
Yannis Smaragdakis] [[@http://www.cc.gatech.edu/~yannis/fc++/ FC++:Functional Programming in C++].]]
[[23.] [Todd Veldhuizen] [[@ftp://ftp.cs.indiana.edu/pub/techreports/TR542.pdf Techniques for Scientific C++.]]]
]
[endsect]

143
doc/spirit2.qbk Normal file
View File

@@ -0,0 +1,143 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[article Spirit
[quickbook 1.4]
[version 2.0]
[authors [de Guzman, Joel], [Kaiser, Hartmut]]
[copyright 2001 2002 2003 2004 2005 2006 2007 2008 Joel de Guzman, Hartmut Kaiser]
[purpose Parser and Generator Library]
[license
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
[@http://www.boost.org/LICENSE_1_0.txt])
]
]
[/ November 14, 2007 ]
[/ Some links ]
[def __spirit__ [@http://spirit.sourceforge.net Spirit]]
[def __phoenix__ [@http://boost.org/libs/spirit/phoenix/index.html Phoenix]]
[def __phoenix2__ [@http://spirit.sourceforge.net/dl_more/phoenix_v2/libs/spirit/phoenix/doc/html/index.html Phoenix2]]
[def __fusion__ [@http://spirit.sourceforge.net/dl_more/fusion_v2/libs/fusion/doc/html/index.html Fusion]]
[def __mpl__ [@http://www.boost.org/libs/mpl/index.html MPL]]
[def __boost_tuples__ [@http://www.boost.org/libs/tuple/index.html Boost.Tuples]]
[def __boost_proto__ -Boost.Proto-]
[def __boost__ [@http://www.boost.org/ Boost]]
[def __boost_tools__ [@http://www.boost.org/tools/index.html Boost Tools]]
[def __spirit_list__ [@https://lists.sourceforge.net/lists/listinfo/spirit-general Spirit Mailing List]]
[def __spirit_general__ [@news://news.gmane.org/gmane.comp.spirit.general Spirit General NNTP news portal]]
[def __gmane__ [@http://www.gmane.org Gmane]]
[def __mlist_archive__ [@http://news.gmane.org/gmane.comp.parsers.spirit.general]]
[def __early_spirit__ [@http://spirit.sourceforge.net/dl_docs/pre-spirit.htm pre-Spirit]]
[def __todd__exprtemplates__ [@http://ubiety.uwaterloo.ca/~tveldhui/papers/Expression-Templates/exprtmpl.html Expression Templates]]
[def __cpp_concepts__ [@http://en.wikipedia.org/wiki/C%2B%2B0x#Concept Concepts]]
[def __attr_grammar__ [@http://en.wikipedia.org/wiki/Attribute_grammar Attribute Grammar]]
[def __string_template__ [@http://www.stringtemplate.org/ StringTemplate]]
[def __lexertl__ [@http://www.benhanson.net/lexertl.html Lexertl]]
[def __wave__ [@http://www.boost.org/libs/wave/index.html Wave]]
[def __slex__ [@http://spirit.sourceforge.net/repository/applications/slex.zip SLex]]
[def __flex__ [@http://flex.sourceforge.net/ Flex]]
[def __re2c__ [@http://re2c.sourceforge.net/ re2c]]
[def __ragel__ [@http://www.cs.queensu.ca/~thurston/ragel/ Ragel]]
[def __boost_variant__ [@http://www.boost.org/doc/html/variant.html `boost::variant<>`]]
[def __boost_iterator_range__ [@http://www.boost.org/libs/range/doc/utility_class.html#iter_range `boost::iterator_range<>`]]
[def __qi__ /Spirit.Qi/]
[def __karma__ /Spirit.Karma/]
[def __lex__ /Spirit.Lex/]
[def __fixme__ *FIXME*]
[/ Sections ]
[def __sec_qi_and_karma__ [link spirit.qi_and_karma Qi and Karma]]
[def __sec_qi_karma_attributes__ [link spirit.qi_and_karma.abstracts.attributes Attributes]]
[def __sec_lex__ [link spirit.__lex__ Lex]]
[def __sec_lex_quickstart_1__ [link spirit.__lex__.__lex___tutorials.quickstart_1___a_word_counter_using___lex__ Lex Quickstart 1 - A word counter using __lex__]]
[def __sec_lex_quickstart_2__ [link spirit.__lex__.__lex___tutorials.quickstart_2___a_better_word_counter_using___lex__ Lex Quickstart 2 - A better word counter using __lex__]]
[def __sec_lex_quickstart_3__ [link spirit.__lex__.__lex___tutorials.quickstart_3___counting_words_using_a_parser Lex Quickstart 3 - Counting Words Using a Parser]]
[def __sec_lex_static_model__ [link spirit.__lex__.abstracts.the__static__lexer_model The /Static/ Model]]
[def __sec_lex_primitives__ [link spirit.__lex__.abstracts.lexer_primitives Lexer Primitives]]
[def __sec_lex_tokenvalues__ [link spirit.__lex__.abstracts.lexer_primitives.about_tokens_and_token_values About Tokens and Token Values]]
[def __sec_lex_attributes__ [link spirit.__lex__.abstracts.lexer_attributes Lexer Attributes]]
[def __sec_ref_lex_token__ [link spirit.__lex__.reference.concepts.token Token Reference]]
[def __sec_ref_lex_token_def__ [link spirit.__lex__.reference.concepts.tokendef TokenDef Reference]]
[/ References to API descriptions ]
[def __api_tokenize_and_parse__ [link spirit.qi_and_karma.abstracts.parsing_and_generating.the_tokenize_and_phrase_parse___function `tokenize_and_parse()`]]
[def __api_generate_static__ [link spirit.__lex__.abstracts.tokenizing_input_data.the_generate_static___function `generate_static()`]]
[/ References to classes ]
[def __class_token_def__ [link spirit.__lex__.reference.tokendef_class `token_def<>`]]
[def __class_lexertl_token__ [link spirit.__lex__.reference.token_class `lexertl_token<>`]]
[def __class_lexertl_lexer__ [link spirit.__lex__.reference.lexer_class.the_lexertl_lexer_class_implementing_the_dynamic_model `lexertl_lexer<>`]]
[def __class_lexertl_static_lexer__ [link spirit.__lex__.reference.lexer_class.the_lexertl_static_lexer_class_implementing_the_static_model `lexertl_static_lexer<>`]]
[/ Some images ]
[def __note__ [$../../../../doc/html/images/adm_note.png]]
[def __tip__ [$../../../../doc/html/images/adm_tip.png]]
[def __important__ [$../../../../doc/html/images/adm_important.png]]
[def __caution__ [$../../../../doc/html/images/adm_caution.png]]
[def __danger__ [$../../../../doc/html/images/adm_danger.png]]
[/ some templates]
[/ fig[ref title label]
Image element with a title.
ref := Reference to the image file.
title := The title to associate with this figure.
label := the id to use to be able to reference this picture
]
[template fig[ref title label]'''
<figure id="'''[label]'''">
<title>'''[title]'''</title>
<inlinemediaobject>
<imageobject>
<imagedata fileref="'''[ref]'''"></imagedata>
</imageobject>
<textobject>
<phrase role="alt">'''[title]'''</phrase>
</textobject>
</inlinemediaobject>
</figure>
''']
[/ Here we go ]
[include preface.qbk]
[include what_s_new.qbk]
[include introduction.qbk]
[include qi_and_karma.qbk]
[include lex.qbk]
[include faq.qbk]
[include notes.qbk]
[include rationale.qbk]
[include acknowledgments.qbk]
[include references.qbk]

10
doc/what_s_new.qbk Normal file
View File

@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]
[section What's New]
[endsect]

12
example/karma/Jamfile Normal file
View File

@@ -0,0 +1,12 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2007 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
project spirit-karma-example ;
exe basic_facilities : basic_facilities.cpp ;
exe functor_facilities : functor_facilities.cpp ;

View File

@@ -0,0 +1,178 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// The main purpose of this example is to show the uniform and easy way of
// output formatting for different container types.
//
// Since the 'stream' primitive used below uses the streaming operator defined
// for the container value_type, you must make sure to have a corresponding
// operator<<() available for this contained data type. OTOH this means that
// the format descriptions used below will be usable for any contained type as
// long as this type has an associated streaming operator defined.
// use a larger value for the alignment field width (default is 10)
#define BOOST_KARMA_DEFAULT_FIELD_LENGTH 25
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <algorithm>
#include <cstdlib>
#include <boost/range.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>
using namespace boost::spirit;
using namespace boost::spirit::ascii;
namespace karma = boost::spirit::karma;
///////////////////////////////////////////////////////////////////////////////
// Output the given containers in list format
// Note: the format description does not depend on the type of the sequence
// nor does it depend on the type of the elements contained in the
// sequence
///////////////////////////////////////////////////////////////////////////////
template <typename Container>
void output_container(std::ostream& os, Container const& c)
{
// output the container as a space separated sequence
os <<
karma::format_delimited(
*stream, // format description
c, // data
space // delimiter
) << std::endl << std::endl;
os <<
karma::format_delimited(
'[' << *stream << ']', // format description
c, // data
space // delimiter
) << std::endl << std::endl;
// output the container as a comma separated list
os <<
karma::format(
stream % ", ", // format description
c // data
) << std::endl << std::endl;
os <<
karma::format(
'[' << (stream % ", ") << ']', // format description
c // data
) << std::endl << std::endl;
// output the container as a comma separated list of items enclosed in '()'
os <<
karma::format(
('(' << stream << ')') % ", ", // format description
c // data
) << std::endl << std::endl;
os <<
karma::format(
'[' << (
('(' << stream << ')') % ", "
) << ']', // format description
c // data
) << std::endl << std::endl;
// output the container as a HTML list
os <<
karma::format_delimited(
"<ol>" <<
*verbatim["<li>" << stream << "</li>"]
<< "</ol>", // format description
c, // data
'\n' // delimiter
) << std::endl;
// output the container as right aligned column
os <<
karma::format_delimited(
*verbatim[
"|" << right_align[stream] << "|"
], // format description
c, // data
'\n' // delimiter
) << std::endl;
os << std::endl;
}
int main()
{
///////////////////////////////////////////////////////////////////////////
// vector
std::vector<int> v (8);
std::generate(v.begin(), v.end(), std::rand); // randomly fill the vector
std::cout << "-------------------------------------------------------------"
<< std::endl;
std::cout << "std::vector<int>" << std::endl;
output_container(std::cout, v);
///////////////////////////////////////////////////////////////////////////
// list
std::list<char> l;
l.push_back('A');
l.push_back('B');
l.push_back('C');
std::cout << "-------------------------------------------------------------"
<< std::endl;
std::cout << "std::list<char>" << std::endl;
output_container(std::cout, l);
///////////////////////////////////////////////////////////////////////////
// C-style array
int i[4] = { 3, 6, 9, 12 };
std::cout << "-------------------------------------------------------------"
<< std::endl;
std::cout << "int i[]" << std::endl;
output_container(std::cout, boost::make_iterator_range(i, i+4));
///////////////////////////////////////////////////////////////////////////
// strings
std::string str("Hello world!");
std::cout << "-------------------------------------------------------------"
<< std::endl;
std::cout << "std::string" << std::endl;
output_container(std::cout, str);
///////////////////////////////////////////////////////////////////////////
// vector of boost::date objects
// Note: any registered facets get used!
using namespace boost::gregorian;
std::vector<date> dates;
dates.push_back(date(2005, Jun, 25));
dates.push_back(date(2006, Jan, 13));
dates.push_back(date(2007, May, 03));
date_facet* facet(new date_facet("%A %B %d, %Y"));
std::cout.imbue(std::locale(std::cout.getloc(), facet));
std::cout << "-------------------------------------------------------------"
<< std::endl;
std::cout << "std::vector<boost::date>" << std::endl;
output_container(std::cout, dates);
///////////////////////////////////////////////////////////////////////////
// fusion tuples
// this will work in the future
// boost::fusion::vector<int, char, double> fv(42, 'a', 45.8);
//
// std::cout << "boost::fusion::vector<int, char, double>" << std::endl;
// output_container(std::cout, fv);
return 0;
}

View File

@@ -0,0 +1,202 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example demonstrates how to write functor based generators for special
// purposes.
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <algorithm>
#include <cstdlib>
using namespace boost::spirit;
///////////////////////////////////////////////////////////////////////////////
// The functor generator 'counter' can be used to annotate the output with
// item counting information.
///////////////////////////////////////////////////////////////////////////////
struct counter_impl : boost::spirit::karma::functor_base
{
template <typename OutputIterator, typename Context, typename Parameter>
bool operator()(Parameter const&, Context& ctx, OutputIterator& sink) const
{
namespace karma = boost::spirit::karma;
return karma::generate(sink, int_ << ": ", counter++);
}
counter_impl(int& counter_)
: counter(counter_) {}
int& counter;
};
inline boost::spirit::result_of::as_generator<counter_impl>::type
counter(int& counter_)
{
using namespace boost::spirit::karma;
return as_generator(counter_impl(counter_));
}
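// A minimal usage sketch (the actual calls are in main() below, using a
// std::vector<int> named 'v'):
//
//     int n = 1;
//     std::cout << karma::format((counter(n) << int_) % ", ", v);
//
// Every generated integer is prefixed with a running count, i.e. "1: ",
// "2: " and so on, as produced by the generate() call above.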
///////////////////////////////////////////////////////////////////////////////
// The functor generator 'confix' allows a simple syntax for generating
// output wrapped inside a matching pair of prefix and suffix strings.
///////////////////////////////////////////////////////////////////////////////
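// A minimal usage sketch (the actual call is in main() below, using a
// std::list<std::string> named 'names'):
//
//     std::cout << karma::format(confix(stream % ", ", "[", "]"), names);
//
// which emits the comma separated list wrapped in "[" and "]".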
template <typename Expr>
struct confix_impl : public boost::spirit::karma::functor_base
{
template <typename Context>
struct apply
{
typedef boost::spirit::hold_any type;
};
template <typename OutputIterator, typename Context, typename Parameter>
bool operator()(Parameter const& v, Context& ctx, OutputIterator& sink) const
{
namespace karma = boost::spirit::karma;
return karma::generate(sink, open << xpr << close, v);
}
confix_impl(char const* open_, char const* close_, Expr const& xpr_)
: open(open_), close(close_), xpr(xpr_) {}
std::string open;
std::string close;
Expr xpr;
};
template <typename Expr>
inline typename boost::spirit::result_of::as_generator<confix_impl<Expr> >::type
confix(Expr const& xpr_, char const* open_ = "", char const* close_ = "")
{
using namespace boost::spirit::karma;
return as_generator(confix_impl<Expr>(open_, close_, xpr_));
}
///////////////////////////////////////////////////////////////////////////////
// The functor generator 'list' allows a simple syntax for generating
// list formatted output.
//
// This example uses phoenix::bind to allow the second argument of operator()
// to be omitted and to swap the remaining two arguments (see the sketch below).
///////////////////////////////////////////////////////////////////////////////
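// A rough sketch of the intended wiring (the actual call is in main() below):
//
//     karma::as_generator_mf<list_impl_mf>(bind(list(stream, ", "), _3, _1))
//
// Here phoenix::bind reorders the functor invocation so that _3 (the sink)
// and _1 (the parameter) become the two arguments of list_impl's operator(),
// while the context argument is dropped.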
template <typename Expr>
struct list_impl : boost::spirit::karma::functor_base
{
// this function will be called to generate the output
template <typename OutputIterator, typename Parameter>
bool operator()(OutputIterator& sink, Parameter const& v) const
{
namespace karma = boost::spirit::karma;
return karma::generate(sink, xpr % delim, v);
}
list_impl(Expr const& xpr_, char const* delim_)
: xpr(xpr_), delim(delim_) {}
Expr xpr;
std::string delim;
};
// Supply the expected parameter type explicitly
struct list_impl_mf
{
// the expected parameter type of a functor has to be defined using an
// embedded apply metafunction
template <typename Context>
struct apply
{
typedef boost::spirit::hold_any type;
};
};
template <typename Expr>
inline list_impl<Expr>
list(Expr const& xpr, char const* delim)
{
return list_impl<Expr>(xpr, delim);
}
///////////////////////////////////////////////////////////////////////////////
int main()
{
namespace karma = boost::spirit::karma;
using namespace boost::phoenix;
using namespace boost::phoenix::arg_names;
///////////////////////////////////////////////////////////////////////////
// Output the given containers in list format
// We use a special functor generator here to annotate the output with
// an integer counting the entries.
///////////////////////////////////////////////////////////////////////////
std::vector<int> v (8);
std::generate(v.begin(), v.end(), std::rand); // randomly fill the vector
int counter1 = 1;
std::cout <<
karma::format(
(counter(counter1) << int_) % ", ", // format description
v // data
) << std::endl;
// Here we initialize the counter to 100
int counter2 = 100;
std::cout <<
karma::format(
'[' << (
(counter(counter2) << int_) % ", "
) << ']', // format description
v // data
) << std::endl;
///////////////////////////////////////////////////////////////////////////
// list
// The output format description used below adds special item formatting
///////////////////////////////////////////////////////////////////////////
std::list<std::string> names;
names.push_back("Spirit");
names.push_back("Qi");
names.push_back("Karma");
// specifying a prefix, item, and suffix scheme directly
std::cout <<
karma::format(
('{' << stream << '}') % ", ", // format description
names // data
) << std::endl;
// The confix generator nicely wraps the given expression with prefix and
// suffix strings
std::cout <<
karma::format(
confix(stream % ", ", "[", "]"), // format description
names // data
) << std::endl;
///////////////////////////////////////////////////////////////////////////
// Output the given container as a list
// We use a separate metafunction list_impl_mf to specify the expected
// parameter type of this functor generator.
// We use phoenix::bind to omit the 2nd argument from the functor's
// function operator and to change the order of the remaining two
// arguments.
///////////////////////////////////////////////////////////////////////////
std::string str("Hello world!");
std::cout <<
karma::format(
karma::as_generator_mf<list_impl_mf>(bind(list(stream, ", "), _3, _1)),
str
) << std::endl;
return 0;
}

View File

@@ -0,0 +1,119 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// The main purpose of this example is to show how a single container type can
// be formatted using different output grammars.
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>
#include <iostream>
#include <vector>
#include <algorithm>
#include <cstdlib>
using namespace boost::spirit;
using namespace boost::spirit::ascii;
namespace karma = boost::spirit::karma;
///////////////////////////////////////////////////////////////////////////////
int main()
{
///////////////////////////////////////////////////////////////////////////
// vector
std::vector<int> v (8);
std::generate(v.begin(), v.end(), std::rand); // randomly fill the vector
std::cout << "Output 8 integers from a std::vector<int>..." << std::endl;
// output the container as a sequence without any separation
std::cout << "...without any separation" << std::endl;
std::cout <<
karma::format(
*int_, // format description
v // data
) << std::endl << std::endl;
// output the container as a space separated sequence
std::cout << "...as space delited list" << std::endl;
std::cout <<
karma::format_delimited(
*int_, // format description
v, // data
space // delimiter
) << std::endl << std::endl;
std::cout <<
karma::format_delimited(
'[' << *int_ << ']', // format description
v, // data
space // delimiter
) << std::endl << std::endl;
// output the container as a comma separated list
std::cout << "...as comma separated list" << std::endl;
std::cout <<
karma::format(
int_ % ", ", // format description
v // data
) << std::endl << std::endl;
std::cout <<
karma::format(
'[' << (int_ % ", ") << ']', // format description
v // data
) << std::endl << std::endl;
// output the container as a comma separated list of doubles
std::cout << "...as comma separated list of doubles" << std::endl;
std::cout <<
karma::format(
double_ % ", ", // format description
v // data
) << std::endl << std::endl;
// output the container as a comma separated list of items enclosed in '()'
std::cout << "..as list of ints enclosed in '()'" << std::endl;
std::cout <<
karma::format(
('(' << int_ << ')') % ", ", // format description
v // data
) << std::endl << std::endl;
std::cout <<
karma::format(
'[' << (
('(' << int_ << ')') % ", "
) << ']', // format description
v // data
) << std::endl << std::endl;
// output the container as a HTML list
std::cout << "...as HTML bullet list" << std::endl;
std::cout <<
karma::format_delimited(
"<ol>" <<
// no delimiting within verbatim
*verbatim[" <li>" << int_ << "</li>"]
<< "</ol>", // format description
v, // data
'\n' // delimiter
) << std::endl;
// output the container as right aligned column
std::cout << "...right aligned in a column" << std::endl;
std::cout <<
karma::format_delimited(
*verbatim[
"|" << right_align[int_] << "|"
], // format description
v, // data
'\n' // delimiter
) << std::endl;
std::cout << std::endl;
return 0;
}

22
example/lex/Jamfile Normal file
View File

@@ -0,0 +1,22 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2008 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
project spirit-lexer-example ;
exe example1 : example1.cpp ;
exe example2 : example2.cpp ;
exe example3 : example3.cpp ;
exe example4 : example4.cpp ;
exe example5 : example5.cpp ;
exe example6 : example6.cpp ;
exe print_numbers : print_numbers.cpp ;
exe word_count : word_count.cpp ;
exe word_count_functor : word_count_functor.cpp ;
exe word_count_lexer : word_count_lexer.cpp ;
exe strip_comments : strip_comments.cpp ;

26
example/lex/example.hpp Normal file
View File

@@ -0,0 +1,26 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <iostream>
#include <fstream>
#include <string>
#include <cstdlib>
///////////////////////////////////////////////////////////////////////////////
// Helper function reading a file into a string
///////////////////////////////////////////////////////////////////////////////
inline std::string
read_from_file(char const* infile)
{
std::ifstream instream(infile);
if (!instream.is_open()) {
std::cerr << "Couldn't open file: " << infile << std::endl;
std::exit(-1);
}
instream.unsetf(std::ios::skipws); // No white space skipping!
return std::string(std::istreambuf_iterator<char>(instream.rdbuf()),
std::istreambuf_iterator<char>());
}
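// Typical usage in the examples below (assuming the respective input file
// exists in the current working directory):
//
//     std::string str(read_from_file("example1.input"));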

136
example/lex/example1.cpp Normal file
View File

@@ -0,0 +1,136 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// Simple lexer/parser to test the Spirit installation.
//
// This example shows how to create a simple lexer recognizing 4 different
// tokens, and how to use a single token definition as the skip parser during
// parsing. Additionally, it demonstrates how to use one of the defined
// tokens as a parser component in the grammar.
//
// The grammar recognizes a simple input structure, for instance:
//
// {
// hello world, hello it is me
// }
//
// Any number of simple sentences (optionally comma separated) inside a pair
// of curly braces will be matched.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example1_tokens : lexer_def<Lexer>
{
template <typename Self>
void def (Self& self)
{
// define tokens and associate them with the lexer
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
self = token_def<>(',') | '{' | '}' | identifier;
// any token definition to be used as the skip parser during parsing
// has to be associated with a separate lexer state (here 'WS')
white_space = "[ \\t\\n]+";
self("WS") = white_space;
}
token_def<> identifier, white_space;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example1_grammar
: grammar_def<Iterator, in_state_skipper<token_def<> > >
{
template <typename TokenDef>
example1_grammar(TokenDef const& tok)
{
start = '{' >> *(tok.identifier >> -char_(',')) >> '}';
}
rule<Iterator, in_state_skipper<token_def<> > > start;
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// This is the token type to return from the lexer iterator
typedef lexertl_token<base_iterator_type> token_type;
// This is the lexer type to use to tokenize the input.
// We use the lexertl based lexer engine.
typedef lexertl_lexer<token_type> lexer_type;
// This is the token definition type (derived from the given lexer type).
typedef example1_tokens<lexer_type> example1_tokens;
// This is the iterator type exposed by the lexer
typedef lexer<example1_tokens>::iterator_type iterator_type;
// This is the type of the grammar to parse
typedef example1_grammar<iterator_type> example1_grammar;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
example1_tokens tokens; // Our token definition
example1_grammar def (tokens); // Our grammar definition
lexer<example1_tokens> lex(tokens); // Our lexer
grammar<example1_grammar> calc(def); // Our parser
std::string str (read_from_file("example1.input"));
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
std::string::iterator it = str.begin();
iterator_type iter = lex.begin(it, str.end());
iterator_type end = lex.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
// Note how we use the token_def defined above as the skip parser. It must
// be explicitly wrapped inside a state directive, switching the lexer
// state for the duration of skipping whitespace.
bool r = phrase_parse(iter, end, calc, in_state("WS")[tokens.white_space]);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::string rest(iter, end);
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}

169
example/lex/example2.cpp Normal file
View File

@@ -0,0 +1,169 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example shows how to create a simple lexer recognizing a couple of
// different tokens and how to use this with a grammar. This example has a
// heavily backtracking grammar which makes it a candidate for lexer based
// parsing (all tokens are scanned and generated only once, even if
// backtracking is required) which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
// Additionally it demonstrates how to use one of the defined tokens as a
// parser component in the grammar.
//
// The grammar recognizes a simple input structure: any number of English
// simple sentences (statements, questions and commands) are recognized and
// are being counted separately.
// #define BOOST_SPIRIT_DEBUG
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using boost::phoenix::ref;
///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example2_tokens : lexer_def<Lexer>
{
template <typename Self>
void def (Self& self)
{
// A 'word' consists of one or more letters and an optional
// apostrophe. If it contains an apostrophe, there may only be one, and
// the apostrophe must be preceded and followed by at least one letter.
// For example, "I'm" and "doesn't" meet the definition of 'word' we
// define below.
word = "[a-zA-Z]+('[a-zA-Z]+)?";
// associate the tokens and the token set with the lexer
self = token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word;
}
token_def<> word;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example2_grammar : grammar_def<Iterator>
{
template <typename TokenDef>
example2_grammar(TokenDef const& tok)
: paragraphs(0), commands(0), questions(0), statements(0)
{
story
= +paragraph
;
paragraph
= ( +( command [ ++ref(commands) ]
| question [ ++ref(questions) ]
| statement [ ++ref(statements) ]
)
>> *char_(' ') >> +char_('\n')
)
[ ++ref(paragraphs) ]
;
command
= +(tok.word | ' ' | ',') >> '!'
;
question
= +(tok.word | ' ' | ',') >> '?'
;
statement
= +(tok.word | ' ' | ',') >> '.'
;
BOOST_SPIRIT_DEBUG_NODE(story);
BOOST_SPIRIT_DEBUG_NODE(paragraph);
BOOST_SPIRIT_DEBUG_NODE(command);
BOOST_SPIRIT_DEBUG_NODE(question);
BOOST_SPIRIT_DEBUG_NODE(statement);
}
rule<Iterator> story, paragraph, command, question, statement;
int paragraphs, commands, questions, statements;
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// This is the token type to return from the lexer iterator
typedef lexertl_token<base_iterator_type> token_type;
// This is the lexer type to use to tokenize the input.
// Here we use the lexertl based lexer engine.
typedef lexertl_lexer<token_type> lexer_type;
// This is the token definition type (derived from the given lexer type).
typedef example2_tokens<lexer_type> example2_tokens;
// this is the iterator type exposed by the lexer
typedef lexer<example2_tokens>::iterator_type iterator_type;
// this is the type of the grammar to parse
typedef example2_grammar<iterator_type> example2_grammar;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
example2_tokens tokens; // Our token definition
example2_grammar def (tokens); // Our grammar definition
lexer<example2_tokens> lex(tokens); // Our lexer
grammar<example2_grammar> calc(def, def.story); // Our grammar
std::string str (read_from_file("example2.input"));
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
std::string::iterator it = str.begin();
iterator_type iter = lex.begin(it, str.end());
iterator_type end = lex.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
bool r = parse(iter, end, calc);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "There were "
<< def.commands << " commands, "
<< def.questions << " questions, and "
<< def.statements << " statements.\n";
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}

161
example/lex/example3.cpp Normal file
View File

@@ -0,0 +1,161 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example shows how to create a simple lexer recognizing a couple of
// different tokens and how to use this with a grammar. This example has a
// heavily backtracking grammar which makes it a candidate for lexer based
// parsing (all tokens are scanned and generated only once, even if
// backtracking is required) which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be
// ignored.
//
// This example recognizes couplets, which are sequences of numbers enclosed
// in matching pairs of parentheses. See the comments below for details
// and examples.
// #define BOOST_SPIRIT_LEXERTL_DEBUG
// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example3_tokens : lexer_def<Lexer>
{
typedef typename Lexer::token_set token_set;
template <typename Self>
void def (Self& self)
{
// define the tokens to match
ellipses = "\\.\\.\\.";
number = "[0-9]+";
// define the whitespace to ignore (spaces, tabs, newlines and C-style
// comments)
white_space
= token_def<>("[ \\t\\n]+") // whitespace
| "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" // C style comments
;
// associate the tokens and the token set with the lexer
self = ellipses | '(' | ')' | number;
self("WS") = white_space;
}
// these tokens expose the iterator_range of the matched input sequence
token_def<> ellipses, identifier, number;
token_set white_space;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example3_grammar
: grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
template <typename TokenDef>
example3_grammar(TokenDef const& tok)
{
start
= +(couplet | tok.ellipses)
;
// A couplet matches nested left and right parenthesis.
// For example:
// (1) (1 2) (1 2 3) ...
// ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
// (((1))) ...
couplet
= tok.number
| '(' >> +couplet >> ')'
;
BOOST_SPIRIT_DEBUG_NODE(start);
BOOST_SPIRIT_DEBUG_NODE(couplet);
}
typedef typename Lexer::token_set token_set;
rule<Iterator, in_state_skipper<token_set> > start, couplet;
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// This is the token type to return from the lexer iterator
typedef lexertl_token<base_iterator_type> token_type;
// This is the lexer type to use to tokenize the input.
// Here we use the lexertl based lexer engine.
typedef lexertl_lexer<token_type> lexer_type;
// This is the token definition type (derived from the given lexer type).
typedef example3_tokens<lexer_type> example3_tokens;
// this is the iterator type exposed by the lexer
typedef lexer<example3_tokens>::iterator_type iterator_type;
// this is the type of the grammar to parse
typedef example3_grammar<iterator_type, lexer_type> example3_grammar;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
example3_tokens tokens; // Our token definition
example3_grammar def (tokens); // Our grammar definition
lexer<example3_tokens> lex(tokens); // Our lexer
grammar<example3_grammar> calc(def); // Our grammar
std::string str (read_from_file("example3.input"));
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
std::string::iterator it = str.begin();
iterator_type iter = lex.begin(it, str.end());
iterator_type end = lex.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
// Note how we use the token_set defined above as the skip parser.
std::string ws("WS");
bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}

239
example/lex/example4.cpp Normal file
View File

@@ -0,0 +1,239 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example shows how to create a simple lexer recognizing a couple of
// different tokens aimed at a simple language and how to use this lexer with
// a grammar. It shows how to associate values to tokens and how to access the
// token values from inside the grammar.
//
// We use explicit token value types, making the corresponding token instances
// convert the matched input into an instance of that type. The token
// value is exposed as the parser attribute if this token is used as a
// parser component somewhere in a grammar.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be
// ignored.
//
// This example recognizes a very simple programming language having
// assignment statements and if and while control structures. Look at the file
// example4.input for an example.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;
using boost::phoenix::val;
///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example4_tokens : lexer_def<Lexer>
{
typedef typename Lexer::token_set token_set;
template <typename Self>
void def (Self& self)
{
// define the tokens to match
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
constant = "[0-9]+";
if_ = "if";
else_ = "else";
while_ = "while";
// define the whitespace to ignore (spaces, tabs, newlines and C-style
// comments)
white_space
= token_def<>("[ \\t\\n]+")
| "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
;
// associate the tokens and the token set with the lexer
self = token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
self += if_ | else_ | while_ | identifier;
self("WS") = white_space;
}
//[example4_token_def
// these tokens expose the iterator_range of the matched input sequence
token_def<> if_, else_, while_;
// The following two tokens have an associated value type, 'identifier'
// carries a string (the identifier name) and 'constant' carries the
// matched integer value.
//
// Note: any token value type specified explicitly during a token_def<>
// declaration needs to be listed during token type definition as
// well (see the typedef for the token_type below).
//
// The conversion of the matched input to an instance of this type occurs
// once (on first access), which makes token values as efficient as
// possible. Moreover, token instances are constructed once by the lexer
// library. From this point on tokens are passed by reference only,
// avoiding tokens being copied around.
token_def<std::string> identifier;
token_def<unsigned int> constant;
//]
// token set to be used as the skip parser
token_set white_space;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example4_grammar
: grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
template <typename TokenDef>
example4_grammar(TokenDef const& tok)
{
program
= +block
;
block
= '{' >> *statement >> '}'
;
statement
= assignment
| if_stmt
| while_stmt
;
assignment
= (tok.identifier >> '=' >> expression >> ';')
[
std::cout << val("assignment statement to: ") << _1 << "\n"
]
;
if_stmt
= ( tok.if_ >> '(' >> expression >> ')' >> block
>> -(tok.else_ >> block)
)
[
std::cout << val("if expression: ") << _2 << "\n"
]
;
while_stmt
= (tok.while_ >> '(' >> expression >> ')' >> block)
[
std::cout << val("while expression: ") << _2 << "\n"
]
;
// since expression has a variant return type accommodating
// std::string and unsigned integer, both possible values may be
// returned to the calling rule
expression
= tok.identifier [ _val = _1 ]
| tok.constant [ _val = _1 ]
;
}
typedef typename Lexer::token_set token_set;
typedef boost::variant<unsigned int, std::string> expression_type;
rule<Iterator, in_state_skipper<token_set> > program, block, statement;
rule<Iterator, in_state_skipper<token_set> > assignment, if_stmt;
rule<Iterator, in_state_skipper<token_set> > while_stmt;
// the expression is the only rule having a return value
rule<Iterator, expression_type(), in_state_skipper<token_set> > expression;
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
//[example4_token
// This is the lexer token type to use. The second template parameter lists
// all attribute types used for token_def's during token definition (see
// example4_tokens<> above). Here we use the predefined lexertl token
// type, but any compatible token type may be used instead.
//
// If you don't list any token value types in the following declaration
// (or just use the default token type: lexertl_token<base_iterator_type>)
// it will compile and work just fine, just a bit less efficient. This is
// because the token value will be generated from the matched input
// sequence every time it is requested. But as soon as you specify at
// least one token value type you'll have to list all value types used
// for token_def<> declarations in the token definition class above,
// otherwise compilation errors will occur.
typedef lexertl_token<
base_iterator_type, boost::mpl::vector<unsigned int, std::string>
> token_type;
//]
// Here we use the lexertl based lexer engine.
typedef lexertl_lexer<token_type> lexer_type;
// This is the token definition type (derived from the given lexer type).
typedef example4_tokens<lexer_type> example4_tokens;
// this is the iterator type exposed by the lexer
typedef lexer<example4_tokens>::iterator_type iterator_type;
// this is the type of the grammar to parse
typedef example4_grammar<iterator_type, lexer_type> example4_grammar;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
example4_tokens tokens; // Our token definition
example4_grammar def (tokens); // Our grammar definition
lexer<example4_tokens> lex(tokens); // Our lexer
grammar<example4_grammar> calc(def, def.program); // Our grammar
std::string str (read_from_file("example4.input"));
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
std::string::iterator it = str.begin();
iterator_type iter = lex.begin(it, str.end());
iterator_type end = lex.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
// Note how we use the token_set defined above as the skip parser. It must
// be explicitly wrapped inside a state directive, switching the lexer
// state for the duration of skipping whitespace.
bool r = phrase_parse(iter, end, calc, in_state("WS")[tokens.white_space]);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}

283
example/lex/example5.cpp Normal file
View File

@@ -0,0 +1,283 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example shows how to create a simple lexer recognizing a couple of
// different tokens aimed at a simple language and how to use this lexer with
// a grammar. It shows how to associate values to tokens and how to access the
// token values from inside the grammar.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be
// ignored.
//
// The main purpose of this example is to show how inheritance can be used to
// overload parts of a base grammar and add token definitions to a base lexer.
//
// Further, it shows how you can use the 'omitted' attribute type specifier
// for token definitions to force the token to have no attribute (expose an
// unused attribute).
//
// This example recognizes a very simple programming language having
// assignment statements and if and while control structures. Look at the file
// example5.input for an example.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;
using boost::phoenix::val;
///////////////////////////////////////////////////////////////////////////////
// Token definition base, defines all tokens for the base grammar below
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example5_base_tokens : lexer_def<Lexer>
{
typedef typename Lexer::token_set token_set;
template <typename Self>
void def (Self& self)
{
// define the tokens to match
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
constant = "[0-9]+";
if_ = "if";
while_ = "while";
// define the whitespace to ignore (spaces, tabs, newlines and C-style
// comments)
white_space
= token_def<>("[ \\t\\n]+")
| "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
;
// associate the tokens and the token set with the lexer
self += token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
self += if_ | while_ | identifier;
self("WS") = white_space;
}
// these tokens have no value
token_def<omitted> if_, while_;
// The following two tokens have an associated value type, identifier
// carries a string (the identifier name) and constant carries the matched
// integer value.
//
// Note: any token value type explicitly specified during a token_def<>
// declaration needs to be listed during token type definition as
// well (see the typedef for the token_type below).
//
// The conversion of the matched input to an instance of this type occurs
// once (on first access), which makes token values as efficient as
// possible. Moreover, token instances are constructed once by the lexer
// library. From this point on tokens are passed by reference only,
// avoiding tokens being copied around.
token_def<std::string> identifier;
token_def<unsigned int> constant;
// token set to be used as the skip parser
token_set white_space;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition base, defines a basic language
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example5_base_grammar
: grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
template <typename TokenDef>
example5_base_grammar(TokenDef const& tok)
{
program
= +block
;
block
= '{' >> *statement >> '}'
;
statement
= assignment
| if_stmt
| while_stmt
;
assignment
= (tok.identifier >> '=' >> expression >> ';')
[
std::cout << val("assignment statement to: ") << _1 << "\n"
]
;
if_stmt
= (tok.if_ >> '(' >> expression >> ')' >> block)
[
std::cout << val("if expression: ") << _1 << "\n"
]
;
while_stmt
= (tok.while_ >> '(' >> expression >> ')' >> block)
[
std::cout << val("while expression: ") << _1 << "\n"
]
;
// since expression has a variant return type accommodating
// std::string and unsigned integer, both possible values may be
// returned to the calling rule
expression
= tok.identifier [ _val = _1 ]
| tok.constant [ _val = _1 ]
;
}
typedef
grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
base_type;
typedef typename base_type::skipper_type skipper_type;
rule<Iterator, skipper_type> program, block, statement;
rule<Iterator, skipper_type> assignment, if_stmt;
rule<Iterator, skipper_type> while_stmt;
// the expression is the only rule having a return value
typedef boost::variant<unsigned int, std::string> expression_type;
rule<Iterator, expression_type(), skipper_type> expression;
};
///////////////////////////////////////////////////////////////////////////////
// Token definition for derived lexer, defines additional tokens
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example5_tokens : example5_base_tokens<Lexer>
{
typedef typename Lexer::token_set token_set;
template <typename Self>
void def (Self& self)
{
// define the additional token to match
else_ = "else";
// associate the new token with the lexer; note that we add 'else' before
// anything else so it is added to the token set before the identifier
// token, otherwise "else" would be matched as an identifier
self = else_;
// call the base class definition function
example5_base_tokens<Lexer>::def(self);
}
// this token has no value
token_def<omitted> else_;
};
///////////////////////////////////////////////////////////////////////////////
// Derived grammar definition, defines a language extension
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example5_grammar : example5_base_grammar<Iterator, Lexer>
{
template <typename TokenDef>
example5_grammar(TokenDef const& tok)
: example5_base_grammar<Iterator, Lexer>(tok)
{
// we alter the if_stmt only
this->if_stmt
= this->if_stmt.copy() >> -(tok.else_ >> this->block)
;
}
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// This is the lexer token type to use. The second template parameter lists
// all attribute types used for token_def's during token definition (see
// example5_tokens<> above). Here we use the predefined lexertl token
// type, but any compatible token type may be used instead.
//
// If you don't list any token value types in the following declaration
// (or just use the default token type: lexertl_token<base_iterator_type>)
// it will compile and work just fine, just a bit less efficient. This is
// because the token value will be generated from the matched input
// sequence every time it is requested. But as soon as you specify at
// least one token value type you'll have to list all value types used
// for token_def<> declarations in the token definition class above,
// otherwise compilation errors will occur.
typedef lexertl_token<
base_iterator_type, boost::mpl::vector<unsigned int, std::string>
> token_type;
// Here we use the lexertl based lexer engine.
typedef lexertl_lexer<token_type> lexer_type;
// This is the token definition type (derived from the given lexer type).
typedef example5_tokens<lexer_type> example5_tokens;
// this is the iterator type exposed by the lexer
typedef lexer<example5_tokens>::iterator_type iterator_type;
// this is the type of the grammar to parse
typedef example5_grammar<iterator_type, lexer_type> example5_grammar;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
example5_tokens tokens; // Our token definition
example5_grammar def (tokens); // Our grammar definition
lexer<example5_tokens> lex(tokens); // Our lexer
grammar<example5_grammar> calc(def, def.program); // Our grammar
std::string str (read_from_file("example5.input"));
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
std::string::iterator it = str.begin();
iterator_type iter = lex.begin(it, str.end());
iterator_type end = lex.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
// Note how we use the token_set defined above as the skip parser. It must
// be explicitly wrapped inside a state directive, switching the lexer
// state for the duration of skipping whitespace.
std::string ws("WS");
bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}

263
example/lex/example6.cpp Normal file
View File

@@ -0,0 +1,263 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example shows how to create a simple lexer recognizing a couple of
// different tokens aimed at a simple language and how to use this lexer with
// a grammar. It shows how to associate values to tokens and how to access the
// token values from inside the grammar.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be
// ignored.
//
// The example demonstrates how to use the add(...)(...) syntax to associate
// token definitions with the lexer and how token ids can be used in the
// parser to refer to a token, without having to directly reference its
// definition.
//
// This example recognizes a very simple programming language having
// assignment statements and if and while control structures. Look at the file
// example6.input for an example.
//
// This example is essentially identical to example4.cpp. The only difference
// is that we use the self.add() syntax to define tokens and to associate them
// with the lexer.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;
using boost::phoenix::val;
///////////////////////////////////////////////////////////////////////////////
// Token id definitions
///////////////////////////////////////////////////////////////////////////////
enum token_ids
{
ID_CONSTANT = 1000,
ID_IF,
ID_ELSE,
ID_WHILE,
ID_IDENTIFIER
};
///////////////////////////////////////////////////////////////////////////////
// Token definitions
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example6_tokens : lexer_def<Lexer>
{
typedef typename Lexer::token_set token_set;
template <typename Self>
void def (Self& self)
{
// define the tokens to match
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
constant = "[0-9]+";
// define the whitespace to ignore (spaces, tabs, newlines and C-style
// comments)
white_space
= token_def<>("[ \\t\\n]+")
| "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
;
// associate the tokens and the token set with the lexer
self = token_def<>('(') | ')' | '{' | '}' | '=' | ';';
// Token definitions can be added by using some special syntactic
// construct as shown below.
// Note that the token definitions added this way expose the iterator
// pair pointing to the matched input stream as their attribute.
self.add
(constant, ID_CONSTANT)
("if", ID_IF)
("else", ID_ELSE)
("while", ID_WHILE)
(identifier, ID_IDENTIFIER)
;
// add whitespace tokens to another lexer state (here: "WS")
self("WS") = white_space;
}
// The following two tokens have an associated value type, identifier
// carries a string (the identifier name) and constant carries the matched
// integer value.
//
// Note: any token value type explicitly specified during a token_def<>
// declaration needs to be listed during token type definition as
// well (see the typedef for the token_type below).
//
// The conversion of the matched input to an instance of this type occurs
// once (on first access), which makes token values as efficient as
// possible. Moreover, token instances are constructed once by the lexer
// library. From this point on tokens are passed by reference only,
// avoiding tokens being copied around.
token_def<std::string> identifier;
token_def<unsigned int> constant;
// token set to be used as the skip parser
token_set white_space;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example6_grammar
: grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
template <typename TokenDef>
example6_grammar(TokenDef const& tok)
{
program
= +block
;
block
= '{' >> *statement >> '}'
;
statement
= assignment
| if_stmt
| while_stmt
;
assignment
= (tok.identifier >> '=' >> expression >> ';')
[
std::cout << val("assignment statement to: ")
<< _1 << "\n"
]
;
if_stmt
= ( token(ID_IF) >> '(' >> expression >> ')' >> block
>> -(token(ID_ELSE) >> block)
)
[
std::cout << val("if expression: ")
<< _2 << "\n"
]
;
while_stmt
= (token(ID_WHILE) >> '(' >> expression >> ')' >> block)
[
std::cout << val("while expression: ")
<< _2 << "\n"
]
;
// since expression has a variant return type accommodating
// std::string and unsigned integer, both possible values may be
// returned to the calling rule
expression
= tok.identifier [ _val = _1 ]
| tok.constant [ _val = _1 ]
;
}
typedef typename Lexer::token_set token_set;
typedef boost::variant<unsigned int, std::string> expression_type;
rule<Iterator, in_state_skipper<token_set> > program, block, statement;
rule<Iterator, in_state_skipper<token_set> > assignment, if_stmt;
rule<Iterator, in_state_skipper<token_set> > while_stmt;
// the expression is the only rule having a return value
rule<Iterator, expression_type(), in_state_skipper<token_set> > expression;
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// This is the lexer token type to use. The second template parameter lists
// all attribute types used for token_def's during token definition (see
// example6_tokens<> above). Here we use the predefined lexertl token
// type, but any compatible token type may be used instead.
//
// If you don't list any token value types in the following declaration
// (or just use the default token type: lexertl_token<base_iterator_type>)
// it will compile and work just fine, just a bit less efficient. This is
// because the token value will be generated from the matched input
// sequence every time it is requested. But as soon as you specify at
// least one token value type you'll have to list all value types used
// for token_def<> declarations in the token definition class above,
// otherwise compilation errors will occur.
typedef lexertl_token<
base_iterator_type, boost::mpl::vector<unsigned int, std::string>
> token_type;
// Here we use the lexertl based lexer engine.
typedef lexertl_lexer<token_type> lexer_type;
// This is the token definition type (derived from the given lexer type).
typedef example6_tokens<lexer_type> example6_tokens;
// this is the iterator type exposed by the lexer
typedef lexer<example6_tokens>::iterator_type iterator_type;
// this is the type of the grammar to parse
typedef example6_grammar<iterator_type, lexer_type> example6_grammar;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
example6_tokens tokens; // Our token definition
example6_grammar def (tokens); // Our grammar definition
lexer<example6_tokens> lex(tokens); // Our lexer
grammar<example6_grammar> calc(def, def.program); // Our grammar
std::string str (read_from_file("example6.input"));
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
std::string::iterator it = str.begin();
iterator_type iter = lex.begin(it, str.end());
iterator_type end = lex.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
// Note how we use the token_set defined above as the skip parser. It must
// be explicitly wrapped inside a state directive, switching the lexer
// state for the duration of skipping whitespace.
std::string ws("WS");
bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}

View File

@@ -0,0 +1,118 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example is equivalent to the following lex program:
//
// %{
// #include <stdio.h>
// %}
// %%
// [0-9]+ { printf("%s\n", yytext); }
// .|\n ;
// %%
// main()
// {
// yylex();
// }
//
// Its purpose is to print all the (integer) numbers found in a file
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct print_numbers_tokens : lexer_def<Lexer>
{
// define tokens and associate them with the lexer
template <typename Self>
void def (Self& self)
{
self = token_def<int>("[0-9]*") | ".|\n";
}
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct print_numbers_grammar : grammar_def<Iterator>
{
print_numbers_grammar()
{
start = *( token(lex::min_token_id) [ std::cout << _1 << "\n" ]
| token(lex::min_token_id+1)
)
;
}
rule<Iterator> start;
};
///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// the token type to be used; 'int' is available as the type of the token
// value and no lexer state is supported
typedef lexertl_token<
base_iterator_type, boost::mpl::vector<int>, boost::mpl::false_
> token_type;
// lexer type
typedef lexertl_lexer<token_type> lexer_type;
// iterator type exposed by the lexer
typedef
lexer_iterator<print_numbers_tokens<lexer_type> >::type
iterator_type;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
print_numbers_tokens<lexer_type> print_tokens; // Our token definition
print_numbers_grammar<iterator_type> def; // Our grammar definition
// Parsing is done based on the token stream, not the character
// stream read from the input.
std::string str (read_from_file(1 == argc ? "print_numbers.input" : argv[1]));
base_iterator_type first = str.begin();
bool r = tokenize_and_parse(first, str.end(), make_lexer(print_tokens),
make_parser(def));
if (r) {
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else {
std::string rest(first, str.end());
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}
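
For comparison, the same output can be produced without any lexer at all. The
following minimal sketch (not part of this commit) scans a string in plain C++
and prints every maximal run of digits, which is exactly what the token-based
example above does for its input file:

#include <cctype>
#include <iostream>
#include <string>

// print every maximal run of digits found in the input, one number per line
void print_numbers(std::string const& input)
{
    std::string::size_type i = 0;
    while (i < input.size())
    {
        if (std::isdigit(static_cast<unsigned char>(input[i])))
        {
            std::string::size_type start = i;
            while (i < input.size() &&
                   std::isdigit(static_cast<unsigned char>(input[i])))
                ++i;
            std::cout << input.substr(start, i - start) << "\n";
        }
        else
            ++i;
    }
}

int main()
{
    print_numbers("foo 42 bar 7x19\n");     // prints 42, 7 and 19
    return 0;
}

The lexer-based version starts to pay off once the token set grows beyond a
single pattern, because the token ids and attribute values can be consumed
directly by a Qi grammar as shown above.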


@@ -0,0 +1,13 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2008 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
project spirit-static-lexer-example ;
exe generate_tables : generate_tables.cpp ;
exe word_count_static : word_count_static.cpp ;


@@ -0,0 +1,42 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// The purpose of this example is to show how it is possible to use a lexer
// token definition for two purposes:
//
// . To generate C++ code implementing a static lexical analyzer capable
// of recognizing all defined tokens (this file)
// . To integrate the generated C++ lexer into the /Spirit/ framework.
// (see the file: word_count_static.cpp)
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/lex/lexer/lexertl/lexertl_generate_static.hpp>
#include <fstream>
#include "word_count_tokens.hpp"
using namespace boost::spirit;
using namespace boost::spirit::lex;
///////////////////////////////////////////////////////////////////////////////
//[wc_static_generate_main
int main(int argc, char* argv[])
{
// create the lexer object instance needed to invoke the generator
word_count_tokens<lexertl_lexer<> > word_count; // the token definition
// open the output file, where the generated tokenizer function will be
// written to
std::ofstream out(argc < 2 ? "word_count_static.hpp" : argv[1]);
// invoke the generator, passing the token definition, the output stream
// and the name prefix of the tokenizing function to be generated
char const* function_name = (argc < 3 ? "" : argv[2]);
return generate_static(make_lexer(word_count), out, function_name) ? 0 : -1;
}
//]


@@ -0,0 +1,118 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// The purpose of this example is to show how it is possible to use a lexer
// token definition for two purposes:
//
// . To generate C++ code implementing a static lexical analyzer capable
// of recognizing all defined tokens
// . To integrate the generated C++ lexer into the /Spirit/ framework.
//
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/spirit/include/qi.hpp>
//[wc_static_include
#include <boost/spirit/include/lex_lexer_static_lexertl.hpp>
//]
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <string>
#include "../example.hpp"
#include "word_count_tokens.hpp" // token definition
#include "word_count_static.hpp" // generated tokenizer
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wc_static_grammar
// This is an ordinary grammar definition following the rules defined by
// Spirit.Qi. There is nothing specific about it, except it gets the token
// definition class instance passed to the constructor to allow accessing the
// embedded token_def<> instances.
template <typename Iterator>
struct word_count_grammar : grammar_def<Iterator>
{
template <typename TokenDef>
word_count_grammar(TokenDef const& tok)
: c(0), w(0), l(0)
{
using boost::spirit::arg_names::_1;
using boost::phoenix::ref;
using boost::phoenix::size;
// associate the defined tokens with the lexer, at the same time
// defining the actions to be executed
start = *( tok.word [++ref(w), ref(c) += size(_1)]
| char_('\n') [++ref(l), ++ref(c)]
| token(IDANY) [++ref(c)]
)
;
}
std::size_t c, w, l; // counter for characters, words, and lines
rule<Iterator> start;
};
//]
///////////////////////////////////////////////////////////////////////////////
//[wc_static_main
int main(int argc, char* argv[])
{
// Define the token type to be used: 'std::string' is available as the type
// of the token value.
typedef lexertl_token<
char const*, boost::mpl::vector<std::string>
> token_type;
// Define the lexer type to be used as the base class for our token
// definition.
//
// This is the only place where the code is different from an equivalent
// dynamic lexical analyzer. We use the `lexertl_static_lexer<>` instead of
// the `lexertl_lexer<>` as the base class for our token definition type.
//
typedef lexertl_static_lexer<token_type> lexer_type;
// Define the iterator type exposed by the lexer.
typedef lexer_iterator<word_count_tokens<lexer_type> >::type iterator_type;
// Now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process.
word_count_tokens<lexer_type> word_count; // Our token definition
word_count_grammar<iterator_type> def (word_count); // Our grammar definition
// Read in the file into memory.
std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
char const* first = str.c_str();
char const* last = &first[str.size()];
// Parsing is done based on the token stream, not the character
// stream read from the input.
bool r = tokenize_and_parse(first, last, make_lexer(word_count),
make_parser(def));
if (r) { // success
std::cout << "lines: " << def.l << ", words: " << def.w
<< ", characters: " << def.c << "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
//]


@@ -0,0 +1,111 @@
// Copyright (c) 2008 Ben Hanson
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// Auto-generated by boost::lexer
#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_Feb_13_2008_12_01_20)
#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_Feb_13_2008_12_01_20
#include <boost/detail/iterator.hpp>
#include <boost/spirit/support/detail/lexer/char_traits.hpp>
// the generated table of state names and the tokenizer have to be
// defined in the boost::spirit::lex::static_ namespace
namespace boost { namespace spirit { namespace lex { namespace static_ {
// this table defines the names of the lexer states
char const* const lexer_state_names[1] =
{
"INITIAL",
};
template<typename Iterator>
std::size_t next_token (std::size_t &start_state_, Iterator const& start_,
Iterator &start_token_, Iterator const& end_)
{
enum {end_state_index, id_index, state_index, bol_index, eol_index,
dead_state_index, dfa_offset};
static const std::size_t npos = static_cast<std::size_t>(~0);
static const std::size_t lookup_[256] = {8, 8, 8, 8, 8, 8, 8, 8,
8, 7, 6, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
7, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8};
static const std::size_t dfa_alphabet_ = 9;
static const std::size_t dfa_[45] = {0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 3,
4, 2, 1, 65536, 0, 0, 0, 0,
0, 0, 2, 1, 65537, 0, 0, 0,
0, 0, 0, 0, 1, 65538, 0, 0,
0, 0, 0, 0, 0};
if (start_token_ == end_) return 0;
const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
Iterator curr_ = start_token_;
bool end_state_ = *ptr_ != 0;
std::size_t id_ = *(ptr_ + id_index);
Iterator end_token_ = start_token_;
while (curr_ != end_)
{
std::size_t const state_ =
ptr_[lookup_[static_cast<unsigned char>
(*curr_++)]];
if (state_ == 0) break;
ptr_ = &dfa_[state_ * dfa_alphabet_];
if (*ptr_)
{
end_state_ = true;
id_ = *(ptr_ + id_index);
end_token_ = curr_;
}
}
if (end_state_)
{
// return longest match
start_token_ = end_token_;
}
else
{
id_ = npos;
}
return id_;
}
}}}} // namespace boost::spirit::lex::static_
#endif
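
To make the generated interface clearer, here is a small hypothetical driver
(not part of this commit) that calls next_token() in a loop until nothing more
matches. It assumes the header above has been saved under the name
word_count_static.hpp; the printed ids are the raw numeric token ids stored in
the DFA tables.

#include <cstddef>
#include <iostream>
#include <string>
#include "word_count_static.hpp"    // the generated header shown above (assumed name)

int main()
{
    std::string const input("two words\n");
    std::string::const_iterator cursor = input.begin();
    std::string::const_iterator end = input.end();
    std::size_t state = 0;          // index into lexer_state_names, i.e. "INITIAL"

    while (cursor != end)
    {
        std::string::const_iterator token_start = cursor;
        // next_token() advances 'cursor' past the longest match and returns
        // the id of the matched token (or npos if nothing matched at all)
        std::size_t id = boost::spirit::lex::static_::next_token(
            state, input.begin(), cursor, end);
        if (0 == id || static_cast<std::size_t>(~0) == id)
            break;
        std::cout << id << ": \"" << std::string(token_start, cursor) << "\"\n";
    }
    return 0;
}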


@@ -0,0 +1,40 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#if !defined(SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM)
#define SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM
///////////////////////////////////////////////////////////////////////////////
// Token definition: We keep the base class for the token definition as a
// template parameter to allow this class to be used for
// both: the code generation and the lexical analysis
///////////////////////////////////////////////////////////////////////////////
//[wc_static_tokenids
enum tokenids
{
IDANY = boost::spirit::lex::min_token_id + 1,
};
//]
//[wc_static_tokendef
// This token definition class can be used without any change for all three
// possible use cases: a dynamic lexical analyzer, a code generator, and a
// static lexical analyzer.
template <typename BaseLexer>
struct word_count_tokens : boost::spirit::lex::lexer_def<BaseLexer>
{
template <typename Self>
void def (Self& self)
{
// define tokens and associate them with the lexer
word = "[^ \t\n]+";
self = word | '\n' | token_def<>(".", IDANY);
}
boost::spirit::lex::token_def<std::string> word;
};
//]
#endif


@@ -0,0 +1,164 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example is the equivalent to the following lex program:
//
// %{
// /* INITIAL is the default start state. COMMENT is our new */
// /* state where we remove comments. */
// %}
//
// %s COMMENT
// %%
// <INITIAL>"//".* ;
// <INITIAL>"/*" BEGIN COMMENT;
// <INITIAL>. ECHO;
// <INITIAL>[\n] ECHO;
// <COMMENT>"*/" BEGIN INITIAL;
// <COMMENT>. ;
// <COMMENT>[\n] ;
// %%
//
// main()
// {
// yylex();
// }
//
// Its purpose is to strip comments out of C code.
//
// Additionally this example demonstrates the use of lexer states to structure
// the lexer definition.
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
IDANY = lex::min_token_id + 10
};
template <typename Lexer>
struct strip_comments_tokens : lexer_def<Lexer>
{
template <typename Self>
void def (Self& self)
{
// define tokens and associate them with the lexer
cppcomment = "//.*\n";
ccomment = "/\\*";
endcomment = "\\*/";
// The following tokens are associated with the default lexer state
// (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
// strictly optional.
self.add
(cppcomment) // no explicit token id is associated
(ccomment)
(".", IDANY) // IDANY is the token id associated with this token
// definition
;
// The following tokens are associated with the lexer state "COMMENT".
// We switch lexer states from inside the parsing process using the
// in_state("COMMENT")[] parser component as shown below.
self("COMMENT").add
(endcomment)
(".", IDANY)
;
}
token_def<> cppcomment, ccomment, endcomment;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct strip_comments_grammar : grammar_def<Iterator>
{
template <typename TokenDef>
strip_comments_grammar(TokenDef const& tok)
{
// The in_state("COMMENT")[...] parser component switches the lexer
// state to be 'COMMENT' during the matching of the embedded parser.
start = *( tok.ccomment
>> in_state("COMMENT")
[
// the lexer is in the 'COMMENT' state during
// matching of the following parser components
*token(IDANY) >> tok.endcomment
]
| tok.cppcomment
| token(IDANY)
)
;
}
rule<Iterator> start;
};
///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// lexer type
typedef lexertl_lexer<lexertl_token<base_iterator_type> > lexer_type;
// iterator type exposed by the lexer
typedef
lexer_iterator<strip_comments_tokens<lexer_type> >::type
iterator_type;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
strip_comments_tokens<lexer_type> strip_comments; // Our token definition
strip_comments_grammar<iterator_type> def (strip_comments); // Our grammar definition
// Parsing is done based on the token stream, not the character
// stream read from the input.
std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
base_iterator_type first = str.begin();
bool r = tokenize_and_parse(first, str.end(), make_lexer(strip_comments),
make_parser(def));
if (r) {
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else {
std::string rest(first, str.end());
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}
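
The INITIAL/COMMENT pair of lexer states used above does the work of a small
hand-written state machine. For comparison, a rough plain C++ sketch of the
same comment stripping (not part of this commit, and deliberately ignoring
string and character literals) looks like this:

#include <iostream>
#include <string>

// strip // and /* */ comments from 'in'; string literals are not handled
std::string strip_comments(std::string const& in)
{
    std::string out;
    enum { INITIAL, COMMENT } state = INITIAL;
    for (std::string::size_type i = 0; i < in.size(); ++i)
    {
        if (INITIAL == state)
        {
            if ('/' == in[i] && i + 1 < in.size() && '/' == in[i + 1])
            {
                while (i < in.size() && '\n' != in[i]) ++i;     // skip to end of line
                if (i < in.size()) out += '\n';
            }
            else if ('/' == in[i] && i + 1 < in.size() && '*' == in[i + 1])
            {
                state = COMMENT;                // enter the COMMENT state
                ++i;                            // skip the '*'
            }
            else
                out += in[i];                   // ECHO
        }
        else                                    // COMMENT: look for the closing */
        {
            if ('*' == in[i] && i + 1 < in.size() && '/' == in[i + 1])
            {
                state = INITIAL;                // back to the INITIAL state
                ++i;                            // skip the '/'
            }
        }
    }
    return out;
}

int main()
{
    std::cout << strip_comments("int i; // counter\n/* block */ int j;\n");
    return 0;
}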


@@ -0,0 +1,121 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example is the equivalent to the following lex program:
//
// %{
// /* INITIAL is the default start state. COMMENT is our new */
// /* state where we remove comments. */
// %}
//
// %s COMMENT
// %%
// <INITIAL>"//".* ;
// <INITIAL>"/*" BEGIN COMMENT;
// <INITIAL>. ECHO;
// <INITIAL>[\n] ECHO;
// <COMMENT>"*/" BEGIN INITIAL;
// <COMMENT>. ;
// <COMMENT>[\n] ;
// %%
//
// main()
// {
// yylex();
// }
//
// Its purpose is to strip comments out of C code.
//
// Additionally this example demonstrates the use of lexer states to structure
// the lexer definition.
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/lex/lexer/lexer_actions.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <iostream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::lex;
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
IDANY = lex::min_token_id + 10,
IDEOL = lex::min_token_id + 11
};
template <typename Lexer>
struct strip_comments_tokens : lexer_def<Lexer>
{
template <typename Self>
void def (Self& self)
{
// define tokens and associate them with the lexer
cppcomment = "//[^\n]*";
ccomment = "/\\*";
endcomment = "\\*/";
any = ".";
eol = "\n";
// The following tokens are associated with the default lexer state
// (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
// strictly optional.
self = cppcomment
| ccomment [ set_state("COMMENT") ]
| eol [ echo_input(std::cout) ]
| any [ echo_input(std::cout) ]
;
// The following tokens are associated with the lexer state 'COMMENT'.
self("COMMENT")
= endcomment [ set_state("INITIAL") ]
| eol
| any
;
}
token_def<> cppcomment, ccomment, endcomment, any, eol;
};
///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// lexer type
typedef lexertl_actor_lexer<lexertl_token<base_iterator_type> > lexer_type;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
strip_comments_tokens<lexer_type> strip_comments; // Our token definition
// Parsing is done based on the token stream, not the character
// stream read from the input.
std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
base_iterator_type first = str.begin();
bool r = tokenize(first, str.end(), make_lexer(strip_comments));
if (!r) {
std::string rest(first, str.end());
std::cerr << "Lexical analysis failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}

example/lex/word_count.cpp

@@ -0,0 +1,172 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example is the equivalent to the following lex program:
/*
//[wcp_flex_version
%{
int c = 0, w = 0, l = 0;
%}
word [^ \t\n]+
eol \n
%%
{word} { ++w; c += yyleng; }
{eol} { ++c; ++l; }
. { ++c; }
%%
main()
{
yylex();
printf("%d %d %d\n", l, w, c);
}
//]
*/
// Its purpose is to do the word count function of the wc command in UNIX. It
// prints the number of lines, words and characters in a file.
//
// The example additionally demonstrates how to use the add_pattern(...)(...)
// syntax to define lexer patterns. These patterns are essentially parameter-
// less 'macros' for regular expressions, which simplify their
// definition.
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE
//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
//]
#include <iostream>
#include <string>
#include "example.hpp"
//[wcp_namespaces
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
//]
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
enum tokenids
{
IDANY = lex::min_token_id + 10
};
//]
//[wcp_token_definition
template <typename Lexer>
struct word_count_tokens : lexer_def<Lexer>
{
template <typename Self>
void def (Self& self)
{
// define patterns (lexer macros) to be used during token definition
// below
self.add_pattern
("WORD", "[^ \t\n]+")
;
// define tokens and associate them with the lexer
word = "{WORD}"; // reference the pattern 'WORD' as defined above
// this lexer will recognize 3 token types: words, newlines, and
// everything else
self.add
(word) // no token id is needed here
('\n') // characters are usable as tokens as well
(".", IDANY)
;
}
token_def<std::string> word;
};
//]
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wcp_grammar_definition
template <typename Iterator>
struct word_count_grammar : grammar_def<Iterator>
{
template <typename TokenDef>
word_count_grammar(TokenDef const& tok)
: c(0), w(0), l(0)
{
using boost::phoenix::ref;
using boost::phoenix::size;
// As documented in the Spirit.Qi documentation, any placeholders
// (_1 et.al.) used in semantic actions inside a grammar need to be
// imported from the namespace boost::spirit::arg_names, and not from
// the corresponding namespace in Phoenix.
using boost::spirit::arg_names::_1;
start = *( tok.word [++ref(w), ref(c) += size(_1)]
| char_('\n') [++ref(c), ++ref(l)]
| token(IDANY) [++ref(c)]
)
;
}
std::size_t c, w, l;
rule<Iterator> start;
};
//]
///////////////////////////////////////////////////////////////////////////////
//[wcp_main
int main(int argc, char* argv[])
{
/*< define the token type to be used: `std::string` is available as the
type of the token value
>*/ typedef lexertl_token<
char const*, boost::mpl::vector<std::string>
> token_type;
/*< define the lexer type to use implementing the state machine
>*/ typedef lexertl_lexer<token_type> lexer_type;
/*< define the iterator type exposed by the lexer type
>*/ typedef lexer_iterator<word_count_tokens<lexer_type> >::type iterator_type;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
word_count_tokens<lexer_type> word_count; // Our token definition
word_count_grammar<iterator_type> def (word_count); // Our grammar definition
// read the file into memory
std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
char const* first = str.c_str();
char const* last = &first[str.size()];
// Parsing is done based on the token stream, not the character
// stream read from the input. The function `tokenize_and_parse()` wraps
// the passed iterator range `[first, last)` with the lexical analyzer and
// uses its exposed iterators to parse the token stream.
bool r = tokenize_and_parse(first, last, make_lexer(word_count),
make_parser(def));
if (r) {
std::cout << "lines: " << def.l << ", words: " << def.w
<< ", characters: " << def.c << "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
//]
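
As a cross-check of what the grammar above counts, an equivalent plain C++
sketch (not part of this commit) applies the same rules: a word is a maximal
run of characters other than space, tab, or newline, every newline increments
the line count, and every character is counted:

#include <iostream>
#include <string>

int main()
{
    std::string str("two words\nand some more\n");
    std::size_t c = 0, w = 0, l = 0;
    bool in_word = false;
    for (std::string::size_type i = 0; i < str.size(); ++i)
    {
        ++c;                                    // every character counts
        char ch = str[i];
        if ('\n' == ch) ++l;
        bool is_sep = (' ' == ch || '\t' == ch || '\n' == ch);
        if (!is_sep && !in_word) ++w;           // a new word starts here
        in_word = !is_sep;
    }
    std::cout << "lines: " << l << ", words: " << w
              << ", characters: " << c << "\n";
    return 0;
}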


@@ -0,0 +1,184 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example is the equivalent to the following flex program:
/*
//[wcf_flex_version
%{
#define ID_WORD 1000
#define ID_EOL 1001
#define ID_CHAR 1002
int c = 0, w = 0, l = 0;
%}
%%
[^ \t\n]+ { return ID_WORD; }
\n { return ID_EOL; }
. { return ID_CHAR; }
%%
bool count(int tok)
{
switch (tok) {
case ID_WORD: ++w; c += yyleng; break;
case ID_EOL: ++l; ++c; break;
case ID_CHAR: ++c; break;
default:
return false;
}
return true;
}
void main()
{
int tok = EOF;
do {
tok = yylex();
if (!count(tok))
break;
} while (EOF != tok);
printf("%d %d %d\n", l, w, c);
}
//]
*/
// Its purpose is to do the word count function of the wc command in UNIX. It
// prints the number of lines, words and characters in a file.
//
// This example shows how to use the tokenize() function together with a
// simple functor, which gets executed whenever a token is matched in the
// input sequence.
// #define BOOST_SPIRIT_LEXERTL_DEBUG
//[wcf_includes
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
//]
#include <iostream>
#include <string>
#include "example.hpp"
//[wcf_namespaces
using namespace boost::spirit;
using namespace boost::spirit::lex;
//]
///////////////////////////////////////////////////////////////////////////////
// Token id definitions
///////////////////////////////////////////////////////////////////////////////
//[wcf_token_ids
enum token_ids
{
ID_WORD = 1000,
ID_EOL,
ID_CHAR
};
//]
//[wcf_token_definition
/*` The template `word_count_tokens` defines three different tokens:
`ID_WORD`, `ID_EOL`, and `ID_CHAR`, representing a word (anything except
a whitespace or a newline), a newline character, and any other character
(`ID_WORD`, `ID_EOL`, and `ID_CHAR` are enum values representing the token
ids, but could be anything else convertible to an integer as well).
The direct base class of any token definition class needs to be the
template `lexer_def<>`, where the corresponding template parameter (here:
`lexertl_lexer<BaseIterator>`) defines which underlying lexer engine has
to be used to provide the required state machine functionality. In this
example we use the Lexertl based lexer engine as the underlying lexer type.
*/
template <typename Lexer>
struct word_count_tokens : lexer_def<Lexer>
{
template <typename Self>
void def (Self& self)
{
// define tokens (the regular expression to match and the corresponding
// token id) and add them to the lexer
self.add
("[^ \t\n]+", ID_WORD) // words (anything except ' ', '\t' or '\n')
("\n", ID_EOL) // newline characters
(".", ID_CHAR) // anything else is a plain character
;
}
};
//]
//[wcf_functor
/*` In this example the struct 'counter' is used as a functor counting the
characters, words and lines in the analyzed input sequence by identifying
the matched tokens as passed from the /Spirit.Lex/ library.
*/
struct counter
{
//<- this is an implementation detail and doesn't show up in the documentation
typedef bool result_type;
//->
// the function operator gets called for each of the matched tokens
// c, l, w are references to the counters used to keep track of the numbers
template <typename Token>
bool operator()(Token const& t, std::size_t& c, std::size_t& w, std::size_t& l) const
{
switch (t.id()) {
case ID_WORD: // matched a word
// since we're using a default token type in this example, every
// token instance contains an `iterator_range<BaseIterator>` as its
// token value pointing to the matched character sequence in the input
++w; c += t.value().size();
break;
case ID_EOL: // matched a newline character
++l; ++c;
break;
case ID_CHAR: // matched something else
++c;
break;
}
return true; // always continue to tokenize
}
};
//]
///////////////////////////////////////////////////////////////////////////////
//[wcf_main
/*` The main function simply loads the given file into memory (as a
`std::string`), instantiates an instance of the token definition template
using the correct iterator type (`word_count_tokens<char const*>`),
and finally calls `lex::tokenize`, passing an instance of the counter functor
defined above. The return value of `lex::tokenize` will be `true` if the
whole input sequence has been successfully tokenized, and `false` otherwise.
*/
int main(int argc, char* argv[])
{
// these variables are used to count characters, words and lines
std::size_t c = 0, w = 0, l = 0;
// read input from the given file
std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
// create the token definition instance needed to invoke the lexical analyzer
word_count_tokens<lexertl_lexer<> > word_count_functor;
// tokenize the given string, the bound functor gets invoked for each of
// the matched tokens
char const* first = str.c_str();
char const* last = &first[str.size()];
bool r = lex::tokenize(first, last, make_lexer(word_count_functor),
boost::bind(counter(), _1, boost::ref(c), boost::ref(w), boost::ref(l)));
// print results
if (r) {
std::cout << "lines: " << l << ", words: " << w
<< ", characters: " << c << "\n";
}
else {
std::string rest(first, last);
std::cout << "Lexical analysis failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
//]



@@ -0,0 +1,138 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example is the equivalent to the following lex program:
/*
//[wcl_flex_version
%{
int c = 0, w = 0, l = 0;
%}
%%
[^ \t\n]+ { ++w; c += yyleng; }
\n { ++c; ++l; }
. { ++c; }
%%
main()
{
yylex();
printf("%d %d %d\n", l, w, c);
}
//]
*/
// Its purpose is to do the word count function of the wc command in UNIX. It
// prints the number of lines, words and characters in a file.
//
// This example shows how to use semantic actions associated with token
// definitions to directly attach actions to tokens. These get executed
// whenever the corresponding token is matched in the input sequence. Note
// how this example implements all functionality directly in the lexer
// definition without any need for a parser.
// #define BOOST_SPIRIT_LEXERTL_DEBUG
//[wcl_includes
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_algorithm.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
//]
#include <iostream>
#include <string>
#include "example.hpp"
//[wcl_namespaces
using namespace boost::spirit;
using namespace boost::spirit::lex;
//]
///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
//
// Note that the token definition type is derived from the 'lexertl_actor_lexer'
// template, which is necessary to be able to use lexer semantic actions.
///////////////////////////////////////////////////////////////////////////////
//[wcl_token_definition
template <typename Lexer>
struct word_count_tokens : lexer_def<Lexer>
{
word_count_tokens()
: c(0), w(0), l(0),
word("[^ \t\n]+"), eol("\n"), any(".") // define tokens
{}
template <typename Self>
void def (Self& self)
{
using boost::phoenix::ref;
using boost::phoenix::distance;
// Note that all placeholders used in lexer semantic actions in
// conjunction with functors created based on Phoenix2 need to be from
// the namespace boost::phoenix::arg_names (not spirit::arg_names).
// Using the wrong placeholders leads to subtle compilation errors
// which are difficult to trace back to their cause.
using boost::phoenix::arg_names::_1;
// associate tokens with the lexer
self = word [++ref(w), ref(c) += distance(_1)]
| eol [++ref(c), ++ref(l)]
| any [++ref(c)]
;
}
std::size_t c, w, l;
token_def<> word, eol, any;
};
//]
///////////////////////////////////////////////////////////////////////////////
//[wcl_main
int main(int argc, char* argv[])
{
// read input from the given file
std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
// Specifying 'omitted' as the token value type generates a token class not
// holding any token value at all (not even the iterator_range of the
// matched input sequence), thereby optimizing the token, the lexer, and
// possibly the parser implementation as much as possible.
//
// Specifying mpl::false_ as the 3rd template parameter generates a token
// type and an iterator, both holding no lexer state, allowing for even more
// aggressive optimizations.
//
// As a result the token instances contain the token ids as the only data
// member.
typedef lexertl_token<char const*, omitted, boost::mpl::false_> token_type;
// lexer type
typedef lexertl_actor_lexer<token_type> lexer_type;
// create the lexer object instance needed to invoke the lexical analysis
word_count_tokens<lexer_type> word_count_lexer;
// tokenize the given string, all generated tokens are discarded
char const* first = str.c_str();
char const* last = &first[str.size()];
bool r = tokenize(first, last, make_lexer(word_count_lexer));
if (r) {
std::cout << "lines: " << word_count_lexer.l
<< ", words: " << word_count_lexer.w
<< ", characters: " << word_count_lexer.c
<< "\n";
}
else {
std::string rest(first, last);
std::cout << "Lexical analysis failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
//]

example/qi/Jamfile

@@ -0,0 +1,46 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
project spirit-qi-example ;
exe sum : sum.cpp ;
exe complex_number : complex_number.cpp ;
exe employee : employee.cpp ;
exe roman : roman.cpp ;
exe mini_xml1 : mini_xml1.cpp ;
exe mini_xml2 : mini_xml2.cpp ;
exe num_list : num_list.cpp ;
exe num_list2 : num_list2.cpp ;
exe num_list3 : num_list3.cpp ;
exe calc1 : calc1.cpp ;
exe calc2 : calc2.cpp ;
exe calc3 : calc3.cpp ;
exe calc4 : calc4.cpp ;
exe calc5 : calc5.cpp ;
exe calc6 :
calc6/calc6.cpp
calc6/calc6a.cpp
calc6/calc6b.cpp
calc6/calc6c.cpp
;
exe calc7 :
calc7/calc7.cpp
calc7/calc7a.cpp
calc7/calc7b.cpp
calc7/calc7c.cpp
;
exe mini_c :
mini_c/mini_c.cpp
mini_c/mini_ca.cpp
mini_c/mini_cb.cpp
mini_c/mini_cc.cpp
mini_c/mini_cd.cpp
;

example/qi/calc1.cpp

@@ -0,0 +1,104 @@
/*=============================================================================
Copyright (c) 2001-2007 Joel de Guzman
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
// Plain calculator example demonstrating the grammar. The parser is a
// syntax checker only and does not do any semantic evaluation.
//
// [ JDG May 10, 2002 ] spirit1
// [ JDG March 4, 2007 ] spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
///////////////////////////////////////////////////////////////////////////////
// Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, space_type>
{
calculator()
{
expression =
term
>> *( ('+' >> term)
| ('-' >> term)
)
;
term =
factor
>> *( ('*' >> factor)
| ('/' >> factor)
)
;
factor =
uint_
| '(' >> expression >> ')'
| ('-' >> factor)
| ('+' >> factor)
;
}
rule<Iterator, space_type> expression, term, factor;
};
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Expression parser...\n\n";
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Type an expression...or [q or Q] to quit\n\n";
typedef std::string::const_iterator iterator_type;
typedef calculator<iterator_type> calculator;
calculator def; // Our grammar definition
grammar<calculator> calc(def, def.expression); // Our grammar
std::string str;
while (std::getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
break;
std::string::const_iterator iter = str.begin();
std::string::const_iterator end = str.end();
bool r = phrase_parse(iter, end, calc, space);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::string rest(iter, end);
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
}
}
std::cout << "Bye... :-) \n\n";
return 0;
}

example/qi/calc2.cpp

@@ -0,0 +1,123 @@
/*=============================================================================
Copyright (c) 2001-2007 Joel de Guzman
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
// A Calculator example demonstrating the grammar and semantic actions
// using phoenix to "bind" plain functions. The parser prints code suitable
// for a stack based virtual machine.
//
// [ JDG May 10, 2002 ] spirit1
// [ JDG March 4, 2007 ] spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;
using boost::phoenix::bind;
///////////////////////////////////////////////////////////////////////////////
// Semantic actions
///////////////////////////////////////////////////////////////////////////////
namespace
{
void do_int(int n) { std::cout << "push " << n << std::endl; }
void do_add() { std::cout << "add\n"; }
void do_subt() { std::cout << "subtract\n"; }
void do_mult() { std::cout << "mult\n"; }
void do_div() { std::cout << "divide\n"; }
void do_neg() { std::cout << "negate\n"; }
}
///////////////////////////////////////////////////////////////////////////////
// Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, space_type>
{
calculator()
{
expression =
term
>> *( ('+' >> term [bind(&do_add)])
| ('-' >> term [bind(&do_subt)])
)
;
term =
factor
>> *( ('*' >> factor [bind(&do_mult)])
| ('/' >> factor [bind(&do_div)])
)
;
factor =
uint_ [bind(&do_int, _1)]
| '(' >> expression >> ')'
| ('-' >> factor [bind(&do_neg)])
| ('+' >> factor)
;
}
rule<Iterator, space_type> expression, term, factor;
};
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Expression parser...\n\n";
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Type an expression...or [q or Q] to quit\n\n";
typedef std::string::const_iterator iterator_type;
typedef calculator<iterator_type> calculator;
calculator def; // Our grammar definition
grammar<calculator> calc(def, def.expression); // Our grammar
std::string str;
while (std::getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
break;
std::string::const_iterator iter = str.begin();
std::string::const_iterator end = str.end();
bool r = phrase_parse(iter, end, calc, space);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "-------------------------\n";
}
else
{
std::string rest(iter, end);
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
}
}
std::cout << "Bye... :-) \n\n";
return 0;
}
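
The instructions printed by the semantic actions above ("push n", "add",
"subtract", "mult", "divide", "negate") are exactly what a trivial stack
machine consumes. A hypothetical back end (not part of this commit) that
executes such an instruction stream could look like this:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// evaluate an instruction stream as printed by the calculator's actions;
// the input is assumed to be well formed (no underflow checks)
int run(std::istream& in)
{
    std::vector<int> stack;
    std::string op;
    while (in >> op)
    {
        if ("push" == op)
        {
            int n = 0;
            in >> n;
            stack.push_back(n);
        }
        else if ("negate" == op)
        {
            stack.back() = -stack.back();
        }
        else    // binary operations pop two operands and push the result
        {
            int rhs = stack.back(); stack.pop_back();
            int lhs = stack.back(); stack.pop_back();
            if ("add" == op)           stack.push_back(lhs + rhs);
            else if ("subtract" == op) stack.push_back(lhs - rhs);
            else if ("mult" == op)     stack.push_back(lhs * rhs);
            else if ("divide" == op)   stack.push_back(lhs / rhs);
        }
    }
    return stack.back();
}

int main()
{
    std::istringstream code("push 1 push 2 push 3 mult add");  // 1 + 2 * 3
    std::cout << run(code) << "\n";                            // prints 7
    return 0;
}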

example/qi/calc3.cpp

@@ -0,0 +1,110 @@
/*=============================================================================
Copyright (c) 2001-2007 Joel de Guzman
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
// A calculator example demonstrating the grammar and semantic actions
// using phoenix to do the actual expression evaluation. The parser is
// essentially an "interpreter" that evaluates expressions on the fly.
//
// [ JDG June 29, 2002 ] spirit1
// [ JDG March 5, 2007 ] spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;
///////////////////////////////////////////////////////////////////////////////
// Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, int(), space_type>
{
calculator()
{
expression =
term [_val = _1]
>> *( ('+' >> term [_val += _1])
| ('-' >> term [_val -= _1])
)
;
term =
factor [_val = _1]
>> *( ('*' >> factor [_val *= _1])
| ('/' >> factor [_val /= _1])
)
;
factor =
uint_ [_val = _1]
| '(' >> expression [_val = _1] >> ')'
| ('-' >> factor [_val = -_1])
| ('+' >> factor [_val = _1])
;
}
rule<Iterator, int(), space_type> expression, term, factor;
};
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Expression parser...\n\n";
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Type an expression...or [q or Q] to quit\n\n";
typedef std::string::const_iterator iterator_type;
typedef calculator<iterator_type> calculator;
calculator def; // Our grammar definition
grammar<calculator> calc(def, def.expression); // Our grammar
std::string str;
int result;
while (std::getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
break;
std::string::const_iterator iter = str.begin();
std::string::const_iterator end = str.end();
bool r = phrase_parse(iter, end, calc, result, space);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "result = " << result << std::endl;
std::cout << "-------------------------\n";
}
else
{
std::string rest(iter, end);
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
}
}
std::cout << "Bye... :-) \n\n";
return 0;
}
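
The _val accumulation used in this grammar mirrors an ordinary recursive
descent evaluator. As a cross-check, here is a compact hand-written sketch
(not part of this commit; it skips whitespace manually and does no error
handling) implementing the same expression/term/factor rules:

#include <cctype>
#include <iostream>
#include <string>

struct evaluator
{
    std::string::const_iterator it, end;

    explicit evaluator(std::string const& s) : it(s.begin()), end(s.end()) {}

    void skip()
    {
        while (it != end && std::isspace(static_cast<unsigned char>(*it)))
            ++it;
    }

    int factor()
    {
        skip();
        if (it != end && '(' == *it)
        {
            ++it;                               // '('
            int v = expression();
            skip();
            if (it != end) ++it;                // ')'
            return v;
        }
        if (it != end && '-' == *it) { ++it; return -factor(); }
        if (it != end && '+' == *it) { ++it; return factor(); }
        int v = 0;                              // unsigned integer literal
        while (it != end && std::isdigit(static_cast<unsigned char>(*it)))
            v = v * 10 + (*it++ - '0');
        return v;
    }

    int term()
    {
        int v = factor();
        for (skip(); it != end && ('*' == *it || '/' == *it); skip())
        {
            char op = *it++;
            int rhs = factor();
            v = ('*' == op) ? v * rhs : v / rhs;
        }
        return v;
    }

    int expression()
    {
        int v = term();
        for (skip(); it != end && ('+' == *it || '-' == *it); skip())
        {
            char op = *it++;
            int rhs = term();
            v = ('+' == op) ? v + rhs : v - rhs;
        }
        return v;
    }
};

int main()
{
    std::string input("(1 + 2) * 3 - -4");
    std::cout << evaluator(input).expression() << "\n";     // prints 13
    return 0;
}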

example/qi/calc3_lexer.cpp

@@ -0,0 +1,201 @@
/*=============================================================================
Copyright (c) 2001-2007 Joel de Guzman
Copyright (c) 2001-2008 Hartmut Kaiser
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
// A calculator example demonstrating the grammar and semantic actions
// using phoenix to do the actual expression evaluation. The parser is
// essentially an "interpreter" that evaluates expressions on the fly.
//
// Additionally this example shows how to build and use a lexer based on
// Ben Hanson's Lexertl (http://www.benhanson.net/lexertl.html). This way the
// parser matches the grammar against the tokens generated by the lexer
// component and not against the input character stream.
//
// Even if the benefits of using a lexer for this small calculator grammar may
// not outweigh the corresponding overhead, we provide this example because
// it allows us to concentrate on the essentials without having to understand
// the semantics first.
//
// [ JDG June 29, 2002 ] spirit1
// [ JDG March 5, 2007 ] spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;
///////////////////////////////////////////////////////////////////////////////
// Our token definition
// This class is used to define all the tokens to be recognized by the lexer.
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct calculator_tokens : lexer_def<Lexer>
{
template <typename Self>
void def (Self& self)
{
// unsigned integer token definition
ui = "[1-9][0-9]*";
// whitespace token definitions
ws = "[ \\t\\f\\v]+";
c_comment = "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/";
// build token set
skipper = ws | c_comment; // += is allowed as well
// associate the tokens and the token set with the lexer
// default lexer state
self = token_def<>('+') | '-' | '*' | '/' | '(' | ')';
self += ui; // still default state
// The token_set 'skipper' gets assigned to a separate lexer state
// which allows it to be used separately from the main tokenization
// (it is used as the skipper parser below)
self("SKIPPER") = skipper; // lexer state "SKIPPER"
}
// These are the tokens to be recognized by the lexer.
token_def<unsigned int> ui; // matched tokens will have an unsigned int
token_def<> ws, c_comment; // attribute will not be used
// This is the only token set explicitly defined by this lexer because it
// needs to be accessible from the outside (used as skip parser below).
typename Lexer::token_set skipper;
};
///////////////////////////////////////////////////////////////////////////////
// Our calculator grammar
//
// The difference from the original example (calc3.cpp) is that we are
// specifying a second template parameter referring to the lexer. Further, we
// use the token set defined above as the skip parser.
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct calculator : grammar_def<Iterator, int(), typename Lexer::token_set>
{
template <typename TokenDef>
calculator(TokenDef const& tok)
{
// grammar
expression =
term [_val = _1]
>> *( ('+' >> term [_val += _1])
| ('-' >> term [_val -= _1])
)
;
term =
factor [_val = _1]
>> *( ('*' >> factor [_val *= _1])
| ('/' >> factor [_val /= _1])
)
;
factor =
tok.ui [_val = _1]
| '(' >> expression [_val = _1] >> ')'
| ('-' >> factor [_val = -_1])
| ('+' >> factor [_val = _1])
;
}
rule<Iterator, int(), typename Lexer::token_set> expression, term, factor;
};
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Expression parser...\n\n";
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Type an expression...or [q or Q] to quit\n\n";
// iterator type used to expose the underlying input stream
typedef std::string::const_iterator base_iterator_type;
// This is the lexer token type to use. The second template parameter lists
// all attribute types used for token_def's during token definition (see
// calculator_tokens<> above). Here we use the predefined lexertl token
// type, but any compatible token type may be used.
typedef lexertl_token<
base_iterator_type, boost::mpl::vector<unsigned int>
> token_type;
// This is the lexer type to use to tokenize the input.
// Here we use the lexertl based lexer engine.
typedef lexertl_lexer<base_iterator_type, token_type> lexer_type;
// This is the token definition type (derived from the given lexer type).
typedef calculator_tokens<lexer_type> calculator_tokens;
// this is the iterator type exposed by the lexer
typedef lexer<calculator_tokens>::iterator_type iterator_type;
// this is the type of the grammar to parse
typedef calculator<iterator_type, lexer_type> calculator;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
calculator_tokens tokens; // Our token definition
calculator def (tokens); // Our grammar definition
lexer<calculator_tokens> lex(tokens); // Our lexer
grammar<calculator> calc(def, def.expression); // Our grammar
// get input line by line and feed the parser to evaluate the expressions
// read in from the input
std::string str;
int result;
while (std::getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
break;
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
iterator_type iter = lex.begin(str.begin(), str.end());
iterator_type end = lex.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
// Note how we use the token_set defined above as the skip parser.
bool r = phrase_parse(iter, end, calc, result, tokens.skipper);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "result = " << result << std::endl;
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
}
std::cout << "Bye... :-) \n\n";
return 0;
}

example/qi/calc4.cpp

@@ -0,0 +1,126 @@
/*=============================================================================
Copyright (c) 2001-2007 Joel de Guzman
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
// This time, we'll incorporate error handling and reporting.
//
// [ JDG June 29, 2002 ] spirit1
// [ JDG March 5, 2007 ] spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;
using boost::phoenix::val;
using boost::phoenix::construct;
///////////////////////////////////////////////////////////////////////////////
// Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, int(), space_type>
{
calculator()
{
expression =
term [_val = _1]
>> *( ('+' > term [_val += _1])
| ('-' > term [_val -= _1])
)
;
term =
factor [_val = _1]
>> *( ('*' > factor [_val *= _1])
| ('/' > factor [_val /= _1])
)
;
factor =
uint_ [_val = _1]
| '(' > expression [_val = _1] > ')'
| ('-' > factor [_val = -_1])
| ('+' > factor [_val = _1])
;
expression.name("expression");
term.name("term");
factor.name("factor");
on_error<fail>
(
expression
, std::cout
<< val("Error! Expecting ")
<< _4 // what failed?
<< val(" here: \"")
<< construct<std::string>(_3, _2) // iterators to error-pos, end
<< val("\"")
<< std::endl
);
}
rule<Iterator, int(), space_type> expression, term, factor;
};
///////////////////////////////////////////////////////////////////////////////
// Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Expression parser...\n\n";
std::cout << "/////////////////////////////////////////////////////////\n\n";
std::cout << "Type an expression...or [q or Q] to quit\n\n";
typedef std::string::const_iterator iterator_type;
typedef calculator<iterator_type> calculator;
calculator def; // Our grammar definition
grammar<calculator> calc(def, def.expression); // Our grammar
std::string str;
int result;
while (std::getline(std::cin, str))
{
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
break;
std::string::const_iterator iter = str.begin();
std::string::const_iterator end = str.end();
bool r = phrase_parse(iter, end, calc, result, space);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "result = " << result << std::endl;
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
}
std::cout << "Bye... :-) \n\n";
return 0;
}
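
The on_error<fail> handler above does nothing more than format a message from
what Qi passes to it: a description of the failed expectation (_4) and the
iterators delimiting the error position and the end of input (_3, _2). Purely
as an illustration (not part of this commit), a plain C++ helper producing a
message in the same style might look like the following; the rule name and the
error position are made up for the example:

#include <iostream>
#include <string>

// format an error message in the same style as the on_error handler above
void report_expectation_failure(std::string const& what,
                                std::string::const_iterator error_pos,
                                std::string::const_iterator end)
{
    std::cout << "Error! Expecting " << what
              << " here: \"" << std::string(error_pos, end) << "\""
              << std::endl;
}

int main()
{
    std::string const input("1 + (2 * ");      // the closing operand and ')' are missing
    report_expectation_failure("factor", input.end(), input.end());
    return 0;
}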
