mirror of
https://github.com/boostorg/spirit_x4.git
synced 2026-01-19 04:42:11 +00:00
spirit2 ! :)
[SVN r44360]
@@ -11,7 +11,7 @@
// or https://sf.net/mailarchive/forum.php?thread_id=2692308&forum_id=1595
// for a description of the bug being tested for by this program
//
// the problem should be solved with version 1.3 of phoenix/closures.hpp
// the problem should be solved with version 1.3 of phoenix/closures.hpp>

#if defined(BOOST_SPIRIT_DEBUG) && defined(__GNUC__) && defined(__WIN32__)
// It seems that MinGW has some problems with threads and iostream ?
27
doc/Jamfile
Normal file
@@ -0,0 +1,27 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2007 Hartmut Kaiser
#
# Use, modification and distribution is subject to the Boost Software
# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================

project spirit/doc ;

import boostbook : boostbook ;
using quickbook : quickbook ;

boostbook spirit2
    :
    spirit2.qbk
    :
    <xsl:param>boost.root=../../../..
    <xsl:param>boost.libraries=../../../libraries.htm
    <xsl:param>html.stylesheet=../../../../doc/html/boostbook.css
    <xsl:param>chunk.section.depth=5
    <xsl:param>chunk.first.sections=1
    <xsl:param>toc.section.depth=4
    <xsl:param>toc.max.depth=4
    <xsl:param>generate.section.toc.level=4
    ;
46
doc/_concepts_template_.qbk
Normal file
@@ -0,0 +1,46 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section XXX]

[heading Description]

Description of XXX concept

[variablelist Notation
    [[`xxx`]    [An XXX]]
]

[heading Valid Expressions]

(For any Forward Sequence the following expressions must be valid:)

In addition to the requirements defined in _XXX-Basic_concept_, for any
XXX the following must be met:

[table
    [[Expression]    [Semantics]    [Return type]    [Complexity]]
    [[`xxx`]         [Semantics of `xxx`]    [XXX]    [Constant]]
]

[heading Type Requirements]

[table
    [[Expression]    [Requirements]]
    [[`xxx`]         [Requirements for `xxx`]]
]

[heading Invariants]

For any XXX xxx the following invariants always hold:

[heading Models]

Links to models of XXX concept

[endsect]
56
doc/_reference_template_.qbk
Normal file
@@ -0,0 +1,56 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section XXX]

[heading Description]

Description of XXX

[heading Header]

    #include <boost/spirit/xxx.hpp>

[heading Synopsis]

    template <typename T>
    struct XXX;

[heading Template parameters]

[table
    [[Parameter]    [Description]    [Default]]
    [[`T`]          [What is T]      []]
]

[heading Model of]

Link to concept

[heading Objects]

Objects provided by the library

[variablelist Notation
    [[`xxx`]    [An XXX]]
]

Semantics of an expression is defined only where it differs from, or is not
defined in _concept-of_XXX_.

[table
    [[Expression]    [Semantics]    [Return type]    [Complexity]]
    [[`xxx`]         [Semantics of `xxx`]    [XXX]    [Constant]]
]

[heading Example]

Real example code. Use Quickbook import mechanism to link to actual
working code snippets here.

[endsect]
147
doc/acknowledgments.qbk
Normal file
@@ -0,0 +1,147 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Acknowledgments]

This version of Spirit is a complete rewrite of the /classic/ Spirit many
people have contributed to (see below). But there are a couple of people
who have already managed to help significantly during this rewrite. We would
like to express our special acknowledgement to:

[*Eric Niebler] for writing Boost.Proto, without which this rewrite wouldn't
have been possible, and for helping with examples, advice, and suggestions on
how to use Boost.Proto in the best possible way.

[*Ben Hanson] for providing us with an early version of his Lexertl library,
which is proposed for inclusion into Boost (as Boost.Lexer); at the time
of this writing the Boost review for this library is still pending.

__fixme__: Add more people

[heading Acknowledgements from the Spirit V1 /classic/ Documentation]

Special thanks for working on Spirit /classic/ to:

[*Dan Nuffer] for his work on lexers, parse trees, ASTs, XML parsers, the
multi-pass iterator, as well as administering Spirit's site, editing,
maintaining the CVS and doing the releases, plus a zillion other chores that
were almost taken for granted.

[*Hartmut Kaiser] for his work on the C parser, the work on the C/C++
preprocessor, utility parsers, the original port to Intel 5.0, various work on
Phoenix, porting to v1.5, the meta-parsers, the grouping-parsers, extensive
testing and painstaking attention to detail.

[*Martin Wille] who improved grammar multi-thread safety, contributed the eol_p
parser, the dynamic parsers and documentation, and took an active role in
almost every aspect from brainstorming and design to coding. And, as always,
he helps keep the regression tests for g++ on Linux as green as ever :-).

[*Martijn W. Van Der Lee], our Web site administrator, also for contributing
the RFC821 parser.

[*Giovanni Bajo] for last minute tweaks of Spirit 1.8.0 for CodeWarrior 8.3.
Actually, I'm ashamed Giovanni was not in this list already. He's done a lot
since Spirit 1.5, the first Boost.Spirit release. He was instrumental in the
porting of the Spirit iterator stuff to the new Boost Iterators Library
(version 2). He also did various bug fixes and wrote some tests here and there.

[*Juan Carlos Arevalo-Baeza (JCAB)] for his work on the C++ parser, the position
iterator, ports to v1.5 and keeping the mailing list discussions alive and
kicking.

[*Vaclav Vesely] for lots of stuff: the no\_actions directive, various patches
and fixes, the distinct parsers, the lazy parser, some Phoenix tweaks and
add-ons (e.g. new\_). Also, [*Stefan Slapeta] and his wife for editing Vaclav's
distinct parser doc.

[*Raghavendra Satish] for doing the original v1.3 port to VC++ and his work on
Phoenix.

[*Noah Stein] for following up and helping Ragav on the VC++ ports.

[*Hakki Dogusan] for his original v1.0 Pascal parser.

[*John (EBo) David] for his work on the VM and for watching over my shoulder as
I code, giving the impression of distance eXtreme programming.

[*Chris Uzdavinis] for feeding in comments and valuable suggestions as well as
editing the documentation.

[*Carsten Stoll] for his work on dynamic parsers.

[*Andy Elvey] and his conifer parser.

[*Bruce Florman], who did the original v1.0 port to VC++.

[*Jeff Westfahl] for porting the loop parsers to v1.5 and contributing the file
iterator.

[*Peter Simons] for the RFC date parser example and tutorial, plus helping out
with some nitty gritty details.

[*Markus Sch'''ö'''pflin] for suggesting the end_p parser and lots of other
nifty things, and for his active presence on the mailing list.

[*Doug Gregor] for mentoring and his ability to see things that others don't.

[*David Abrahams] for giving Joel a job that allows him to still work on Spirit,
plus countless pieces of advice and help on C++ and specifically template
metaprogramming.

[*Aleksey Gurtovoy] for his MPL library, from which we stole many
metaprogramming tricks, especially for less conforming compilers such as
Borland and VC6/7.

[*Gustavo Guerra] for his last minute review of Spirit and constant feedback,
plus patches here and there (e.g. proposing the new dot behavior of the real
numerics parsers).

[*Nicola Musatti], [*Paul Snively], [*Alisdair Meredith] and [*Hugo Duncan] for
testing and sending in various patches.

[*Steve Rowe] for his splendid work on the TSTs that will soon be taken into
Spirit.

[*Jonathan de Halleux] for his work on actors.

[*Angus Leeming] for last minute editing work on the 1.8.0 release
documentation, his work on Phoenix, and his active presence on the Spirit
mailing list.

[*Joao Abecasis] for his active presence on the Spirit mailing list, providing
user support, participating in the discussions, and so on.

[*Guillaume Melquiond] for a last minute patch to multi_pass for 1.8.1.

[*Peder Holt] for his porting work on Phoenix, Fusion and Spirit to VC6.

And thanks to Joel's wife Mariel, who did the graphics in this document.

My, there's a lot in this list! And it's a continuing list; we add people to
it all the time. We hope we did not forget anyone. If we missed
someone you know who has helped in any way, please inform us.

Special thanks also to the people who gave feedback and valuable comments,
particularly members of the Boost and Spirit mailing lists. This includes all
those who participated in the review:

[*John Maddock], our review manager, [*Aleksey Gurtovoy], [*Andre Hentz],
[*Beman Dawes], [*Carl Daniel], [*Christopher Currie], [*Dan Gohman],
[*Dan Nuffer], [*Daryle Walker], [*David Abrahams], [*David B. Held],
[*Dirk Gerrits], [*Douglas Gregor], [*Hartmut Kaiser], [*Iain K. Hanson],
[*Juan Carlos Arevalo-Baeza], [*Larry Evans], [*Martin Wille],
[*Mattias Flodin], [*Noah Stein], [*Nuno Lucas], [*Peter Dimov],
[*Peter Simons], [*Petr Kocmid], [*Ross Smith], [*Scott Kirkwood],
[*Steve Cleary], [*Thorsten Ottosen], [*Tom Wenisch], [*Vladimir Prus]

Finally, thanks to SourceForge for hosting the Spirit project, and to Boost: a
C++ community comprised of extremely talented library authors who participate
in the discussion and peer review of well crafted C++ libraries.

[endsect]
10
doc/faq.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section FAQ]
[endsect]
BIN
doc/html/images/FlowOfControl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 78 KiB
BIN
doc/html/images/Thumbs.db
Normal file
Binary file not shown.
BIN
doc/html/images/TokenStructure.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 58 KiB
10
doc/introduction.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Introduction]
[endsect]
50
doc/lex.qbk
Normal file
@@ -0,0 +1,50 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section __lex__]

[include lex/introduction.qbk]

[section __lex__ Tutorials]
[include lex/lexer_tutorials.qbk]
[include lex/lexer_quickstart1.qbk]
[include lex/lexer_quickstart2.qbk]
[include lex/lexer_quickstart3.qbk]
[endsect]

[section Abstracts]
[section Lexer Primitives]
[include lex/lexer_primitives.qbk]
[include lex/tokens_values.qbk]
[include lex/token_definition.qbk]
[endsect]
[include lex/tokenizing.qbk]
[include lex/lexer_semantic_actions.qbk]
[include lex/lexer_static_model.qbk]
[include lex/parsing_using_a_lexer.qbk]
[include lex/lexer_attributes.qbk]
[include lex/lexer_states.qbk]
[endsect]

[section Quick Reference]
[endsect]

[section Reference]
[section Concepts]
[include reference/lex/lexer.qbk]
[include reference/lex/token.qbk]
[include reference/lex/tokendef.qbk]
[include reference/lex/tokenset.qbk]
[endsect]
[include reference/lex/lexer_class.qbk]
[include reference/lex/token_class.qbk]
[include reference/lex/tokendef_class.qbk]
[include reference/lex/tokenset_class.qbk]
[endsect]

[endsect]
137
doc/lex/introduction.qbk
Normal file
@@ -0,0 +1,137 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Introduction to __lex__]

Lexical scanning is the process of analyzing the stream of input characters and
separating it into strings called tokens, which are delimited by whitespace.
Most compiler texts start here, and devote several chapters to discussing
various ways to build scanners. __lex__ is a library built to take care of the
complexities of creating a lexer for your grammar (in this documentation we
will use the terms 'lexical analyzer', 'lexer' and 'scanner' interchangeably).
All it needs to create a lexer is the set of patterns describing the
different tokens you want to recognize in the input. To make this a bit more
formal, here are some definitions:

* A token is a sequence of consecutive characters having a collective meaning.
  Tokens may have attributes specific to the token type, carrying additional
  information about the matched character sequence.
* A pattern is a rule expressed as a regular expression describing how a
  particular token can be formed. For example, [^\[A-Za-z\]\[A-Za-z_0-9\]*] is
  a pattern for a rule matching C++ identifiers.
* Characters between tokens are called whitespace; these include spaces, tabs,
  newlines, and formfeeds. Many people also count comments as whitespace,
  though since some tools such as lint look at comments, this conflation is not
  perfect.

[heading Why Use a Separate Lexer]

Typically, lexical scanning is done in a separate module from the parser,
feeding the parser with a stream of input tokens only. Theoretically it is
not necessary to do this separation. In the end there is only one set of
syntactical rules defining the language, so in theory we could write the whole
parser in one module. In fact, __qi__ allows you to write parsers without using
a lexer, parsing the input character stream directly, and for the most part
this is the way __spirit__ has been used since its invention.

However, the separation has both practical and theoretical bases and proves to
be very useful in practical applications. In 1956, Noam Chomsky defined the
"Chomsky Hierarchy" of grammars:

* Type 0: Unrestricted grammars (e.g., natural languages)
* Type 1: Context-Sensitive grammars
* Type 2: Context-Free grammars
* Type 3: Regular grammars

The complexity of these grammars increases from regular grammars being the
simplest to unrestricted grammars being the most complex. Similarly, the
complexity of the recognizers for these grammars increases. Although a few
features of some programming languages (such as C++) are Type 1, fortunately
for the most part programming languages can be described using only Types 3
and 2. The neat part about these two types is that they are well known and the
ways to parse them are well understood. It has been shown that any regular
grammar can be parsed using a state machine (finite automaton). Similarly,
context-free grammars can always be parsed using a push-down automaton
(essentially a state machine augmented by a stack).

In real programming languages and practical grammars the parts that can be
handled as regular expressions tend to be the lower-level parts, such as the
definition of an identifier or of an integer value:

    letter     := [a-zA-Z]
    digit      := [0-9]

    identifier := letter [ letter | digit ]*
    integer    := digit*
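
Expressed with the token definition syntax used throughout the quick start
examples that follow, these two regular definitions might look like the sketch
below (an illustrative sketch only: the enclosing lexer definition boilerplate
is omitted, and the names and attribute types are assumptions, not code from
the library's examples):

    // sketch: the two regular definitions above, written as __lex__ token
    // definitions inside a token definition class (names and attribute
    // types are illustrative assumptions)
    token_def<std::string>  identifier;    // exposes the matched text
    token_def<unsigned int> integer;       // exposes the converted value

    // ... and in the definition function of the lexer:
    identifier = "[a-zA-Z][a-zA-Z0-9]*";
    integer    = "[0-9]*";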

Higher level parts of practical grammars tend to be more complex and can't be
implemented using plain regular expressions anymore. We need to store
information on the built-in hardware stack while recursing the grammar
hierarchy, and in fact this is the preferred approach used for top-down
parsing. Since it takes a different kind of abstract machine to parse the two
types of grammars, it proved to be efficient to separate the lexical scanner
into a separate module which is built around the idea of a state machine. The
goal here is to use the simplest parsing technique needed for the job.

Another, more practical reason for separating the scanner from the parser is
the need for backtracking during parsing. The input data is a stream of
characters, which is often thought to be processed left to right without any
backtracking. Unfortunately, in practice most of the time that isn't possible.
Almost every language has certain keywords such as IF, FOR, and WHILE. The
decision whether a certain character sequence actually comprises a keyword or
just an identifier often can be made only after seeing the first delimiter
/after/ it. This already is a limited form of backtracking, since we need to
store the string long enough to be able to make the decision. The same is true
for more coarse grained language features such as nested IF/ELSE statements,
where the decision about which IF the last ELSE statement belongs to can be
made only after seeing the whole construct.

So the structure of a conventional compiler often involves splitting up the
functions of the lower-level and higher-level parsing. The lexical scanner
deals with things at the character level, collecting characters into strings,
converting character sequences into different representations such as integers,
etc., and passing them along to the parser proper as indivisible tokens. It's
also considered normal to let the scanner do additional jobs, such as
identifying keywords, storing identifiers in tables, etc.

Now, __spirit__ follows this structure, where __lex__ can be used to implement
state machine based recognizers, while __qi__ can be used to build recognizers
for context-free grammars. Since both modules are seamlessly integrated with
each other and with the C++ target language it is even possible to use the
provided functionality to build more complex grammar recognizers.

[heading Advantages of using __lex__]

The advantage of using __lex__ to create the lexical analyzer over using more
traditional tools such as __flex__ is its carefully crafted integration with
the __spirit__ library and the C++ host language. You don't need any external
tools to generate the code, and your lexer will be perfectly integrated with
the rest of your program, making it possible to freely access any context
information and data structure. Since the C++ compiler sees all the code, it
will generate optimal code no matter what configuration options have been
chosen by the user. __lex__ gives you all the features you could get from a
similar __flex__ program without the need to leave C++ as a host language:

* the definition of tokens is done using regular expressions (patterns)
* the token definitions can refer to special substitution strings (pattern
  macros) simplifying pattern definitions
* the generated lexical scanner may have multiple start states
* it is possible to attach code to any of the token definitions; this code gets
  executed whenever the corresponding token pattern has been matched

Even if it is possible to use __lex__ to generate C++ code representing
the lexical analyzer (we will refer to that as the /static/ model, described in
more detail in the section __sec_lex_static_model__) - a model
very similar to the way __flex__ operates - we will mainly focus on the
opposite, the /dynamic/ model. You can directly integrate the token definitions
into your C++ program, building the lexical analyzer dynamically at runtime.
The dynamic model is something not supported by __flex__ or other lexical
scanner generators (such as __re2c__, __ragel__, etc.). But it is very flexible
and allows you to speed up the development of your application.

[endsect]
12
doc/lex/lexer_attributes.qbk
Normal file
@@ -0,0 +1,12 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer Attributes]

[endsect]
15
doc/lex/lexer_primitives.qbk
Normal file
@@ -0,0 +1,15 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer Primitives]

[/ Describe the primitive lexer constructs, such as token_def, token_set? ]
[/ Describe the primitive lexer constructs usable in parsers, such as
   in_state[], set_state(), token(), etc. ]

[endsect]
97
doc/lex/lexer_quickstart1.qbk
Normal file
@@ -0,0 +1,97 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Quickstart 1 - A word counter using __lex__]

__lex__ is very modular, which follows the general building principle of the
__spirit__ libraries. You never pay for features you don't use. It is nicely
integrated with the other parts of __spirit__ but nevertheless can be used
separately to build standalone lexical analyzers.
The first quick start example describes a standalone application:
counting characters, words and lines in a file, very similar to what the well
known Unix command `wc` does (for the full example code see here:
[@../../example/lex/word_count_functor.cpp word_count_functor.cpp]).

[import ../example/lex/word_count_functor.cpp]

[heading Prerequisites]

The only required `#include` specific to /Spirit.Lex/ follows. It is a wrapper
for all necessary definitions to use /Spirit.Lex/ in a standalone fashion, and
on top of the __lexertl__ library. Additionally we `#include` two of the Boost
headers to define `boost::bind()` and `boost::ref()`.

[wcf_includes]

To make all the code below more readable we introduce the following namespaces.

[wcf_namespaces]

[heading Defining Tokens]

The most important step while creating a lexer using __lex__ is to define the
tokens to be recognized in the input sequence. This is normally done by
defining the regular expressions describing the matching character sequences,
and optionally their corresponding token ids. Additionally the defined tokens
need to be associated with an instance of a lexer object as provided by the
library. The following code snippet shows how this can be done using __lex__.

[wcf_token_definition]
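
In case the imported snippet is not rendered here, the association of patterns
with the lexer can be sketched as follows (an illustrative sketch only, using
the `self.add()` style of the API this example employs; the `ID_*` token ids
are assumed names, not taken verbatim from the example):

    // sketch: add three token definitions, each with its pattern and id
    self.add
        ("[^ \t\n]+", ID_WORD)   // a word: everything except whitespace
        ("\n",        ID_EOL)    // a newline
        (".",         ID_CHAR)   // any other single character
    ;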

[heading Doing the Useful Work]

We will use a setup where we want the __lex__ library to invoke a given
function after any of the generated tokens is recognized. For this reason
we need to implement a functor taking at least the generated token as an
argument and returning a boolean value allowing it to stop the tokenization
process. The default token type used in this example carries a token value of
the type `iterator_range<BaseIterator>` pointing to the matched range in the
underlying input sequence.

[wcf_functor]
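
For readers without the imported snippet at hand, a functor fitting this
description might look like the following sketch (an illustrative assumption,
not the actual example code):

    // sketch: a functor invoked for every matched token; returning false
    // would stop the tokenization process
    struct count_all
    {
        typedef bool result_type;

        template <typename Token>
        bool operator()(Token const& t) const
        {
            // t.value() is assumed to hold the iterator_range pointing to
            // the matched characters, as described above
            return true;    // keep tokenizing
        }
    };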

All that's left is to write some boilerplate code helping to tie together the
pieces described so far. To simplify this example we call the `lex::tokenize()`
function implemented in __lex__ (for a more detailed description of this
function see here: __fixme__), even if we could have written a loop to iterate
over the lexer iterators [`first`, `last`) as well.

[heading Pulling Everything Together]

[wcf_main]

[heading Comparing __lex__ with __flex__]

This example was deliberately chosen to be as similar as possible to the
equivalent __flex__ program (see below), which isn't too different from what
has to be written when using __lex__.

[note Interestingly enough, performance comparisons of lexical analyzers
      written using __lex__ with equivalent programs generated by
      __flex__ show that both have comparable execution speeds!
      Generally, thanks to the highly optimized __lexertl__ library and
      due to its carefully designed integration with __spirit__ the
      abstraction penalty to be paid for using __lex__ is negligible.
]

The remaining examples in this tutorial will use more sophisticated features
of __lex__, mainly to allow further simplification of the code to be written,
while maintaining the similarity with corresponding features of __flex__.
__lex__ has been designed to be as similar to __flex__ as possible, which is
why this documentation will provide the corresponding __flex__ code for the
shown __lex__ examples almost everywhere. Consequently, here is the __flex__
code corresponding to the example as shown above.

[wcf_flex_version]

[endsect]
133
doc/lex/lexer_quickstart2.qbk
Normal file
@@ -0,0 +1,133 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Quickstart 2 - A better word counter using __lex__]

People who know __flex__ will probably complain about the example from the
section __sec_lex_quickstart_1__ as being overly complex and not
written to leverage the possibilities provided by this tool. In particular the
previous example did not directly use the lexer actions to count the lines,
words and characters. So the example provided in this step of the tutorial will
show how to use semantic actions in __lex__. Even though it still
only counts text elements, it introduces other new concepts and
configuration options along the way (for the full example code
see here: [@../../example/lex/word_count_lexer.cpp word_count_lexer.cpp]).

[import ../example/lex/word_count_lexer.cpp]

[heading Prerequisites]

In addition to the only required `#include` specific to /Spirit.Lex/, this
example needs to include a couple of header files from the __phoenix2__
library. This example shows how to attach functors to token definitions, which
could be done using any type of C++ technique resulting in a callable object.
Using __phoenix2__ for this task simplifies things and avoids adding
dependencies to other libraries (__phoenix2__ is already in use for
__spirit__ anyway).

[wcl_includes]

To make all the code below more readable we introduce the following namespaces.

[wcl_namespaces]

To give a preview of what to expect from this example, here is the flex program
which has been used as the starting point. The useful code is directly included
inside the actions associated with each of the token definitions.

[wcl_flex_version]

[heading Semantic Actions in __lex__]

__lex__ uses a very similar way of associating actions with the token
definitions (which should look familiar to anybody knowledgeable with
__spirit__ as well): specifying the operations to execute inside of a pair of
`[]` brackets. In order to be able to attach semantic actions to token
definitions, an instance of a `token_def<>` is defined for each of them.

[wcl_token_definition]

The semantics of the shown code is as follows. The code inside the `[]`
brackets will be executed whenever the corresponding token has been matched by
the lexical analyzer. This is very similar to __flex__, where the action code
associated with a token definition gets executed after the recognition of a
matching input sequence. The code above uses functors constructed using
__phoenix2__, but it is possible to insert any C++ functor as long as it
exposes the interface:

    void f (Range r, std::size_t id, Context& ctx, bool& matched);

[variablelist where:
    [[`Range r`]          [This is a `boost::iterator_range` holding two
                           iterators pointing to the matched range in the
                           underlying input sequence. The type of the
                           held iterators is the same as specified while
                           defining the type of the `lexertl_lexer<...>`
                           (its first template parameter).]]
    [[`std::size_t id`]   [This is the token id for the matched token.]]
    [[`Context& ctx`]     [This is a reference to a lexer specific,
                           unspecified type, providing the context for the
                           current lexer state. It can be used to access
                           different internal data items and is needed for
                           lexer state control from inside a semantic
                           action.]]
    [[`bool& matched`]    [This boolean value is pre-initialized to `true`.
                           If the functor sets it to `false` the lexer
                           stops calling any semantic actions attached to
                           this token and behaves as if the token had not
                           been matched in the first place.]]
]
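
As an illustration (the functor name and its body are assumptions, not code
from the example), a hand-written semantic action exposing this interface
could look like this:

    // sketch: a plain functor usable as a lexer semantic action; setting
    // matched = false would make the lexer treat this match as a non-match
    struct on_word
    {
        template <typename Range, typename Context>
        void operator()(Range const& r, std::size_t id,
                        Context& ctx, bool& matched) const
        {
            // r.begin()/r.end() delimit the matched character sequence
        }
    };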

Even if it is possible to write your own functor implementations, the preferred
way of defining lexer semantic actions is to use __phoenix2__. In this case you
can access the parameters described in the table above by using the
predefined __phoenix2__ placeholders: `_1` for the iterator range, `_2` for the
token id, `_3` for the reference to the lexer state, and `_4` for the reference
to the boolean value signaling the outcome of the semantic action.

[important All placeholders (`_1`, `_2`, etc.) used in /lexer/ semantic
           actions in conjunction with functors created based on __phoenix2__
           need to be imported from the `namespace boost::phoenix::arg_names`
           (and *not* `namespace boost::spirit::arg_names`, which is
           different from using placeholders in __qi__ or __karma__).
           Using the wrong placeholders leads to subtle compilation errors
           which are difficult to trace back to their cause.
]

[heading Associating Token Definitions with the Lexer]

If you compare this with the code from __sec_lex_quickstart_1__ with regard to
the way token definitions are associated with the lexer, you will notice
a different syntax being used here. While in the previous example we were
using the `self.add()` style of the API, here we directly assign the token
definitions to `self`, combining the different token definitions using the `|`
operator. Here is the code snippet again:

    self =  word  [++ref(w), ref(c) += distance(_1)]
        |   eol   [++ref(c), ++ref(l)]
        |   any   [++ref(c)]
        ;

This way we have a very powerful and natural way of building the lexical
analyzer. If translated into English this may be read as: The lexical analyzer
will recognize ('`=`') tokens as defined by any of ('`|`') the token
definitions `word`, `eol`, and `any`.

A second difference to the previous example is that we do not explicitly
specify any token ids to use for the separate tokens. Using semantic actions to
trigger some useful work freed us from the need to define these. To ensure
every token gets assigned an id, the __lex__ library internally assigns unique
numbers to the token definitions, starting with the constant defined by
`boost::spirit::lex::min_token_id`.

[endsect]
151
doc/lex/lexer_quickstart3.qbk
Normal file
@@ -0,0 +1,151 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Quickstart 3 - Counting Words Using a Parser]

The whole purpose of integrating __lex__ as part of the __spirit__ library was
to add a library allowing the merging of lexical analysis with the parsing
process as defined by a __spirit__ grammar. __spirit__ parsers read their input
from an input sequence accessed by iterators. So naturally, we chose iterators
to be used as the interface between the lexer and the parser. A second goal of
the lexer/parser integration was to enable the usage of possibly different
lexical analyzer libraries. The utilization of iterators seemed to be the
right choice from this standpoint as well, mainly because these can be used as
an abstraction layer hiding implementation specifics of the used lexer
library. The [link spirit.lex.flowcontrol picture] below shows the common
flow control implemented while parsing combined with lexical analysis.

[fig ./images/FlowOfControl.png..The common flow control implemented while parsing combined with lexical analysis..spirit.lex.flowcontrol]

Another problem related to the integration of the lexical analyzer with the
parser was to find a way to blend the defined tokens syntactically
with the grammar definition syntax of __spirit__. For tokens defined as
instances of the `token_def<>` class the most natural way of integration was
to allow these to be used directly as parser components. Semantically these
parser components succeed matching their input whenever the corresponding token
type has been matched by the lexer. This quick start example will demonstrate
this (and more) by counting words again, simply by adding up the numbers inside
of semantic actions of a parser (for the full example code see here:
[@../../example/lex/word_count.cpp word_count.cpp]).

[import ../example/lex/word_count.cpp]

[heading Prerequisites]

This example uses two of the __spirit__ library components: __lex__ and __qi__;
consequently we have to `#include` the corresponding header files. Again, we
need to include a couple of header files from the __phoenix2__ library. This
example shows how to attach functors to parser components, which
could be done using any type of C++ technique resulting in a callable object.
Using __phoenix2__ for this task simplifies things and avoids adding
dependencies to other libraries (__phoenix2__ is already in use for
__spirit__ anyway).

[wcp_includes]

To make all the code below more readable we introduce the following namespaces.

[wcp_namespaces]

[heading Defining Tokens]

Compared to the two previous quick start examples (__sec_lex_quickstart_1__
and __sec_lex_quickstart_2__), the token definition class for this example does
not reveal any surprises. However, it uses lexer token definition macros to
simplify the composition of the regular expressions, which will be described in
more detail in the section __fixme__. Generally, any token definition is usable
without modification either for a standalone lexical analyzer or in conjunction
with a parser.

[wcp_token_definition]

[heading Using Token Definition Instances as Parsers]

While the integration of lexer and parser in the control flow is achieved by
using special iterators wrapping the lexical analyzer, we still need a means of
expressing in the grammar what tokens to match and where. The token definition
class above uses three different ways of defining a token:

* Using an instance of a `token_def<>`, which is handy whenever you need to
  specify a token attribute (for more information about lexer related
  attributes please look here: __sec_lex_attributes__).
* Using a single character as the token; in this case the character represents
  itself as a token, where the token id is the ASCII character value.
* Using a regular expression represented as a string, where the token id needs
  to be specified explicitly to make the token accessible from the grammar
  level.

All three token definition methods require a different method of grammar
integration. But as you can see from the following code snippet, each of these
methods is straightforward and blends the corresponding token instance
naturally with the surrounding __qi__ grammar syntax.

[table
    [[Token definition]   [Parser integration]]
    [[`token_def<>`]      [The `token_def<>` instance is directly usable as a
                           parser component. Parsing of this component will
                           succeed if the regular expression used to define
                           this has been matched successfully.]]
    [[single character]   [The single character is directly usable in the
                           grammar; under certain circumstances it needs to be
                           wrapped by a `char_()` parser component, though.
                           Parsing of this component will succeed if the
                           single character has been matched.]]
    [[explicit token id]  [To use an explicit token id in a __qi__ grammar you
                           are required to wrap it with the special `token()`
                           parser component. Parsing of this component will
                           succeed if the current token has the same token
                           id as specified in the expression `token(<id>)`.]]
]
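
As a quick illustration (the identifiers are assumptions, not taken from the
example code), the three integration styles could appear side by side in a
single rule like this:

    // sketch: all three token definition styles used within one rule;
    // tok.identifier is a token_def<> member, ID_ANY a hypothetical token id
    start = *(  tok.identifier      // token_def<> used directly as a parser
             |  ','                 // a single character token
             |  token(ID_ANY)       // an explicit token id wrapped in token()
             );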

The grammar definition below uses each of the three types demonstrating their
usage.

[wcp_grammar_definition]

As already described (see: __sec_qi_karma_attributes__), the __qi__ parser
library builds upon a set of fully attributed parser components.
Consequently, all the token definitions support this attribute model as
well. The most natural way of implementing this was to use the token values as
the attributes exposed by the parser component corresponding to the token
definition (you can read more about this topic here: __sec_lex_tokenvalues__).
The example above takes advantage of the full integration of the token values
as the `token_def<>`'s parser attributes: the `word` token definition is
declared as a `token_def<std::string>`, making every instance of a `word` token
carry the string representation of the matched input sequence as its value.
The semantic action attached to `tok.word` receives this string (represented by
the `_1` placeholder) and uses it to calculate the number of matched
characters: `ref(c) += size(_1)`.

[important All placeholders (`_1`, `_2`, etc.) used in /parser/ semantic
           actions in conjunction with functors created based on __phoenix2__
           need to be imported from the `namespace boost::spirit::arg_names`
           (and *not* `namespace boost::phoenix::arg_names`, which is
           different from using placeholders in __lex__).
           Using the wrong placeholders leads to subtle compilation errors
           which are difficult to trace back to their cause.
]

[heading Pulling Everything Together]

The main function needs to implement a bit more logic now, as we have to
initialize and start not only the lexical analysis but the parsing process as
well. The three type definitions (`typedef` statements) simplify the creation
of the lexical analyzer and the grammar. After reading the contents of the
given file into memory it calls the function __api_tokenize_and_parse__ to
initialize the lexical analysis and parsing processes.

[wcp_main]

[endsect]
||||
10
doc/lex/lexer_semantic_actions.qbk
Normal file
10
doc/lex/lexer_semantic_actions.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section Lexer Semantic Actions]
|
||||
[endsect]
|
||||
21
doc/lex/lexer_states.qbk
Normal file
@@ -0,0 +1,21 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer States]

[heading Controlling the Lexer State from Lexer Semantic Actions]

[heading Controlling the Lexer State from Parser Semantic Actions]

[heading Using a Lexer State for the Skip Parser]

[endsect]
119
doc/lex/lexer_static_model.qbk
Normal file
@@ -0,0 +1,119 @@
[/==============================================================================
    Copyright (C) 2001-2008 Joel de Guzman
    Copyright (C) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section The /Static/ Lexer Model]

The documentation of __lex__ so far has mostly been about describing the
features of the /dynamic/ model, where the tables needed for lexical analysis
are generated from the regular expressions at runtime. The big advantage of the
dynamic model is its flexibility, and its integration with the __spirit__
library and the C++ host language. Its big disadvantage is the need to spend
additional runtime to generate the tables, which especially might be a
limitation for larger lexical analyzers. The /static/ model strives to build
upon the smooth integration with __spirit__ and C++, and reuses large parts of
the __lex__ library as described so far, while overcoming the additional
runtime requirements by using pre-generated tables and tokenizer routines. To
make the code generation as simple as possible, it is possible to reuse the
token definition types developed using the /dynamic/ model without any changes.
As will be shown in this section, building a code generator based on an
existing token definition type is a matter of writing 3 lines of code.

Assuming you already built a dynamic lexer for your problem, there are two more
steps needed to create a static lexical analyzer using __lex__:

# generating the C++ code for the static analyzer (including the tokenization
  function and corresponding tables), and
# modifying the dynamic lexical analyzer to use the generated code.

Both steps are described in more detail in the two sections below (for the full
source code used in this example see the code here:
[@../../example/lex/static_lexer/word_count_tokens.hpp the common token definition],
[@../../example/lex/static_lexer/word_count_generate.cpp the code generator],
[@../../example/lex/static_lexer/word_count_static.hpp the generated code], and
[@../../example/lex/static_lexer/word_count_static.cpp the static lexical analyzer]).

[import ../example/lex/static_lexer/word_count_tokens.hpp]
[import ../example/lex/static_lexer/word_count_static.cpp]
[import ../example/lex/static_lexer/word_count_generate.cpp]

But first we provide the code snippets needed to understand the further
descriptions. Both the definition of the used token identifiers and the
token definition class in this example are put into a separate header file to
make these available to the code generator and the static lexical analyzer.

[wc_static_tokenids]

The important point here is that the token definition class is no different
from a similar class to be used for a dynamic lexical analyzer. The library
has been designed in a way that all components (dynamic lexical analyzer, code
generator, and static lexical analyzer) can reuse the very same token
definition syntax.

[wc_static_tokendef]

The only thing changing between the three different use cases is the template
parameter used to instantiate a concrete token definition. For the dynamic
model and the code generator you probably will use the __class_lexertl_lexer__
template, whereas for the static model you will use the
__class_lexertl_static_lexer__ type as the template parameter.

This example not only shows how to build a static lexer, but it additionally
demonstrates how such a lexer can be used for parsing in conjunction with a
__qi__ grammar. For completeness, we provide the simple grammar used in this
example. As you can see, this grammar does not have any dependencies on the
static lexical analyzer, and for this reason it is no different from a grammar
used either without a lexer or using a dynamic lexical analyzer as described
before.

[wc_static_grammar]

[heading Generating the Static Analyzer]

The first additional step to perform in order to create a static lexical
analyzer is to create a small standalone program for creating the lexer tables
and the corresponding tokenization function. For this purpose the __lex__
library exposes a special API - the function __api_generate_static__. It
implements the whole code generator; no further code is needed. All it
takes to invoke this function is to supply a token definition instance, an
output stream to use to generate the code to, and an optional string to be used
as a prefix for the name of the generated function. All in all just a couple
of lines of code.

[wc_static_generate_main]
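
For orientation, a generator program along these lines might look like the
following sketch (assuming the function referenced above as
__api_generate_static__ is named `generate_static` and returns a success flag;
the type and file names are assumptions borrowed from the example):

    #include <fstream>
    // (the token definitions and lexer_type typedef are assumed to come
    // from the common header, as in the example)

    int main()
    {
        // sketch: instantiate the token definitions and hand them, together
        // with an output stream and a name prefix, to the code generator
        word_count_tokens<lexer_type> word_count;
        std::ofstream out("word_count_static.hpp");
        return generate_static(word_count, out, "wc") ? 0 : -1;
    }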

The shown code generator will generate output, which should be stored in a file
for later inclusion into the static lexical analyzer as shown in the next
topic (the full generated code can be viewed
[@../../example/lex/static_lexer/word_count_static.hpp here]).

[heading Modifying the Dynamic Analyzer]

The second required step to convert an existing dynamic lexer into a static one
is to change your main program at two places. First, you need to change the
type of the used lexer (that is, the template parameter used while
instantiating your token definition class). While in the dynamic model we have
been using the __class_lexertl_lexer__ template, we now need to change that to
the __class_lexertl_static_lexer__ type. The second change is tightly related
to the first one and involves correcting the corresponding `#include` statement
to:

[wc_static_include]

Otherwise the main program is no different from an equivalent program using
the dynamic model. This feature makes it really easy, for instance, to develop
the lexer in dynamic mode and to switch to the static mode after the code has
been stabilized. The simple generator application shown above enables the
integration of the code generator into any existing build process. The
following code snippet provides the overall main function, highlighting
the code to be changed.

[wc_static_main]

[endsect]
||||
59
doc/lex/lexer_tutorials.qbk
Normal file
59
doc/lex/lexer_tutorials.qbk
Normal file
@@ -0,0 +1,59 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section __lex__ Tutorials Overview]
|
||||
|
||||
The __lex__ library implements several components on top of possibly different
|
||||
lexer generator libraries. It exposes a pair of iterators, which, when
|
||||
dereferenced, return a stream of tokens generated from the underlying character
|
||||
stream. The generated tokens are based on the token definitions supplied by the
|
||||
user.
|
||||
|
||||
Currently, __lex__ is built on top of Ben Hansons excellent __lexertl__
|
||||
library (which is a proposed Boost library). __lexertl__ provides the necessary
|
||||
functionality to build state
|
||||
machines based on a set of supplied regular expressions. But __lex__ is not
|
||||
restricted to be used with __lexertl__. We expect it to be usable in
|
||||
conjunction with any other lexical scanner generator library, all what needs
|
||||
to be implemented is a set of wrapper objects exposing a well defined
|
||||
interface as described in this documentation.

[note For the sake of clarity all examples in this documentation assume
      __lex__ to be used on top of __lexertl__.]

Building a lexer using __lex__ is highly configurable, and most of this
configuration has to be done at compile time. Almost all of the configurable
parameters have generally useful default values, though, which means that
starting a project is easy and straightforward. Here is a (non-exhaustive)
list of features you can tweak to adjust the generated lexer instance to
your actual needs:

* Select and customize the token type to be generated by the lexer instance.
* Select and customize the token value types the generated token instances
  will be able to hold.
* Select the iterator type of the underlying input stream, which will be
  used as the source for the character stream to tokenize.
* Customize the iterator type returned by the lexer to enable debug support,
  special handling of certain input sequences, etc.
* Select the /dynamic/ or the /static/ runtime model for the lexical
  analyzer.

Special care has been taken during the development of the library to ensure
that optimal code is generated regardless of the configuration options
selected.

The series of tutorial examples in this section will guide you through some
common use cases, helping you to understand the big picture. The first two
quick start examples (__sec_lex_quickstart_1__ and __sec_lex_quickstart_2__)
introduce the __lex__ library while building two standalone applications,
not connected to or depending on any other part of __spirit__. The section
__sec_lex_quickstart_3__ demonstrates how to use a lexer in conjunction with
a parser (where, naturally, the parser is built using __qi__).

[endsect]
15
doc/lex/parsing_using_a_lexer.qbk
Normal file
@@ -0,0 +1,15 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parsing using a Lexer]

[/ write about integration of lexer component with __qi__]

[/ write about iterator interface exposed by a __lex__ lexer]

[endsect]
11
doc/lex/token_definition.qbk
Normal file
@@ -0,0 +1,11 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Ways to define Tokens]

[endsect]
15
doc/lex/tokenizing.qbk
Normal file
@@ -0,0 +1,15 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Tokenizing Input Data]

[heading The tokenize() function]

[heading The generate_static() function]

[endsect]
207
doc/lex/tokens_values.qbk
Normal file
@@ -0,0 +1,207 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section About Tokens and Token Values]

As already discussed, lexical scanning is the process of analyzing the
stream of input characters and separating it into strings called tokens,
most of the time separated by whitespace. The different token types
recognized by a lexical analyzer often get assigned unique integer token
identifiers (token ids). These token ids are normally used by the parser to
identify the current token without having to look at the matched string
again. The __lex__ library is no different in this respect, as it uses the
token ids as the main means of identification of the different token types
defined for a particular lexical analyzer. However, it is different from
commonly used lexical analyzers in the sense that it returns (references to)
instances of a (user defined) token class to the user. The only real
requirement placed on this token class is, consequently, that it has to
carry at least the token id of the token it represents. For more information
about the interface a user defined token type has to expose, please look at
the __sec_ref_lex_token__ reference. The library provides a default token
type based on the __lexertl__ library which should be sufficient in most use
cases: the __class_lexertl_token__ type. This section focuses on the
description of the general features a token class may implement and how this
integrates with the other parts of the __lex__ library.

[heading The Anatomy of a Token]

It is very important to understand the difference between a token definition
(represented by the __class_token_def__ template) and a token itself (for
instance represented by the __class_lexertl_token__ template).

The token definition is used to describe the main features of a particular
token type, especially:

* to simplify the definition of a token type using a regular expression
  pattern applied while matching this token type,
* to associate a token type with a particular lexer state,
* to optionally assign a token id to a token type,
* to optionally associate some code to execute whenever an instance of this
  token type has been matched,
* and to optionally specify the attribute type of the token value.

The token itself is a data structure returned by the lexer iterators.
Dereferencing a lexer iterator returns a reference to the last matched token
instance. It encapsulates the part of the underlying input sequence matched
by the regular expression used during the definition of this token type.
Incrementing the lexer iterator invokes the lexical analyzer to match the
next token by advancing the underlying input stream. The token data
structure contains at least the token id of the matched token type, making
it possible to identify the matched character sequence. Optionally, the
token instance may contain a token value and/or the lexer state this token
instance was matched in. The following
[link spirit.lex.tokenstructure figure] shows the schematic structure of a
token.

[fig ./images/TokenStructure.png..The structure of a token..spirit.lex.tokenstructure]

The token value and the token state may be omitted for optimization reasons,
avoiding a token that carries more data than actually required. This
configuration can be achieved by supplying appropriate template parameters
for the __class_lexertl_token__ template while defining the token type.

The lexer iterator returns the same token type for each of the different
matched token definitions. To accommodate the possibly different token
/value/ types exposed by the various token types (token definitions), the
general type of the token value is a __boost_variant__. As a minimum (for
the default configuration) this token value variant will be configured to
always hold a __boost_iterator_range__ containing the pair of iterators
pointing to the matched input sequence for this token instance.

[note If the lexical analyzer is used in conjunction with a __qi__ parser,
      the stored __boost_iterator_range__ token value will be converted to
      the requested token type (parser attribute) exactly once. This happens
      at the time of the first access to the token value requiring the
      corresponding type conversion. The converted token value will be
      stored in the __boost_variant__ replacing the initially stored
      iterator range. This avoids converting the input sequence to the token
      value more than once, thus optimizing the integration of the lexer
      with __qi__, even during parser backtracking.
]

Here is the template prototype of the __class_lexertl_token__ template:

    template <
        typename Iterator = char const*,
        typename AttributeTypes = mpl::vector0<>,
        typename HasState = mpl::true_
    >
    struct lexertl_token;

[variablelist where:
    [[Iterator]       [This is the type of the iterator used to access the
                       underlying input stream. It defaults to a plain
                       `char const*`.]]
    [[AttributeTypes] [This is either an mpl sequence containing all
                       attribute types used for the token definitions or the
                       type `omitted`. If the mpl sequence is empty (which is
                       the default), all token instances will store a
                       `boost::iterator_range<Iterator>` pointing to the
                       start and the end of the matched section in the input
                       stream. If the type is `omitted`, the generated tokens
                       will contain no token value (attribute) at all.]]
    [[HasState]       [This is either `mpl::true_` or `mpl::false_`,
                       controlling whether the generated token instances will
                       contain the lexer state they were generated in. The
                       default is `mpl::true_`, so all token instances will
                       contain the lexer state.]]
]

Normally, during its construction, a token instance always holds the
__boost_iterator_range__ as its token value (except if it has been defined
using the `omitted` token value type). This iterator range then is converted
in place to the requested token value type (attribute) when it is requested
for the first time.
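
As a quick illustration, a token type supporting `std::string` and
`unsigned int` token values (as used by the example referenced below) could
be defined along these lines (a sketch; the name `base_iterator_type` for
the iterator of the underlying input is an assumption taken from the
examples):

    typedef std::string::const_iterator base_iterator_type;

    // token instances may hold an iterator_range, a std::string, or an
    // unsigned int as their token value; the lexer state is carried as well
    // (the HasState parameter defaults to mpl::true_)
    typedef lexertl_token<
        base_iterator_type, mpl::vector<std::string, unsigned int>
    > token_type;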

[heading The Physiognomy of a Token Definition]

The token definitions (represented by the __class_token_def__ template) are
normally used as part of the definition of the lexical analyzer. At the same
time a token definition instance may be used as a parser component in
__qi__.

The template prototype of this class is shown here:

    template <
        typename Attribute = unused_type,
        typename Char = char
    >
    class token_def;

[variablelist where:
    [[Attribute] [This is the type of the token value (attribute) supported
                  by token instances representing this token type. This
                  attribute type is exposed to the __qi__ library whenever
                  this token definition is used as a parser component. The
                  default attribute type is `unused_type`, which means the
                  token instance holds a __boost_iterator_range__ pointing to
                  the start and the end of the matched section in the input
                  stream. If the attribute is `omitted` the token instance
                  will expose no token value at all. Any other type will be
                  used directly as the token value type.]]
    [[Char]      [This is the value type of the iterator for the underlying
                  input sequence. It defaults to `char`.]]
]

The semantics of the template parameters for the token type and the token
definition type are very similar and interdependent. As a rule of thumb you
can think of the token definition type as the means of specifying everything
related to a single specific token type (such as `identifier` or `integer`).
On the other hand, the token type is used to define the general properties
of all token instances generated by the __lex__ library.
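
For illustration, a couple of token definitions in the style of the example
discussed below might be declared like this (a sketch; the names and regular
expressions are assumptions modeled on the example sources):

    // token definitions exposing an explicit token value type
    token_def<std::string>  identifier;
    token_def<unsigned int> constant;

    // token definitions without a special token value; their value is the
    // matched iterator range only
    token_def<> if_, else_, while_;

    // the patterns are typically assigned inside the token definition class
    identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
    constant = "[0-9]+";
    if_ = "if";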

[important If you don't list any token value types in the token type
           definition declaration (resulting in the usage of the default
           __boost_iterator_range__ token type) everything will compile and
           work just fine, just a bit less efficiently. This is because the
           token value will be converted from the matched input sequence
           every time it is requested.

           But as soon as you specify at least one token value type while
           defining the token type you'll have to list all value types used
           for __class_token_def__ declarations in the token definition
           class, otherwise compilation errors will occur.
]

[heading Examples of using __class_lexertl_token__]

Let's start with some examples. We refer to one of the __lex__ examples (for
the full source code of this example please see
[@../../example/lex/example4.cpp example4.cpp]).

[import ../example/lex/example4.cpp]

The first code snippet shows an excerpt of the token definition class: the
definition of a couple of token types. Some of the token types do not expose
a special token value (`if_`, `else_`, and `while_`); their token value will
always hold only the iterator range of the matched input sequence. The token
definitions for the `identifier` and the integer `constant` are specialized
to expose an explicit token value type each: `std::string` and
`unsigned int`.

[example4_token_def]

As the parsers generated by __qi__ are fully attributed, any __qi__ parser
component needs to expose a certain type as its parser attribute. Naturally,
the __class_token_def__ exposes the token value type as its parser
attribute, enabling a smooth integration with __qi__.

The next code snippet demonstrates how the required token value types are
specified while defining the token type to use. All of the token value types
used for at least one of the token definitions have to be re-iterated for
the token definition as well.

[example4_token]

To prevent the token from having a token value at all, the special tag
`omitted` can be used: `token_def<omitted>` and
`lexertl_token<base_iterator_type, omitted>`.
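
For instance (a sketch, reusing the assumed names from above):

    // token definition whose instances carry no token value at all
    token_def<omitted> semicolon;
    semicolon = ";";

    // token type whose instances never store a token value
    typedef lexertl_token<base_iterator_type, omitted> token_type;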

[endsect]
24
doc/notes.qbk
Normal file
@@ -0,0 +1,24 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Notes]

[section Portability]
[endsect]

[section Porting from Spirit 1.8.x]
[endsect]

[section Style Guide]
[include notes/style_guide.qbk]
[endsect]

[section Techniques]
[endsect]

[endsect]
87
doc/notes/style_guide.qbk
Normal file
@@ -0,0 +1,87 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

At some point, especially when there are lots of semantic actions attached
at various points, the grammar tends to be quite difficult to follow. In
order to keep an easy-to-read, consistent and aesthetically pleasing look to
the Spirit code, the following coding style guide is advised.

This coding style is adapted and extended from the ANTLR/PCCTS style
(Terence Parr) and the
[@http://groups.yahoo.com/group/boost/files/coding_guidelines.html
Boost coding guidelines] (David Abrahams and Nathan Myers) and is the
combined work of Joel de Guzman, Chris Uzdavinis and Hartmut Kaiser.

* Rule names use the standard C++ (Boost) convention. The rule name may be
  very long.
* The '=' is neatly indented 4 spaces below. As in Boost, use spaces instead
  of tabs.
* Breaking the operands into separate lines puts the semantic actions neatly
  to the right.
* A semicolon on the last line terminates the rule.
* The adjacent parts of a sequence should be indented so that everything
  belonging to one level sits at the same indentation level.

    program
        =   program_heading [heading_action]
            >> block [block_action]
            >> '.'
        |   another_sequence
            >> etc
        ;

* Prefer literals in the grammar instead of identifiers, e.g. `"program"`
  instead of `PROGRAM`, `'>='` instead of `GTE` and `'.'` instead of `DOT`.
  This makes it much easier to read. If this isn't possible (for instance
  where the used tokens must be identified through integers) capitalized
  identifiers should be used instead.
* Breaking the operands may not be needed for short expressions,
  e.g. `*(',' >> file_identifier)`, as long as the line does not
  exceed 80 characters.
* If a sequence fits on one line, put spaces inside the parentheses
  to clearly separate them from the rules.

    program_heading
        =   no_case["program"]
            >> identifier
            >> '('
            >> file_identifier
            >> *( ',' >> file_identifier )
            >> ')'
            >> ';'
        ;

* Nesting directives: If a rule does not fit on one line (80 characters) it
  should be continued on the next line, indented by one level. The brackets
  of directives, semantic expressions (using Phoenix or LL lambda
  expressions) or parsers should be placed as follows.

    identifier
        =   no_case
            [
                lexeme
                [
                    alpha >> *(alnum | '_') [id_action]
                ]
            ]
        ;

* Nesting unary operators (e.g. the Kleene star): Unary rule operators (the
  Kleene star, `'!'`, `'+'` etc.) should be moved out one space before the
  corresponding indentation level, if this rule has a body or a sequence
  after it which does not fit on one line. This makes the formatting more
  consistent and moves the rule 'body' to the same indentation level as the
  rule itself, highlighting the unary operator.

    block
        =  *(   label_declaration_part
            |   constant_definition_part
            |   type_definition_part
            |   variable_declaration_part
            |   procedure_and_function_declaration_part
            )
            >> statement_part
        ;
97
doc/outline.txt
Normal file
@@ -0,0 +1,97 @@
# Copyright (C) 2001-2008 Joel de Guzman
# Copyright (C) 2001-2008 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)


Preface
What's New
Introduction
Qi and Karma
    Tutorials
    Abstracts
        Parsing Expression Grammar
        Parsing and Generating
        Primitives
        Operators
        Attributes
        Semantic Actions
        Directives
        Rules
        Grammars
        Debugging
        Error Handling
        Parse Trees and ASTs
    Quick Reference
    Reference
        Concepts
            Parser
            Generator
            Parser Director
            Generator Director
        Char
        String
        Numeric
        Binary
        Directive
        Action
        Nonterminal
        Operator
        Stream
        Auxiliary
        Debug
Lex
    Introduction
    Tutorials
    Abstracts
        Parsing using a Lexer
        Lexer Primitives
        Lexer States
        Lexer Attributes
        Lexer Semantic Actions
    Quick Reference
    Reference
        Concepts
            Lexer
            Token
            TokenDef
            TokenSet
        Lexer Class
        Token Class
        TokenDef Class
        TokenSet Class
FAQ
Notes
    Portability
    Porting from Spirit 1.8.x
    Style Guide
    Techniques
Rationale
Acknowledgments
References

-----------------------------------------------------------------

Concepts Outline:
    Description
    Notation
    Valid Expressions
        Expression | Semantics | Return type | Complexity
    Type Requirements
        Expression | Requirements
    Invariants
    Models

Reference Page Outline:
    Description
    Header
    Synopsis
    Template parameters
    Model of
    Objects
    Expression Semantics
        Expression | Semantics | Return type | Complexity
    Example
217
doc/preface.qbk
Normal file
@@ -0,0 +1,217 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Preface]

[:['["Examples of designs that meet most of the criteria for
"goodness" (easy to understand, flexible, efficient) are a recursive-
descent parser, which is traditional procedural code. Another example
is the STL, which is a generic library of containers and algorithms
depending crucially on both traditional procedural code and on
parametric polymorphism.]] [*--Bjarne Stroustrup]]

[heading History]

[heading /80s/]

In the mid-80s, Joel wrote his first calculator in Pascal. It was an
unforgettable coding experience. He was amazed at how a mutually
recursive set of functions could model a grammar specification. In time,
the skills he acquired from that academic experience became very
practical. Periodically Joel was tasked to do some parsing. For
instance, whenever he needed to perform any form of I/O, even in
binary, he tried to approach the task somewhat formally by writing a
grammar using Pascal-like syntax diagrams and then writing a
corresponding recursive-descent parser. This worked very well.

[heading /90s/]

The arrival of the Internet and the World Wide Web magnified this
thousand-fold. At one point Joel had to write an HTML parser for a Web
browser project. He got a recursive-descent HTML parser working based
on the W3C formal specifications easily. He was certainly glad that
HTML had a formal grammar specification. Because of the influence of
the Internet, Joel then had to do more parsing. RFC specifications
were everywhere. SGML, HTML, XML, even email addresses and those
seemingly trivial URLs were all formally specified using small EBNF-
style grammar specifications. This made him wish for a tool similar to
big-time parser generators such as YACC and ANTLR, where a parser is
built automatically from a grammar specification. Yet, he wanted it to
be extremely small; small enough to fit in his pocket, yet scalable.

It must be able to practically parse simple grammars such as email
addresses as well as moderately complex grammars such as XML and perhaps
some small to medium-sized scripting languages. Scalability is a prime
goal. You should be able to use it for small tasks such as parsing
command lines without incurring a heavy payload, as you do when you
are using YACC or PCCTS. Even now that it has evolved and matured to
become a multi-module library, true to its original intent, Spirit can
still be used for extreme micro-parsing tasks. You only pay for the
features that you need. The power of Spirit comes from its modularity
and extensibility. Instead of giving you a sledgehammer, it gives you
the right ingredients to create a sledgehammer easily.

The result was Spirit. Spirit was a personal project that was
conceived when Joel was doing R&D in Japan. Inspired by the GoF's
composite and interpreter patterns, he realized that he could model a
recursive-descent parser with hierarchical-object composition of
primitives (terminals) and composites (productions). The original
version was implemented with run-time polymorphic classes. A parser was
generated at run time by feeding in production rule strings such as:

    "prod ::= {'A' | 'B'} 'C';"

A compile function compiled the parser, dynamically creating a
hierarchy of objects and linking semantic actions on the fly. A very
early text can be found here: __early_spirit__.

[heading /2001 to 2006/]

Version 1.0 to 1.8 was a complete rewrite of the original Spirit
parser using expression templates and static polymorphism, inspired by
the works of Todd Veldhuizen (__todd__exprtemplates__, C++ Report,
June 1995). Initially, the static-Spirit version was meant only to
replace the core of the original dynamic-Spirit. Dynamic-Spirit
needed a parser to implement itself anyway. The original employed a
hand-coded recursive-descent parser to parse the input grammar
specification strings. Incidentally, this was the time when Hartmut
joined the Spirit development.

After its initial "open-source" debut in May 2001, static-Spirit
became a success. At around November 2001, the Spirit website had an
activity percentile of 98%, making it the number one parser tool at
SourceForge at the time. Not bad for a niche project such as a
parser library. The "static" portion of Spirit was forgotten and
static-Spirit simply became Spirit. The library soon evolved to
acquire more dynamic features.

Spirit was formally accepted into __boost__ in October 2002. Boost is
a peer-reviewed, open collaborative development effort that is a
collection of free Open Source C++ libraries covering a wide range of
domains. The Boost Libraries have become widely known as an industry
standard for design and implementation quality, robustness, and
reusability.

[heading /2007/]

Over the years, especially after Spirit was accepted into Boost,
Spirit has served its purpose quite admirably. The focus of what we'll
now call [*/Classic-Spirit/] (versions prior to 2.0) was on
transduction parsing, where the input string is merely translated to an
output string. A lot of parsers are of the transduction type. When the
time came to add attributes to the parser library, it was done rather
in an ad-hoc manner, with the goal of being 100% backward compatible
with classic Spirit. Some parsers have attributes, some don't.

Spirit V2 is another major rewrite. Spirit V2 grammars are fully
attributed (see __attr_grammar__). All parser components have
attributes. To do this efficiently and elegantly, we had to use a
couple of infrastructure libraries. Some of them hadn't been written
yet at the time, some were quite new when Spirit debuted, and some
needed work. __mpl__ is an important infrastructure library, yet is
not sufficient to implement Spirit V2. Another library had to be
written: __fusion__. Fusion sits between MPL and STL -- between compile
time and runtime -- mapping types to values. Fusion is a direct
descendant of both MPL and __boost_tuples__ (Fusion is now a
full-fledged __boost__ library). __phoenix__ also had to be beefed up
to support Spirit V2. The result is __phoenix2__. Last but not least,
Spirit V2 uses an __todd__exprtemplates__ library called
__boost_proto__.

[heading New Ideas: Spirit V2]

Just before the development of Spirit V2 began, Hartmut came across
the __string_template__ library, which is a part of the ANTLR parser
framework. It is a Java template engine (with ports for C# and Python)
for generating source code, web pages, emails, or any other formatted
text output. With it, he got the idea of using a formal notation
(a grammar) to describe the expected structure of an input character
sequence. The same grammar may be used to formalize the structure of a
corresponding output character sequence. This is possible because
parsing, most of the time, is implemented by comparing the input with
the patterns defined by the grammar. If we use the same patterns to
format a matching output, the generated sequence will follow the rules
of the grammar as well.

This insight led to the implementation of a grammar driven output
generation library compatible with the Spirit parser library. As it
turned out, parsing and generation are tightly connected and have very
similar concepts. The duality of these two sides of the same coin is
ubiquitous, which allowed us to build the parser library __qi__ and the
generator library __karma__ using the same component infrastructure.

The idea of creating a lexer library well integrated with the Spirit
parsers is not new. It has been discussed for almost the whole lifetime
of Classic-Spirit (pre V2). Several attempts to integrate existing lexer
libraries and frameworks with Spirit have been made and served as a
proof of concept and usability (for example see __wave__: The Boost
C/C++ Preprocessor Library, and __slex__: a fully dynamic C++ lexer
implemented with Spirit). Based on these experiences we added __lex__: a
fully integrated lexer library, making it possible to take advantage of
the power of regular expressions for token matching, taking pressure off
the parser components and simplifying parser grammars. Again, Spirit's
modular structure allowed us to reuse the same underlying component
library as for the parser and generator libraries.

[heading How to use this manual]

Each major section (there are two: __sec_qi_and_karma__ and __sec_lex__) is
roughly divided into 3 parts:

# Tutorials: A step by step guide with heavily annotated code. These
  are meant to get the user acquainted with the library as quickly as
  possible. The objective is to build the user's confidence in using the
  library through abundant examples and detailed instructions. Examples
  speak volumes.

# Abstracts: A high level summary of key topics. The objective is to
  give the user a high level view of the library, the key concepts,
  background and theories.

# Reference: Detailed formal technical reference. We start with a quick
  reference -- an easy to use table that maps into the reference proper.
  The reference proper starts with C++ __cpp_concepts__ followed by
  models of the concepts.

Some icons are used to mark certain topics indicative of their relevance.
These icons precede some text to indicate:

[table Icons

    [[Icon]          [Name]      [Meaning]]

    [[__note__]      [Note]      [Generally useful information (an aside
                                  that doesn't fit in the flow of the
                                  text)]]

    [[__tip__]       [Tip]       [Suggestion on how to do something
                                  (especially something that may not be
                                  obvious)]]

    [[__important__] [Important] [Important note on something to take
                                  particular notice of]]

    [[__caution__]   [Caution]   [Take special care with this - it may
                                  not be what you expect and may cause bad
                                  results]]

    [[__danger__]    [Danger]    [This is likely to cause serious
                                  trouble if ignored]]
]

This documentation is automatically generated by the Boost QuickBook
documentation tool. QuickBook can be found in the __boost_tools__.

[heading Support]

Please direct all questions to Spirit's mailing list. You can subscribe to
the __spirit_list__. The mailing list has a searchable archive. A search
link to this archive is provided on __spirit__'s home page. You may also
read and post messages to the mailing list through __spirit_general__
(thanks to __gmane__). The news group mirrors the mailing list. Here is a
link to the archives: __mlist_archive__.

[endsect] [/ Preface]
52
doc/qi_and_karma.qbk
Normal file
@@ -0,0 +1,52 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Qi and Karma]

[include qi_and_karma/tutorials.qbk]

[section Abstracts]
[include qi_and_karma/peg.qbk]
[include qi_and_karma/parsing.qbk]
[include qi_and_karma/generating.qbk]
[include qi_and_karma/primitives.qbk]
[include qi_and_karma/operators.qbk]
[include qi_and_karma/attributes.qbk]
[include qi_and_karma/semantic_actions.qbk]
[include qi_and_karma/directives.qbk]
[include qi_and_karma/rules.qbk]
[include qi_and_karma/grammars.qbk]
[include qi_and_karma/debugging.qbk]
[include qi_and_karma/error_handling.qbk]
[include qi_and_karma/parse_trees_and_asts.qbk]
[endsect]

[/section Quick Reference]
[include qi_and_karma/quick_reference.qbk]
[/endsect]

[section Reference]
[section Concepts]
[include reference/qi_and_karma/parser.qbk]
[include reference/qi_and_karma/generator.qbk]
[endsect]
[include reference/qi_and_karma/char.qbk]
[include reference/qi_and_karma/string.qbk]
[include reference/qi_and_karma/numeric.qbk]
[include reference/qi_and_karma/binary.qbk]
[include reference/qi_and_karma/directive.qbk]
[include reference/qi_and_karma/action.qbk]
[include reference/qi_and_karma/nonterminal.qbk]
[include reference/qi_and_karma/operator.qbk]
[include reference/qi_and_karma/stream.qbk]
[include reference/qi_and_karma/auxiliary.qbk]
[include reference/qi_and_karma/debug.qbk]
[endsect]

[endsect]
10
doc/qi_and_karma/attributes.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Attributes]
[endsect]
10
doc/qi_and_karma/debugging.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Debugging]
[endsect]
10
doc/qi_and_karma/directives.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Directives]
[endsect]
10
doc/qi_and_karma/error_handling.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Error Handling]
[endsect]
24
doc/qi_and_karma/generating.qbk
Normal file
@@ -0,0 +1,24 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parsing and Generating]

[heading The API functions exposed by __qi__]

[heading The parse() function]

[heading The phrase_parse() function]

[heading The tokenize_and_parse() function]

[heading The tokenize_and_phrase_parse() function]

[heading The make_parser() function]

[endsect]
10
doc/qi_and_karma/grammars.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Grammars]
[endsect]
10
doc/qi_and_karma/operators.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Operators]
[endsect]
10
doc/qi_and_karma/parse_trees_and_asts.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parse Trees and ASTs]
[endsect]
44
doc/qi_and_karma/parsing.qbk
Normal file
@@ -0,0 +1,44 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parsing]

Central to the library is the parser. The parser does the actual work of
recognizing a linear input stream of data, read sequentially from start to
end through the supplied iterators. The parser attempts to match the input
following a well-defined set of specifications known as grammar rules. The
parser returns a `bool` to report success or failure. When successful, the
parser calls a client-supplied semantic action, if there is one. The
semantic action extracts structural information depending on the data passed
by the parser and the hierarchical context of the parser it is attached to.

Parsers come in different flavors. The Spirit library comes bundled with an
extensive set of pre-defined parsers that perform various parsing tasks,
from the trivial to the complex. The parser, as a concept, has a public
conceptual interface contract. Following the contract, anyone can write a
conforming parser that will play along well with the library's predefined
components. We shall provide a blueprint detailing the conceptual interface
of the parser later.

Clients of the library generally do not need to write their own hand-coded
parsers at all. Spirit has an immense repertoire of pre-defined parsers
covering all aspects of syntax and semantic analysis. We shall examine this
repertoire of parsers in the following sections. In the rare case where a
specific functionality is not available, it is extremely easy to write a
user-defined parser. The ease of writing a parser entity is the main reason
for Spirit's extensibility.
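
To make the mechanics concrete, here is a minimal sketch of invoking a
parser (assuming the `parse()` API listed under the headings below, the
pre-defined `uint_` parser, and the header path of released Spirit V2
versions):

    #include <boost/spirit/include/qi.hpp>
    #include <string>

    namespace qi = boost::spirit::qi;

    bool parse_number(std::string const& input)
    {
        std::string::const_iterator first = input.begin();
        std::string::const_iterator last = input.end();

        // attempt to match an unsigned integer; returns true on success
        // and advances 'first' past the matched characters
        bool r = qi::parse(first, last, qi::uint_);

        // a full match requires all input to have been consumed
        return r && first == last;
    }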

[heading The API functions exposed by __qi__]

[heading The parse() function]

[heading The phrase_parse() function]

[heading The tokenize_and_parse() function]

[heading The tokenize_and_phrase_parse() function]

[heading The make_parser() function]

[endsect]

10
doc/qi_and_karma/peg.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parsing Expression Grammar]
[endsect]
10
doc/qi_and_karma/primitives.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Primitives]
[endsect]
43
doc/qi_and_karma/quick_reference.qbk
Normal file
@@ -0,0 +1,43 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Quick Reference]

The following tables use some conventions to encode the attribute type
exposed by a component:

[variablelist
    [[`attribute_of(P)`] [The component exposes the same attribute as the
                          component `P` used as part of the overall
                          construct]]
    [[`value_type(I)`]   [The component exposes the `value_type` of the
                          underlying iterator `I` as its attribute type]]
]

[table Character Parsers
    [[Component]    [Description]   [Attribute]]
    [[`char_`]      []              [`char`]]
    [[`wchar`]      []              [`wchar_t`]]
    [[`lit`]        []              [`unused`]]
    [[`wlit`]       []              [`unused`]]
    [[`'x'`]        []              [`unused`]]
    [[`L'x'`]       []              [`unused`]]
    [[`alnum`]      []              [`Char`]]
    [[`alpha`]      []              [`Char`]]
    [[`blank`]      []              [`Char`]]
    [[`cntrl`]      []              [`Char`]]
    [[`digit`]      []              [`Char`]]
    [[`graph`]      []              [`Char`]]
    [[`print`]      []              [`Char`]]
    [[`punct`]      []              [`Char`]]
    [[`space`]      []              [`Char`]]
    [[`xdigit`]     []              [`Char`]]
    [[`~P`]         []              [`attribute_of(P)`]]
]

[endsect]
10
doc/qi_and_karma/rules.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Rules]
[endsect]
10
doc/qi_and_karma/semantic_actions.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Semantic Actions]
[endsect]
10
doc/qi_and_karma/tutorials.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Tutorials]
[endsect]
10
doc/rationale.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Rationale]
[endsect]
10
doc/reference/lex/lexer.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer]
[endsect]
19
doc/reference/lex/lexer_class.qbk
Normal file
@@ -0,0 +1,19 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Lexer Class]

[heading The lexertl_lexer Class Implementing the Dynamic Model]

[heading The lexertl_actor_lexer Class Implementing the Dynamic Model]

[heading The lexertl_static_lexer Class Implementing the Static Model]

[heading The lexertl_static_actor_lexer Class Implementing the Static Model]

[endsect]
10
doc/reference/lex/token.qbk
Normal file
10
doc/reference/lex/token.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section Token]
|
||||
[endsect]
|
||||
10
doc/reference/lex/token_class.qbk
Normal file
10
doc/reference/lex/token_class.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section Token Class]
|
||||
[endsect]
|
||||
10
doc/reference/lex/tokendef.qbk
Normal file
10
doc/reference/lex/tokendef.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section TokenDef]
|
||||
[endsect]
|
||||
10
doc/reference/lex/tokendef_class.qbk
Normal file
10
doc/reference/lex/tokendef_class.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section TokenDef Class]
|
||||
[endsect]
|
||||
10
doc/reference/lex/tokenset.qbk
Normal file
10
doc/reference/lex/tokenset.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section TokenSet]
|
||||
[endsect]
|
||||
10
doc/reference/lex/tokenset_class.qbk
Normal file
10
doc/reference/lex/tokenset_class.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section TokenSet Class]
|
||||
[endsect]
|
||||
10
doc/reference/qi_and_karma/action.qbk
Normal file
10
doc/reference/qi_and_karma/action.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section Action]
|
||||
[endsect]
|
||||
10
doc/reference/qi_and_karma/auxiliary.qbk
Normal file
10
doc/reference/qi_and_karma/auxiliary.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section Auxiliary]
|
||||
[endsect]
|
||||
10
doc/reference/qi_and_karma/binary.qbk
Normal file
10
doc/reference/qi_and_karma/binary.qbk
Normal file
@@ -0,0 +1,10 @@
|
||||
[/==============================================================================
|
||||
Copyright (C) 2001-2008 Joel de Guzman
|
||||
Copyright (C) 2001-2008 Hartmut Kaiser
|
||||
|
||||
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
===============================================================================/]
|
||||
|
||||
[section Binary]
|
||||
[endsect]
|
||||
10
doc/reference/qi_and_karma/char.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Char]
[endsect]
10
doc/reference/qi_and_karma/debug.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Debug]
[endsect]
10
doc/reference/qi_and_karma/directive.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Directive]
[endsect]
10
doc/reference/qi_and_karma/generator.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Generator]
[endsect]
10
doc/reference/qi_and_karma/nonterminal.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Nonterminal]
[endsect]
10
doc/reference/qi_and_karma/numeric.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Numeric]
[endsect]
10
doc/reference/qi_and_karma/operator.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Operators]
[endsect]
43
doc/reference/qi_and_karma/parser.qbk
Normal file
@@ -0,0 +1,43 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Parser]

[heading Description]

Description of Parser concept

[variablelist Notation
[[`p`] [A Parser]]
]

[heading Valid Expressions]

For any Parser the following expressions must be valid:

[table
[[Expression] [Semantics] [Return type] [Complexity]]
[[`xxx`] [Semantics of `xxx`] [Parser] [Constant]]
]

[heading Type Requirements]

[table
[[Expression] [Requirements]]
[[`xxx`] [Requirements for `xxx`]]
]

[heading Invariants]

For any Parser xxx the following invariants always hold:

[heading Models]

Links to models of Parser concept

[endsect]
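The concept tables above are still placeholders from the concepts template. As a concrete reference point, here is a minimal sketch (not part of this commit) showing an existing model of the Parser concept, `int_`, driven through `qi::parse()`; the header path and the `parse()` overload are assumed from the published Spirit 2 interface.

// Hedged sketch: exercising qi::int_, a model of the Parser concept.
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>

int main()
{
    namespace qi = boost::spirit::qi;

    std::string input("42");
    std::string::iterator first = input.begin();

    // parse() drives the parser over the iterator range [first, end)
    bool ok = qi::parse(first, input.end(), qi::int_);

    std::cout << (ok ? "matched an int" : "no match") << std::endl;
    return 0;
}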
10
doc/reference/qi_and_karma/stream.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section Stream]
[endsect]
10
doc/reference/qi_and_karma/string.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section String]
[endsect]
91
doc/references.qbk
Normal file
@@ -0,0 +1,91 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section References]

[table
[[ ] [Authors] [Title, Publisher/link, Date Published]]
[[1.] [Todd Veldhuizen] [[@http://www.extreme.indiana.edu/%7Etveldhui/papers/Expression-Templates/exprtmpl.html
"Expression Templates"]. C++ Report, June 1995.]]
[[2.] [Peter Naur (ed.)] [[@http://www.masswerk.at/algol60/report.htm
"Report on the Algorithmic Language ALGOL 60"]. CACM, May 1960.]]
[[3.] [ISO/IEC] [[@http://www.cl.cam.ac.uk/%7Emgk25/iso-14977.pdf "ISO-EBNF"],
ISO/IEC 14977: 1996(E).]]
[[4.] [Richard J. Botting, Ph.D.] [[@http://www.csci.csusb.edu/dick/maths/intro_ebnf.html
"XBNF"] (citing Leu-Weiner, 1973).
California State University, San Bernardino, 1998.]]
[[5.] [James Coplien] ["Curiously Recurring Template Pattern".
C++ Report, Feb. 1995.]]
[[6.] [Thierry Geraud and
Alexandre Duret-Lutz] [[@http://www.coldewey.com/europlop2000/papers/geraud%2Bduret.zip
Generic Programming Redesign of Patterns]
Proceedings of the 5th European Conference on Pattern Languages
of Programs (EuroPLoP'2000), Irsee, Germany, July 2000.]]
[[7.] [Geoffrey Furnish] [[@http://www.adtmag.com/joop/carticle.aspx?ID=627
"Disambiguated Glommable Expression Templates Reintroduced"]
C++ Report, May 2000.]]
[[8.] [Erich Gamma,
Richard Helm,
Ralph Johnson,
and John Vlissides] [Design Patterns, Elements of Reusable Object-Oriented Software.
Addison-Wesley, 1995.]]
[[9.] [Alfred V. Aho,
Ravi Sethi, and
Jeffrey D. Ullman] [Compilers: Principles, Techniques and Tools.
Addison-Wesley, June 1987.]]
[[10.] [Dick Grune and
Ceriel Jacobs] [[@http://www.cs.vu.nl/%7Edick/PTAPG.html
Parsing Techniques: A Practical Guide.]
Ellis Horwood Ltd.: West Sussex, England, 1990.
(electronic copy, 1998).]]
[[11.] [T. J. Parr,
H. G. Dietz, and
W. E. Cohen] [[@http://citeseer.ist.psu.edu/6885.html
PCCTS Reference Manual (Version 1.00)].
School of Electrical Engineering, Purdue University,
West Lafayette, August 1991.]]
[[12.] [Adrian Johnstone and
Elizabeth Scott] [[@ftp://ftp.cs.rhul.ac.uk/pub/rdp
RDP, A Recursive Descent Compiler Compiler].
Technical Report CSD TR 97 25, Dept. of Computer Science,
Egham, Surrey, England, Dec. 20, 1997.]]
[[13.] [Adrian Johnstone] [[@http://www.cs.rhul.ac.uk/research/languages/projects/lookahead_backtrack.shtml
Languages and Architectures,
Parser generators with backtrack or extended lookahead capability]
Department of Computer Science, Royal Holloway, University of London,
Egham, Surrey, England]]
[[14.] [Damian Conway] [[@http://www.csse.monash.edu.au/%7Edamian/papers/#Embedded_Input_Parsing_for_C
Parsing with C++ Classes].
ACM SIGPLAN Notices, 29:1, 1994.]]
[[15.] [Joel de Guzman] [[@http://spirit.sourceforge.net/distrib/spirit_1_8_5/libs/spirit/index.html
"Spirit Version 1.8"], 1998-2003.]]
[[16.] [S. Doaitse Swierstra and
Luc Duponcheel] [[@http://citeseer.ist.psu.edu/448665.html
Deterministic, Error-Correcting Combinator Parsers]
Dept. of Computer Science, Utrecht University, P.O. Box 80.089,
3508 TB Utrecht, The Netherlands]]
[[17.] [Bjarne Stroustrup] [[@http://www.research.att.com/%7Ebs/whitespace98.pdf
Generalizing Overloading for C++2000]
Overload, Issue 25. April 1, 1998.]]
[[18.] [Dr. John Maddock] [[@http://www.boost.org/libs/regex/index.html
Regex++ Documentation]
http://www.boost.org/libs/regex/index.htm]]
[[19.] [Anonymous,
edited by Graham Hutton] [[@http://www.cs.nott.ac.uk/~gmh//faq.html
Frequently Asked Questions for comp.lang.functional].
University of Nottingham.]]
[[20.] [Hewlett-Packard] [[@http://www.sgi.com/tech/stl/
Standard Template Library Programmer's Guide], Hewlett-Packard Company, 1994.]]
[[21.] [Boost Libraries] [[@http://boost.org/libs/libraries.htm
Boost Libraries Documentation].]]
[[22.] [Brian McNamara and
Yannis Smaragdakis] [[@http://www.cc.gatech.edu/~yannis/fc++/ FC++: Functional Programming in C++].]]
[[23.] [Todd Veldhuizen] [[@ftp://ftp.cs.indiana.edu/pub/techreports/TR542.pdf Techniques for Scientific C++].]]
]

[endsect]
143
doc/spirit2.qbk
Normal file
@@ -0,0 +1,143 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[article Spirit
    [quickbook 1.4]
    [version 2.0]
    [authors [de Guzman, Joel], [Kaiser, Hartmut]]
    [copyright 2001 2002 2003 2004 2005 2006 2007 2008 Joel de Guzman, Hartmut Kaiser]
    [purpose Parser and Generator Library]
    [license
        Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
        [@http://www.boost.org/LICENSE_1_0.txt])
    ]
]

[/ November 14, 2007 ]

[/ Some links ]

[def __spirit__ [@http://spirit.sourceforge.net Spirit]]
[def __phoenix__ [@http://boost.org/libs/spirit/phoenix/index.html Phoenix]]
[def __phoenix2__ [@http://spirit.sourceforge.net/dl_more/phoenix_v2/libs/spirit/phoenix/doc/html/index.html Phoenix2]]
[def __fusion__ [@http://spirit.sourceforge.net/dl_more/fusion_v2/libs/fusion/doc/html/index.html Fusion]]
[def __mpl__ [@http://www.boost.org/libs/mpl/index.html MPL]]
[def __boost_tuples__ [@http://www.boost.org/libs/tuple/index.html Boost.Tuples]]
[def __boost_proto__ -Boost.Proto-]
[def __boost__ [@http://www.boost.org/ Boost]]
[def __boost_tools__ [@http://www.boost.org/tools/index.html Boost Tools]]
[def __spirit_list__ [@https://lists.sourceforge.net/lists/listinfo/spirit-general Spirit Mailing List]]
[def __spirit_general__ [@news://news.gmane.org/gmane.comp.spirit.general Spirit General NNTP news portal]]
[def __gmane__ [@http://www.gmane.org Gmane]]
[def __mlist_archive__ [@http://news.gmane.org/gmane.comp.parsers.spirit.general]]

[def __early_spirit__ [@http://spirit.sourceforge.net/dl_docs/pre-spirit.htm pre-Spirit]]
[def __todd__exprtemplates__ [@http://ubiety.uwaterloo.ca/~tveldhui/papers/Expression-Templates/exprtmpl.html Expression Templates]]
[def __cpp_concepts__ [@http://en.wikipedia.org/wiki/C%2B%2B0x#Concept Concepts]]
[def __attr_grammar__ [@http://en.wikipedia.org/wiki/Attribute_grammar Attribute Grammar]]
[def __string_template__ [@http://www.stringtemplate.org/ StringTemplate]]
[def __lexertl__ [@http://www.benhanson.net/lexertl.html Lexertl]]
[def __wave__ [@http://www.boost.org/libs/wave/index.html Wave]]
[def __slex__ [@http://spirit.sourceforge.net/repository/applications/slex.zip SLex]]
[def __flex__ [@http://flex.sourceforge.net/ Flex]]
[def __re2c__ [@http://re2c.sourceforge.net/ re2c]]
[def __ragel__ [@http://www.cs.queensu.ca/~thurston/ragel/ Ragel]]

[def __boost_variant__ [@http://www.boost.org/doc/html/variant.html `boost::variant<>`]]
[def __boost_iterator_range__ [@http://www.boost.org/libs/range/doc/utility_class.html#iter_range `boost::iterator_range<>`]]


[def __qi__ /Spirit.Qi/]
[def __karma__ /Spirit.Karma/]
[def __lex__ /Spirit.Lex/]


[def __fixme__ *FIXME*]


[/ Sections ]

[def __sec_qi_and_karma__ [link spirit.qi_and_karma Qi and Karma]]
[def __sec_qi_karma_attributes__ [link spirit.qi_and_karma.abstracts.attributes Attributes]]

[def __sec_lex__ [link spirit.__lex__ Lex]]
[def __sec_lex_quickstart_1__ [link spirit.__lex__.__lex___tutorials.quickstart_1___a_word_counter_using___lex__ Lex Quickstart 1 - A word counter using __lex__]]
[def __sec_lex_quickstart_2__ [link spirit.__lex__.__lex___tutorials.quickstart_2___a_better_word_counter_using___lex__ Lex Quickstart 2 - A better word counter using __lex__]]
[def __sec_lex_quickstart_3__ [link spirit.__lex__.__lex___tutorials.quickstart_3___counting_words_using_a_parser Lex Quickstart 3 - Counting Words Using a Parser]]

[def __sec_lex_static_model__ [link spirit.__lex__.abstracts.the__static__lexer_model The /Static/ Model]]
[def __sec_lex_primitives__ [link spirit.__lex__.abstracts.lexer_primitives Lexer Primitives]]
[def __sec_lex_tokenvalues__ [link spirit.__lex__.abstracts.lexer_primitives.about_tokens_and_token_values About Tokens and Token Values]]
[def __sec_lex_attributes__ [link spirit.__lex__.abstracts.lexer_attributes Lexer Attributes]]

[def __sec_ref_lex_token__ [link spirit.__lex__.reference.concepts.token Token Reference]]
[def __sec_ref_lex_token_def__ [link spirit.__lex__.reference.concepts.tokendef TokenDef Reference]]

[/ References to API descriptions ]

[def __api_tokenize_and_parse__ [link spirit.qi_and_karma.abstracts.parsing_and_generating.the_tokenize_and_phrase_parse___function `tokenize_and_parse()`]]
[def __api_generate_static__ [link spirit.__lex__.abstracts.tokenizing_input_data.the_generate_static___function `generate_static()`]]


[/ References to classes ]

[def __class_token_def__ [link spirit.__lex__.reference.tokendef_class `token_def<>`]]

[def __class_lexertl_token__ [link spirit.__lex__.reference.token_class `lexertl_token<>`]]
[def __class_lexertl_lexer__ [link spirit.__lex__.reference.lexer_class.the_lexertl_lexer_class_implementing_the_dynamic_model `lexertl_lexer<>`]]
[def __class_lexertl_static_lexer__ [link spirit.__lex__.reference.lexer_class.the_lexertl_static_lexer_class_implementing_the_static_model `lexertl_static_lexer<>`]]


[/ Some images ]

[def __note__ [$../../../../doc/html/images/adm_note.png]]
[def __tip__ [$../../../../doc/html/images/adm_tip.png]]
[def __important__ [$../../../../doc/html/images/adm_important.png]]
[def __caution__ [$../../../../doc/html/images/adm_caution.png]]
[def __danger__ [$../../../../doc/html/images/adm_danger.png]]


[/ some templates]

[/ fig[ref title label]
    Image element with a title.

    ref   := Reference to the image file.
    title := The title to associate with this figure.
    label := the id to use to be able to reference this picture
]
[template fig[ref title label]'''
<figure id="'''[label]'''">
    <title>'''[title]'''</title>
    <inlinemediaobject>
    <imageobject>
        <imagedata fileref="'''[ref]'''"></imagedata>
    </imageobject>
    <textobject>
        <phrase role="alt">'''[title]'''</phrase>
    </textobject>
    </inlinemediaobject>
</figure>
''']


[/ Here we go ]

[include preface.qbk]
[include what_s_new.qbk]
[include introduction.qbk]
[include qi_and_karma.qbk]
[include lex.qbk]
[include faq.qbk]
[include notes.qbk]
[include rationale.qbk]
[include acknowledgments.qbk]
[include references.qbk]

10
doc/what_s_new.qbk
Normal file
@@ -0,0 +1,10 @@
[/==============================================================================
Copyright (C) 2001-2008 Joel de Guzman
Copyright (C) 2001-2008 Hartmut Kaiser

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
===============================================================================/]

[section What's New]
[endsect]
12
example/karma/Jamfile
Normal file
@@ -0,0 +1,12 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2007 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
project spirit-karma-example ;

exe basic_facilities : basic_facilities.cpp ;
exe functor_facilities : functor_facilities.cpp ;

178
example/karma/basic_facilities.cpp
Normal file
@@ -0,0 +1,178 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  The main purpose of this example is to show the uniform and easy way of
//  output formatting for different container types.
//
//  Since the 'stream' primitive used below uses the streaming operator defined
//  for the container value_type, you must make sure to have a corresponding
//  operator<<() available for this contained data type. On the other hand,
//  this means that the format descriptions used below are usable for any
//  contained type as long as this type has an associated streaming operator
//  defined.

// use a larger value for the alignment field width (default is 10)
#define BOOST_KARMA_DEFAULT_FIELD_LENGTH 25

#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>

#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <algorithm>
#include <cstdlib>

#include <boost/range.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>

using namespace boost::spirit;
using namespace boost::spirit::ascii;
namespace karma = boost::spirit::karma;

///////////////////////////////////////////////////////////////////////////////
//  Output the given containers in list format
//  Note: the format description does not depend on the type of the sequence
//        nor does it depend on the type of the elements contained in the
//        sequence
///////////////////////////////////////////////////////////////////////////////
template <typename Container>
void output_container(std::ostream& os, Container const& c)
{
    // output the container as a space separated sequence
    os <<
        karma::format_delimited(
            *stream,                        // format description
            c,                              // data
            space                           // delimiter
        ) << std::endl << std::endl;

    os <<
        karma::format_delimited(
            '[' << *stream << ']',          // format description
            c,                              // data
            space                           // delimiter
        ) << std::endl << std::endl;

    // output the container as a comma separated list
    os <<
        karma::format(
            stream % ", ",                  // format description
            c                               // data
        ) << std::endl << std::endl;

    os <<
        karma::format(
            '[' << (stream % ", ") << ']',  // format description
            c                               // data
        ) << std::endl << std::endl;

    // output the container as a comma separated list of items enclosed in '()'
    os <<
        karma::format(
            ('(' << stream << ')') % ", ",  // format description
            c                               // data
        ) << std::endl << std::endl;

    os <<
        karma::format(
            '[' << (
                ('(' << stream << ')') % ", "
            ) << ']',                       // format description
            c                               // data
        ) << std::endl << std::endl;

    // output the container as a HTML list
    os <<
        karma::format_delimited(
            "<ol>" <<
                *verbatim["<li>" << stream << "</li>"]
            << "</ol>",                     // format description
            c,                              // data
            '\n'                            // delimiter
        ) << std::endl;

    // output the container as right aligned column
    os <<
        karma::format_delimited(
            *verbatim[
                "|" << right_align[stream] << "|"
            ],                              // format description
            c,                              // data
            '\n'                            // delimiter
        ) << std::endl;

    os << std::endl;
}

int main()
{
    ///////////////////////////////////////////////////////////////////////////
    // vector
    std::vector<int> v (8);
    std::generate(v.begin(), v.end(), std::rand); // randomly fill the vector

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "std::vector<int>" << std::endl;
    output_container(std::cout, v);

    ///////////////////////////////////////////////////////////////////////////
    // list
    std::list<char> l;
    l.push_back('A');
    l.push_back('B');
    l.push_back('C');

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "std::list<char>" << std::endl;
    output_container(std::cout, l);

    ///////////////////////////////////////////////////////////////////////////
    // C-style array
    int i[4] = { 3, 6, 9, 12 };

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "int i[]" << std::endl;
    output_container(std::cout, boost::make_iterator_range(i, i+4));

    ///////////////////////////////////////////////////////////////////////////
    // strings
    std::string str("Hello world!");

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "std::string" << std::endl;
    output_container(std::cout, str);

    ///////////////////////////////////////////////////////////////////////////
    // vector of boost::date objects
    // Note: any registered facets get used!
    using namespace boost::gregorian;
    std::vector<date> dates;
    dates.push_back(date(2005, Jun, 25));
    dates.push_back(date(2006, Jan, 13));
    dates.push_back(date(2007, May, 3));

    date_facet* facet(new date_facet("%A %B %d, %Y"));
    std::cout.imbue(std::locale(std::cout.getloc(), facet));

    std::cout << "-------------------------------------------------------------"
              << std::endl;
    std::cout << "std::vector<boost::date>" << std::endl;
    output_container(std::cout, dates);

    ///////////////////////////////////////////////////////////////////////////
    // fusion tuples
    // this will work in the future
    // boost::fusion::vector<int, char, double> fv(42, 'a', 45.8);
    //
    // std::cout << "boost::fusion::vector<int, char, double>" << std::endl;
    // output_container(std::cout, fv);
    return 0;
}

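The comment at the top of this file says that 'stream' only requires a streaming operator for the contained type. A minimal, hypothetical sketch of that point (not part of this commit): the `point` type and its `operator<<()` are made up for illustration, and the qualified name `karma::stream` is assumed to match the unqualified `stream` used above.

// Hedged sketch: a user-defined type picked up by the 'stream' primitive.
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>
#include <iostream>
#include <ostream>
#include <vector>

// a hypothetical user-defined value type...
struct point
{
    int x, y;
    point(int x_, int y_) : x(x_), y(y_) {}
};

// ...and its streaming operator, which is all 'stream' needs to know
std::ostream& operator<<(std::ostream& os, point const& p)
{
    return os << p.x << '/' << p.y;
}

int main()
{
    namespace karma = boost::spirit::karma;

    std::vector<point> pts;
    pts.push_back(point(1, 2));
    pts.push_back(point(3, 4));

    // the same comma separated list description as used in the example above
    std::cout << karma::format(karma::stream % ", ", pts) << std::endl;
    return 0;                   // prints: 1/2, 3/4
}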
202
example/karma/functor_facilities.cpp
Normal file
@@ -0,0 +1,202 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example demonstrates how to write functor based generators for special
//  purposes.

#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>

#include <iostream>
#include <string>
#include <vector>
#include <list>
#include <algorithm>
#include <cstdlib>

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  The functor generator 'counter' can be used for output annotation with some
//  item counting information.
///////////////////////////////////////////////////////////////////////////////
struct counter_impl : boost::spirit::karma::functor_base
{
    template <typename OutputIterator, typename Context, typename Parameter>
    bool operator()(Parameter const&, Context& ctx, OutputIterator& sink) const
    {
        namespace karma = boost::spirit::karma;
        return karma::generate(sink, int_ << ": ", counter++);
    }

    counter_impl(int& counter_)
      : counter(counter_) {}

    int& counter;
};

inline boost::spirit::result_of::as_generator<counter_impl>::type
counter(int& counter_)
{
    using namespace boost::spirit::karma;
    return as_generator(counter_impl(counter_));
}

///////////////////////////////////////////////////////////////////////////////
//  The functor generator 'confix' allows a simple syntax for generating
//  output wrapped inside a pair of a prefix and a suffix.
///////////////////////////////////////////////////////////////////////////////
template <typename Expr>
struct confix_impl : public boost::spirit::karma::functor_base
{
    template <typename Context>
    struct apply
    {
        typedef boost::spirit::hold_any type;
    };

    template <typename OutputIterator, typename Context, typename Parameter>
    bool operator()(Parameter const& v, Context& ctx, OutputIterator& sink) const
    {
        namespace karma = boost::spirit::karma;
        return karma::generate(sink, open << xpr << close, v);
    }

    confix_impl(char const* open_, char const* close_, Expr const& xpr_)
      : open(open_), close(close_), xpr(xpr_) {}

    std::string open;
    std::string close;
    Expr xpr;
};

template <typename Expr>
inline typename boost::spirit::result_of::as_generator<confix_impl<Expr> >::type
confix(Expr const& xpr_, char const* open_ = "", char const* close_ = "")
{
    using namespace boost::spirit::karma;
    return as_generator(confix_impl<Expr>(open_, close_, xpr_));
}

///////////////////////////////////////////////////////////////////////////////
//  The functor generator 'list' allows a simple syntax for generating
//  list formatted output.
//
//  This example uses phoenix::bind to allow omitting the second argument from
//  the operator() and to swap the remaining two arguments.
///////////////////////////////////////////////////////////////////////////////
template <typename Expr>
struct list_impl : boost::spirit::karma::functor_base
{
    // this function will be called to generate the output
    template <typename OutputIterator, typename Parameter>
    bool operator()(OutputIterator& sink, Parameter const& v) const
    {
        namespace karma = boost::spirit::karma;
        return karma::generate(sink, xpr % delim, v);
    }

    list_impl(Expr const& xpr_, char const* delim_)
      : xpr(xpr_), delim(delim_) {}

    Expr xpr;
    std::string delim;
};

// Supply the expected parameter type explicitly
struct list_impl_mf
{
    // the expected parameter type of a functor has to be defined using an
    // embedded apply metafunction
    template <typename Context>
    struct apply
    {
        typedef boost::spirit::hold_any type;
    };
};

template <typename Expr>
inline list_impl<Expr>
list(Expr const& xpr, char const* delim)
{
    return list_impl<Expr>(xpr, delim);
}

///////////////////////////////////////////////////////////////////////////////
int main()
{
    namespace karma = boost::spirit::karma;
    using namespace boost::phoenix;
    using namespace boost::phoenix::arg_names;

    ///////////////////////////////////////////////////////////////////////////
    // Output the given containers in list format
    // We use a special functor generator here to annotate the output with
    // an integer counting the entries.
    ///////////////////////////////////////////////////////////////////////////
    std::vector<int> v (8);
    std::generate(v.begin(), v.end(), std::rand); // randomly fill the vector

    int counter1 = 1;
    std::cout <<
        karma::format(
            (counter(counter1) << int_) % ", ",   // format description
            v                                     // data
        ) << std::endl;

    // Here we initialize the counter to 100
    int counter2 = 100;
    std::cout <<
        karma::format(
            '[' << (
                (counter(counter2) << int_) % ", "
            ) << ']',                             // format description
            v                                     // data
        ) << std::endl;

    ///////////////////////////////////////////////////////////////////////////
    // list
    // The output format description used below adds special item formatting
    ///////////////////////////////////////////////////////////////////////////
    std::list<std::string> names;
    names.push_back("Spirit");
    names.push_back("Qi");
    names.push_back("Karma");

    // specifying a prefix item suffix scheme directly
    std::cout <<
        karma::format(
            ('{' << stream << '}') % ", ",        // format description
            names                                 // data
        ) << std::endl;

    // The confix generator nicely wraps the given expression with prefix and
    // suffix strings
    std::cout <<
        karma::format(
            confix(stream % ", ", "[", "]"),      // format description
            names                                 // data
        ) << std::endl;

    ///////////////////////////////////////////////////////////////////////////
    // Output the given container as a list
    // We use a separate metafunction list_impl_mf to specify the expected
    // parameter type of this functor generator.
    // We use phoenix::bind to allow omitting the 2nd argument from the functor
    // function operator and to change the sequence of the remaining two
    // arguments.
    ///////////////////////////////////////////////////////////////////////////
    std::string str("Hello world!");
    std::cout <<
        karma::format(
            karma::as_generator_mf<list_impl_mf>(bind(list(stream, ", "), _3, _1)),
            str
        ) << std::endl;

    return 0;
}
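To isolate the pattern the three generators above share, here is a deliberately tiny, hedged sketch (not part of this commit) written against the same pre-release functor interface used in this file: `functor_base`, the embedded `apply` metafunction declaring a `hold_any` parameter, and `as_generator()`. The name `angle_impl` is made up; everything else mirrors `confix_impl` and `counter_impl` directly.

// Hedged sketch: a minimal functor generator wrapping its value in '<...>'.
#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>
#include <iostream>
#include <vector>

using namespace boost::spirit;

struct angle_impl : boost::spirit::karma::functor_base
{
    // declare the expected parameter type, as list_impl_mf does above
    template <typename Context>
    struct apply
    {
        typedef boost::spirit::hold_any type;
    };

    template <typename OutputIterator, typename Context, typename Parameter>
    bool operator()(Parameter const& v, Context&, OutputIterator& sink) const
    {
        namespace karma = boost::spirit::karma;
        // delegate to karma::generate, exactly as confix_impl does
        return karma::generate(sink, '<' << stream << '>', v);
    }
};

int main()
{
    namespace karma = boost::spirit::karma;

    std::vector<int> v;
    v.push_back(1);
    v.push_back(2);

    std::cout <<
        karma::format(
            karma::as_generator(angle_impl()) % ", ",  // format description
            v                                          // data
        ) << std::endl;                                // prints: <1>, <2>
    return 0;
}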
119
example/karma/quick_start1.cpp
Normal file
@@ -0,0 +1,119 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  The main purpose of this example is to show how a single container type can
//  be formatted using different output grammars.

#include <boost/spirit/include/karma.hpp>
#include <boost/spirit/include/karma_stream.hpp>

#include <iostream>
#include <vector>
#include <algorithm>
#include <cstdlib>

using namespace boost::spirit;
using namespace boost::spirit::ascii;
namespace karma = boost::spirit::karma;

///////////////////////////////////////////////////////////////////////////////
int main()
{
    ///////////////////////////////////////////////////////////////////////////
    // vector
    std::vector<int> v (8);
    std::generate(v.begin(), v.end(), std::rand); // randomly fill the vector

    std::cout << "Output 8 integers from a std::vector<int>..." << std::endl;

    // output the container as a sequence without any separation
    std::cout << "...without any separation" << std::endl;
    std::cout <<
        karma::format(
            *int_,                        // format description
            v                             // data
        ) << std::endl << std::endl;

    // output the container as a space separated sequence
    std::cout << "...as space delimited list" << std::endl;
    std::cout <<
        karma::format_delimited(
            *int_,                        // format description
            v,                            // data
            space                         // delimiter
        ) << std::endl << std::endl;

    std::cout <<
        karma::format_delimited(
            '[' << *int_ << ']',          // format description
            v,                            // data
            space                         // delimiter
        ) << std::endl << std::endl;

    // output the container as a comma separated list
    std::cout << "...as comma separated list" << std::endl;
    std::cout <<
        karma::format(
            int_ % ", ",                  // format description
            v                             // data
        ) << std::endl << std::endl;

    std::cout <<
        karma::format(
            '[' << (int_ % ", ") << ']',  // format description
            v                             // data
        ) << std::endl << std::endl;

    // output the container as a comma separated list of doubles
    std::cout << "...as comma separated list of doubles" << std::endl;
    std::cout <<
        karma::format(
            double_ % ", ",               // format description
            v                             // data
        ) << std::endl << std::endl;

    // output the container as a comma separated list of items enclosed in '()'
    std::cout << "...as list of ints enclosed in '()'" << std::endl;
    std::cout <<
        karma::format(
            ('(' << int_ << ')') % ", ",  // format description
            v                             // data
        ) << std::endl << std::endl;

    std::cout <<
        karma::format(
            '[' << (
                ('(' << int_ << ')') % ", "
            ) << ']',                     // format description
            v                             // data
        ) << std::endl << std::endl;

    // output the container as a HTML list
    std::cout << "...as HTML bullet list" << std::endl;
    std::cout <<
        karma::format_delimited(
            "<ol>" <<
                // no delimiting within verbatim
                *verbatim[" <li>" << int_ << "</li>"]
            << "</ol>",                   // format description
            v,                            // data
            '\n'                          // delimiter
        ) << std::endl;

    // output the container as right aligned column
    std::cout << "...right aligned in a column" << std::endl;
    std::cout <<
        karma::format_delimited(
            *verbatim[
                "|" << right_align[int_] << "|"
            ],                            // format description
            v,                            // data
            '\n'                          // delimiter
        ) << std::endl;

    std::cout << std::endl;
    return 0;
}

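The quick start above always routes output through `karma::format()` and a std::ostream. For completeness, a hedged sketch (not part of this commit) of the iterator-based counterpart: the functor examples elsewhere in this commit already call `karma::generate(sink, ...)` with an output iterator, so the sketch below assumes the same overload also accepts a `std::back_insert_iterator`, producing the formatted text directly in a string.

// Hedged sketch: the same format description written into a std::string.
#include <boost/spirit/include/karma.hpp>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

int main()
{
    namespace karma = boost::spirit::karma;

    std::vector<int> v;
    v.push_back(1);
    v.push_back(2);
    v.push_back(3);

    // the same "int_ % ", "" description as above, but with an iterator sink
    std::string generated;
    karma::generate(std::back_inserter(generated), karma::int_ % ", ", v);

    std::cout << generated << std::endl;  // prints: 1, 2, 3
    return 0;
}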
22
example/lex/Jamfile
Normal file
@@ -0,0 +1,22 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2008 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================

project spirit-lexer-example ;

exe example1 : example1.cpp ;
exe example2 : example2.cpp ;
exe example3 : example3.cpp ;
exe example4 : example4.cpp ;
exe example5 : example5.cpp ;
exe example6 : example6.cpp ;
exe print_numbers : print_numbers.cpp ;
exe word_count : word_count.cpp ;
exe word_count_functor : word_count_functor.cpp ;
exe word_count_lexer : word_count_lexer.cpp ;
exe strip_comments : strip_comments.cpp ;

26
example/lex/example.hpp
Normal file
@@ -0,0 +1,26 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//  Copyright (c) 2001-2007 Joel de Guzman
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <fstream>
#include <string>
#include <cstdlib>      // for exit()

///////////////////////////////////////////////////////////////////////////////
//  Helper function reading a file into a string
///////////////////////////////////////////////////////////////////////////////
inline std::string
read_from_file(char const* infile)
{
    std::ifstream instream(infile);
    if (!instream.is_open()) {
        std::cerr << "Couldn't open file: " << infile << std::endl;
        exit(-1);
    }
    instream.unsetf(std::ios::skipws);      // No white space skipping!
    return std::string(std::istreambuf_iterator<char>(instream.rdbuf()),
                       std::istreambuf_iterator<char>());
}

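Because the helper unsets `std::ios::skipws` before constructing the string from `istreambuf_iterator`s, the whole file is read byte for byte, whitespace included. A minimal usage sketch (the file name is the one the lexer examples below actually read):

// Hedged sketch: slurping a whole file with the helper above.
#include <iostream>
#include <string>

#include "example.hpp"

int main()
{
    // reads example1.input in one go, whitespace preserved
    std::string contents = read_from_file("example1.input");

    std::cout << "read " << contents.size() << " characters" << std::endl;
    return 0;
}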
136
example/lex/example1.cpp
Normal file
@@ -0,0 +1,136 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//  Copyright (c) 2001-2007 Joel de Guzman
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  Simple lexer/parser to test the Spirit installation.
//
//  This example shows how to create a simple lexer recognizing 4 different
//  tokens, and how to use a single token definition as the skip parser during
//  parsing. Additionally it demonstrates how to use one of the defined
//  tokens as a parser component in the grammar.
//
//  The grammar recognizes a simple input structure, for instance:
//
//      {
//          hello world, hello it is me
//      }
//
//  Any number of simple sentences (optionally comma separated) inside a pair
//  of curly braces will be matched.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example1_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens and associate them with the lexer
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        self = token_def<>(',') | '{' | '}' | identifier;

        // any token definition to be used as the skip parser during parsing
        // has to be associated with a separate lexer state (here 'WS')
        white_space = "[ \\t\\n]+";
        self("WS") = white_space;
    }

    token_def<> identifier, white_space;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example1_grammar
  : grammar_def<Iterator, in_state_skipper<token_def<> > >
{
    template <typename TokenDef>
    example1_grammar(TokenDef const& tok)
    {
        start = '{' >> *(tok.identifier >> -char_(',')) >> '}';
    }

    rule<Iterator, in_state_skipper<token_def<> > > start;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lexertl_token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // We use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example1_tokens<lexer_type> example1_tokens;

    // This is the iterator type exposed by the lexer
    typedef lexer<example1_tokens>::iterator_type iterator_type;

    // This is the type of the grammar to parse
    typedef example1_grammar<iterator_type> example1_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example1_tokens tokens;                         // Our token definition
    example1_grammar def (tokens);                  // Our grammar definition

    lexer<example1_tokens> lex(tokens);             // Our lexer
    grammar<example1_grammar> calc(def);            // Our parser

    std::string str (read_from_file("example1.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token_def defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    bool r = phrase_parse(iter, end, calc, in_state("WS")[tokens.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::string rest(iter, end);
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
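For comparison with the lexer based version above, here is a hedged sketch (not part of this commit) of the same `'{' ... '}'` structure parsed directly at the character level with Qi, replacing the `identifier` token with an equivalent character-level rule and the 'WS' lexer state with an ordinary whitespace skipper. Names and the `phrase_parse()` overload are assumed from the published Spirit 2 Qi interface.

// Hedged sketch: character-level equivalent of the example1 grammar.
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>

int main()
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    std::string input("{ hello world, hello it is me }");
    std::string::iterator first = input.begin();

    // identifier: a leading letter or underscore, then letters, digits, '_'
    qi::rule<std::string::iterator, ascii::space_type> identifier;
    identifier = qi::lexeme[ qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_") ];

    bool ok = qi::phrase_parse(first, input.end(),
        '{' >> *(identifier >> -qi::char_(',')) >> '}', ascii::space);

    std::cout << ((ok && first == input.end()) ? "matched" : "failed")
              << std::endl;
    return 0;
}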
169
example/lex/example2.cpp
Normal file
@@ -0,0 +1,169 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//  Copyright (c) 2001-2007 Joel de Guzman
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens and how to use this with a grammar. This example has a
//  heavily backtracking grammar, which makes it a candidate for lexer based
//  parsing (all tokens are scanned and generated only once, even if
//  backtracking is required). This speeds up the overall parsing process
//  considerably, outweighing the overhead needed for setting up the lexer.
//  Additionally it demonstrates how to use one of the defined tokens as a
//  parser component in the grammar.
//
//  The grammar recognizes a simple input structure: any number of English
//  simple sentences (statements, questions and commands) are recognized and
//  are counted separately.

// #define BOOST_SPIRIT_DEBUG
// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using boost::phoenix::ref;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example2_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // A 'word' is comprised of one or more letters and an optional
        // apostrophe. If it contains an apostrophe, there may only be one and
        // the apostrophe must be preceded and succeeded by at least 1 letter.
        // For example, "I'm" and "doesn't" meet the definition of 'word' we
        // define below.
        word = "[a-zA-Z]+('[a-zA-Z]+)?";

        // associate the tokens and the token set with the lexer
        self = token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word;
    }

    token_def<> word;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example2_grammar : grammar_def<Iterator>
{
    template <typename TokenDef>
    example2_grammar(TokenDef const& tok)
      : paragraphs(0), commands(0), questions(0), statements(0)
    {
        story
            =  +paragraph
            ;

        paragraph
            =  (  +(   command [ ++ref(commands) ]
                   |   question [ ++ref(questions) ]
                   |   statement [ ++ref(statements) ]
                   )
               >> *char_(' ') >> +char_('\n')
               )
               [ ++ref(paragraphs) ]
            ;

        command
            =  +(tok.word | ' ' | ',') >> '!'
            ;

        question
            =  +(tok.word | ' ' | ',') >> '?'
            ;

        statement
            =  +(tok.word | ' ' | ',') >> '.'
            ;

        BOOST_SPIRIT_DEBUG_NODE(story);
        BOOST_SPIRIT_DEBUG_NODE(paragraph);
        BOOST_SPIRIT_DEBUG_NODE(command);
        BOOST_SPIRIT_DEBUG_NODE(question);
        BOOST_SPIRIT_DEBUG_NODE(statement);
    }

    rule<Iterator> story, paragraph, command, question, statement;
    int paragraphs, commands, questions, statements;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lexertl_token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example2_tokens<lexer_type> example2_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example2_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example2_grammar<iterator_type> example2_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example2_tokens tokens;                         // Our token definition
    example2_grammar def (tokens);                  // Our grammar definition

    lexer<example2_tokens> lex(tokens);             // Our lexer
    grammar<example2_grammar> calc(def, def.story); // Our grammar

    std::string str (read_from_file("example2.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    bool r = parse(iter, end, calc);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "There were "
                  << def.commands << " commands, "
                  << def.questions << " questions, and "
                  << def.statements << " statements.\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
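The counting in this grammar rests entirely on the `[ ++ref(n) ]` semantic actions: `boost::phoenix::ref()` captures each counter by reference, so the increment applies to the grammar member itself. A hedged, self-contained sketch of just that mechanism (not part of this commit), using plain character parsing so nothing but the action is in play:

// Hedged sketch: counting matches with a phoenix::ref() semantic action.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <string>

int main()
{
    namespace qi = boost::spirit::qi;
    using boost::phoenix::ref;

    std::string input("aaab");
    std::string::iterator first = input.begin();

    int count = 0;
    // each matched 'a' runs the attached action, incrementing 'count'
    qi::parse(first, input.end(), *qi::char_('a')[ ++ref(count) ]);

    std::cout << "saw " << count << " 'a' characters" << std::endl;  // saw 3
    return 0;
}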
161
example/lex/example3.cpp
Normal file
161
example/lex/example3.cpp
Normal file
@@ -0,0 +1,161 @@
|
||||
// Copyright (c) 2001-2008 Hartmut Kaiser
|
||||
// Copyright (c) 2001-2007 Joel de Guzman
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// This example shows how to create a simple lexer recognizing a couple of
|
||||
// different tokens and how to use this with a grammar. This example has a
|
||||
// heavily backtracking grammar which makes it a candidate for lexer based
|
||||
// parsing (all tokens are scanned and generated only once, even if
|
||||
// backtracking is required) which speeds up the overall parsing process
|
||||
// considerably, out-weighting the overhead needed for setting up the lexer.
|
||||
//
|
||||
// Additionally, this example demonstrates, how to define a token set usable
|
||||
// as the skip parser during parsing, allowing to define several tokens to be
|
||||
// ignored.
|
||||
//
|
||||
// This example recognizes couplets, which are sequences of numbers enclosed
|
||||
// in matching pairs of parenthesis. See the comments below to for details
|
||||
// and examples.
|
||||
|
||||
// #define BOOST_SPIRIT_LEXERTL_DEBUG
|
||||
// #define BOOST_SPIRIT_DEBUG
|
||||
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
#include "example.hpp"
|
||||
|
||||
using namespace boost::spirit;
|
||||
using namespace boost::spirit::qi;
|
||||
using namespace boost::spirit::lex;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Token definition
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
template <typename Lexer>
|
||||
struct example3_tokens : lexer_def<Lexer>
|
||||
{
|
||||
typedef typename Lexer::token_set token_set;
|
||||
|
||||
template <typename Self>
|
||||
void def (Self& self)
|
||||
{
|
||||
// define the tokens to match
|
||||
ellipses = "\\.\\.\\.";
|
||||
number = "[0-9]+";
|
||||
|
||||
// define the whitespace to ignore (spaces, tabs, newlines and C-style
|
||||
// comments)
|
||||
white_space
|
||||
= token_def<>("[ \\t\\n]+") // whitespace
|
||||
| "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" // C style comments
|
||||
;
|
||||
|
||||
// associate the tokens and the token set with the lexer
|
||||
self = ellipses | '(' | ')' | number;
|
||||
self("WS") = white_space;
|
||||
}
|
||||
|
||||
// these tokens expose the iterator_range of the matched input sequence
|
||||
token_def<> ellipses, identifier, number;
|
||||
token_set white_space;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Grammar definition
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
template <typename Iterator, typename Lexer>
|
||||
struct example3_grammar
|
||||
: grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
|
||||
{
|
||||
template <typename TokenDef>
|
||||
example3_grammar(TokenDef const& tok)
|
||||
{
|
||||
start
|
||||
= +(couplet | tok.ellipses)
|
||||
;
|
||||
|
||||
// A couplet matches nested left and right parenthesis.
|
||||
// For example:
|
||||
// (1) (1 2) (1 2 3) ...
|
||||
// ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
|
||||
// (((1))) ...
|
||||
couplet
|
||||
= tok.number
|
||||
| '(' >> +couplet >> ')'
|
||||
;
|
||||

        BOOST_SPIRIT_DEBUG_NODE(start);
        BOOST_SPIRIT_DEBUG_NODE(couplet);
    }

    typedef typename Lexer::token_set token_set;
    rule<Iterator, in_state_skipper<token_set> > start, couplet;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lexertl_token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example3_tokens<lexer_type> example3_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example3_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example3_grammar<iterator_type, lexer_type> example3_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example3_tokens tokens;                         // Our token definition
    example3_grammar def (tokens);                  // Our grammar definition

    lexer<example3_tokens> lex(tokens);             // Our lexer
    grammar<example3_grammar> calc(def);            // Our grammar

    std::string str (read_from_file("example3.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token set defined above as the skip parser.
    std::string ws("WS");
    bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);
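    // in_state(ws)[tokens.white_space] wraps the skipper so that the lexer
    // is switched into the "WS" state whenever whitespace is being skipped,
    // and back to the previous state afterwards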

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
239 example/lex/example4.cpp Normal file
@@ -0,0 +1,239 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens aimed at a simple language and how to use this lexer with
// a grammar. It shows how to associate values to tokens and how to access the
// token values from inside the grammar.
//
// We use explicit token value types, making the corresponding token instances
// convert the matched input into an instance of that type. The token value is
// exposed as the parser attribute if this token is used as a parser component
// somewhere in a grammar.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be ignored.
//
// This example recognizes a very simple programming language having
// assignment statements and if and while control structures. Look at the file
// example4.input for an example.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

using boost::phoenix::val;

///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example4_tokens : lexer_def<Lexer>
{
    typedef typename Lexer::token_set token_set;

    template <typename Self>
    void def (Self& self)
    {
        // define the tokens to match
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";
        if_ = "if";
        else_ = "else";
        while_ = "while";

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        white_space
            =   token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;

        // associate the tokens and the token set with the lexer
        self = token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
        self += if_ | else_ | while_ | identifier;
        self("WS") = white_space;
    }

//[example4_token_def
    // these tokens expose the iterator_range of the matched input sequence
    token_def<> if_, else_, while_;

    // The following two tokens have an associated value type: 'identifier'
    // carries a string (the identifier name) and 'constant' carries the
    // matched integer value.
    //
    // Note: any token value type specified explicitly during a token_def<>
    //       declaration needs to be listed during token type definition as
    //       well (see the typedef for the token_type below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token values as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding tokens being copied around.
    token_def<std::string> identifier;
    token_def<unsigned int> constant;
//]

    // token set to be used as the skip parser
    token_set white_space;
};

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example4_grammar
  : grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
    template <typename TokenDef>
    example4_grammar(TokenDef const& tok)
    {
        program
            =  +block
            ;

        block
            =   '{' >> *statement >> '}'
            ;

        statement
            =   assignment
            |   if_stmt
            |   while_stmt
            ;

        assignment
            =   (tok.identifier >> '=' >> expression >> ';')
                [
                    std::cout << val("assignment statement to: ") << _1 << "\n"
                ]
            ;

        if_stmt
            =   (   tok.if_ >> '(' >> expression >> ')' >> block
                >> -(tok.else_ >> block)
                )
                [
                    std::cout << val("if expression: ") << _2 << "\n"
                ]
            ;

        while_stmt
            =   (tok.while_ >> '(' >> expression >> ')' >> block)
                [
                    std::cout << val("while expression: ") << _2 << "\n"
                ]
            ;

        // since expression has a variant return type accommodating
        // std::string and unsigned integer, both possible values may be
        // returned to the calling rule
        expression
            =   tok.identifier [ _val = _1 ]
            |   tok.constant [ _val = _1 ]
            ;
    }

    typedef typename Lexer::token_set token_set;
    typedef boost::variant<unsigned int, std::string> expression_type;

    rule<Iterator, in_state_skipper<token_set> > program, block, statement;
    rule<Iterator, in_state_skipper<token_set> > assignment, if_stmt;
    rule<Iterator, in_state_skipper<token_set> > while_stmt;

    // the expression is the only rule having a return value
    rule<Iterator, expression_type(), in_state_skipper<token_set> > expression;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

//[example4_token
    // This is the lexer token type to use. The second template parameter lists
    // all attribute types used for token_def's during token definition (see
    // example4_tokens<> above). Here we use the predefined lexertl token
    // type, but any compatible token type may be used instead.
    //
    // If you don't list any token value types in the following declaration
    // (or just use the default token type: lexertl_token<base_iterator_type>),
    // it will compile and work just fine, just a bit less efficiently. This is
    // because the token value will be generated from the matched input
    // sequence every time it is requested. But as soon as you specify at
    // least one token value type you'll have to list all value types used
    // for token_def<> declarations in the token definition class above,
    // otherwise compilation errors will occur.
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<unsigned int, std::string>
    > token_type;
//]
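    // For comparison, the plain default declaration mentioned above would be
    // (illustrative only):
    //
    //     typedef lexertl_token<base_iterator_type> token_type;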
    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example4_tokens<lexer_type> example4_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example4_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example4_grammar<iterator_type, lexer_type> example4_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example4_tokens tokens;                         // Our token definition
    example4_grammar def (tokens);                  // Our grammar definition

    lexer<example4_tokens> lex(tokens);             // Our lexer
    grammar<example4_grammar> calc(def, def.program); // Our grammar

    std::string str (read_from_file("example4.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token set defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    bool r = phrase_parse(iter, end, calc, in_state("WS")[tokens.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
283 example/lex/example5.cpp Normal file
@@ -0,0 +1,283 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens aimed at a simple language and how to use this lexer with
// a grammar. It shows how to associate values to tokens and how to access the
// token values from inside the grammar.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be ignored.
//
// The main purpose of this example is to show how inheritance can be used to
// overload parts of a base grammar and add token definitions to a base lexer.
//
// Further, it shows how you can use the 'omitted' attribute type specifier
// for token definitions to force the token to have no attribute (expose an
// unused attribute).
//
// This example recognizes a very simple programming language having
// assignment statements and if and while control structures. Look at the file
// example5.input for an example.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

using boost::phoenix::val;

///////////////////////////////////////////////////////////////////////////////
// Token definition base, defines all tokens for the base grammar below
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example5_base_tokens : lexer_def<Lexer>
{
    typedef typename Lexer::token_set token_set;

    template <typename Self>
    void def (Self& self)
    {
        // define the tokens to match
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";
        if_ = "if";
        while_ = "while";

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        white_space
            =   token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;

        // associate the tokens and the token set with the lexer
        self += token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant;
        self += if_ | while_ | identifier;
        self("WS") = white_space;
    }

    // these tokens have no value
    token_def<omitted> if_, while_;

    // The following two tokens have an associated value type: identifier
    // carries a string (the identifier name) and constant carries the matched
    // integer value.
    //
    // Note: any token value type specified explicitly during a token_def<>
    //       declaration needs to be listed during token type definition as
    //       well (see the typedef for the token_type below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token values as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding tokens being copied around.
    token_def<std::string> identifier;
    token_def<unsigned int> constant;

    // token set to be used as the skip parser
    token_set white_space;
};

///////////////////////////////////////////////////////////////////////////////
// Grammar definition base, defines a basic language
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example5_base_grammar
  : grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
    template <typename TokenDef>
    example5_base_grammar(TokenDef const& tok)
    {
        program
            =  +block
            ;

        block
            =   '{' >> *statement >> '}'
            ;

        statement
            =   assignment
            |   if_stmt
            |   while_stmt
            ;

        assignment
            =   (tok.identifier >> '=' >> expression >> ';')
                [
                    std::cout << val("assignment statement to: ") << _1 << "\n"
                ]
            ;

        if_stmt
            =   (tok.if_ >> '(' >> expression >> ')' >> block)
                [
                    std::cout << val("if expression: ") << _1 << "\n"
                ]
            ;

        while_stmt
            =   (tok.while_ >> '(' >> expression >> ')' >> block)
                [
                    std::cout << val("while expression: ") << _1 << "\n"
                ]
            ;

        // since expression has a variant return type accommodating
        // std::string and unsigned integer, both possible values may be
        // returned to the calling rule
        expression
            =   tok.identifier [ _val = _1 ]
            |   tok.constant [ _val = _1 ]
            ;
    }

    typedef
        grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
    base_type;
    typedef typename base_type::skipper_type skipper_type;

    rule<Iterator, skipper_type> program, block, statement;
    rule<Iterator, skipper_type> assignment, if_stmt;
    rule<Iterator, skipper_type> while_stmt;

    // the expression is the only rule having a return value
    typedef boost::variant<unsigned int, std::string> expression_type;
    rule<Iterator, expression_type(), skipper_type> expression;
};

///////////////////////////////////////////////////////////////////////////////
// Token definition for derived lexer, defines additional tokens
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example5_tokens : example5_base_tokens<Lexer>
{
    typedef typename Lexer::token_set token_set;

    template <typename Self>
    void def (Self& self)
    {
        // define the additional token to match
        else_ = "else";

        // associate the new token with the lexer; note we add 'else' before
        // anything else to add it to the token set before the identifier
        // token, otherwise "else" would be matched as an identifier
        self = else_;

        // call the base class definition function
        example5_base_tokens<Lexer>::def(self);
    }

    // this token has no value
    token_def<omitted> else_;
};

///////////////////////////////////////////////////////////////////////////////
// Derived grammar definition, defines a language extension
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example5_grammar : example5_base_grammar<Iterator, Lexer>
{
    template <typename TokenDef>
    example5_grammar(TokenDef const& tok)
      : example5_base_grammar<Iterator, Lexer>(tok)
    {
        // we alter the if_stmt only
        this->if_stmt
            =   this->if_stmt.copy() >> -(tok.else_ >> this->block)
            ;
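        // note: if_stmt.copy() takes a snapshot of the definition currently
        // assigned to if_stmt, so the base rule can appear inside its own
        // redefinition without self-recursion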
    }
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the lexer token type to use. The second template parameter lists
    // all attribute types used for token_def's during token definition (see
    // example5_tokens<> above). Here we use the predefined lexertl token
    // type, but any compatible token type may be used instead.
    //
    // If you don't list any token value types in the following declaration
    // (or just use the default token type: lexertl_token<base_iterator_type>),
    // it will compile and work just fine, just a bit less efficiently. This is
    // because the token value will be generated from the matched input
    // sequence every time it is requested. But as soon as you specify at
    // least one token value type you'll have to list all value types used
    // for token_def<> declarations in the token definition class above,
    // otherwise compilation errors will occur.
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<unsigned int, std::string>
    > token_type;

    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example5_tokens<lexer_type> example5_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example5_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example5_grammar<iterator_type, lexer_type> example5_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example5_tokens tokens;                         // Our token definition
    example5_grammar def (tokens);                  // Our grammar definition

    lexer<example5_tokens> lex(tokens);             // Our lexer
    grammar<example5_grammar> calc(def, def.program); // Our grammar

    std::string str (read_from_file("example5.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token set defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    std::string ws("WS");
    bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
263 example/lex/example6.cpp Normal file
@@ -0,0 +1,263 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example shows how to create a simple lexer recognizing a couple of
// different tokens aimed at a simple language and how to use this lexer with
// a grammar. It shows how to associate values to tokens and how to access the
// token values from inside the grammar.
//
// Additionally, this example demonstrates how to define a token set usable
// as the skip parser during parsing, allowing several tokens to be ignored.
//
// The example demonstrates how to use the add(...)(...) syntax to associate
// token definitions with the lexer and how token ids can be used in the
// parser to refer to a token, without having to directly reference its
// definition.
//
// This example recognizes a very simple programming language having
// assignment statements and if and while control structures. Look at the file
// example6.input for an example.
//
// This example is essentially identical to example4.cpp. The only difference
// is that we use the self.add() syntax to define tokens and to associate them
// with the lexer.

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

using boost::phoenix::val;

///////////////////////////////////////////////////////////////////////////////
// Token id definitions
///////////////////////////////////////////////////////////////////////////////
enum token_ids
{
    ID_CONSTANT = 1000,
    ID_IF,
    ID_ELSE,
    ID_WHILE,
    ID_IDENTIFIER
};

///////////////////////////////////////////////////////////////////////////////
// Token definitions
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example6_tokens : lexer_def<Lexer>
{
    typedef typename Lexer::token_set token_set;

    template <typename Self>
    void def (Self& self)
    {
        // define the tokens to match
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        constant = "[0-9]+";

        // define the whitespace to ignore (spaces, tabs, newlines and C-style
        // comments)
        white_space
            =   token_def<>("[ \\t\\n]+")
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
            ;

        // associate the tokens and the token set with the lexer
        self = token_def<>('(') | ')' | '{' | '}' | '=' | ';';

        // Token definitions can be added using the add(...)(...) syntax
        // shown below.
        // Note that the token definitions added this way expose the iterator
        // pair pointing to the matched input stream as their attribute.
        self.add
            (constant, ID_CONSTANT)
            ("if", ID_IF)
            ("else", ID_ELSE)
            ("while", ID_WHILE)
            (identifier, ID_IDENTIFIER)
        ;
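        // the explicit ids (ID_IF etc.) allow the grammar below to refer to
        // these tokens via token(ID_IF) and friends, without naming the
        // corresponding token_def instances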

        // add whitespace tokens to another lexer state (here: "WS")
        self("WS") = white_space;
    }

    // The following two tokens have an associated value type: identifier
    // carries a string (the identifier name) and constant carries the matched
    // integer value.
    //
    // Note: any token value type specified explicitly during a token_def<>
    //       declaration needs to be listed during token type definition as
    //       well (see the typedef for the token_type below).
    //
    // The conversion of the matched input to an instance of this type occurs
    // once (on first access), which makes token values as efficient as
    // possible. Moreover, token instances are constructed once by the lexer
    // library. From this point on tokens are passed by reference only,
    // avoiding tokens being copied around.
    token_def<std::string> identifier;
    token_def<unsigned int> constant;

    // token set to be used as the skip parser
    token_set white_space;
};

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example6_grammar
  : grammar_def<Iterator, in_state_skipper<typename Lexer::token_set> >
{
    template <typename TokenDef>
    example6_grammar(TokenDef const& tok)
    {
        program
            =  +block
            ;

        block
            =   '{' >> *statement >> '}'
            ;

        statement
            =   assignment
            |   if_stmt
            |   while_stmt
            ;

        assignment
            =   (tok.identifier >> '=' >> expression >> ';')
                [
                    std::cout << val("assignment statement to: ")
                              << _1 << "\n"
                ]
            ;

        if_stmt
            =   (   token(ID_IF) >> '(' >> expression >> ')' >> block
                >> -(token(ID_ELSE) >> block)
                )
                [
                    std::cout << val("if expression: ")
                              << _2 << "\n"
                ]
            ;

        while_stmt
            =   (token(ID_WHILE) >> '(' >> expression >> ')' >> block)
                [
                    std::cout << val("while expression: ")
                              << _2 << "\n"
                ]
            ;

        // since expression has a variant return type accommodating
        // std::string and unsigned integer, both possible values may be
        // returned to the calling rule
        expression
            =   tok.identifier [ _val = _1 ]
            |   tok.constant [ _val = _1 ]
            ;
    }

    typedef typename Lexer::token_set token_set;
    typedef boost::variant<unsigned int, std::string> expression_type;

    rule<Iterator, in_state_skipper<token_set> > program, block, statement;
    rule<Iterator, in_state_skipper<token_set> > assignment, if_stmt;
    rule<Iterator, in_state_skipper<token_set> > while_stmt;

    // the expression is the only rule having a return value
    rule<Iterator, expression_type(), in_state_skipper<token_set> > expression;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the lexer token type to use. The second template parameter lists
    // all attribute types used for token_def's during token definition (see
    // example6_tokens<> above). Here we use the predefined lexertl token
    // type, but any compatible token type may be used instead.
    //
    // If you don't list any token value types in the following declaration
    // (or just use the default token type: lexertl_token<base_iterator_type>),
    // it will compile and work just fine, just a bit less efficiently. This is
    // because the token value will be generated from the matched input
    // sequence every time it is requested. But as soon as you specify at
    // least one token value type you'll have to list all value types used
    // for token_def<> declarations in the token definition class above,
    // otherwise compilation errors will occur.
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<unsigned int, std::string>
    > token_type;

    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example6_tokens<lexer_type> example6_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<example6_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example6_grammar<iterator_type, lexer_type> example6_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example6_tokens tokens;                         // Our token definition
    example6_grammar def (tokens);                  // Our grammar definition

    lexer<example6_tokens> lex(tokens);             // Our lexer
    grammar<example6_grammar> calc(def, def.program); // Our grammar

    std::string str (read_from_file("example6.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    // Note how we use the token set defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    std::string ws("WS");
    bool r = phrase_parse(iter, end, calc, in_state(ws)[tokens.white_space]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
118 example/lex/print_numbers.cpp Normal file
@@ -0,0 +1,118 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is the equivalent to the following lex program:
//
//     %{
//     #include <stdio.h>
//     %}
//     %%
//     [0-9]+    { printf("%s\n", yytext); }
//     .|\n      ;
//     %%
//     main()
//     {
//         yylex();
//     }
//
// Its purpose is to print all the (integer) numbers found in a file

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
//                   lexer type.
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct print_numbers_tokens : lexer_def<Lexer>
{
    // define tokens and associate them with the lexer
    template <typename Self>
    void def (Self& self)
    {
        self = token_def<int>("[0-9]*") | ".|\n";
    }
};
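// Note: the two definitions above were added without explicit token ids, so
// they are assigned consecutive ids starting at lex::min_token_id; the
// grammar below relies on this when it refers to them via token().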

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct print_numbers_grammar : grammar_def<Iterator>
{
    print_numbers_grammar()
    {
        start = *(   token(lex::min_token_id) [ std::cout << _1 << "\n" ]
                 |   token(lex::min_token_id+1)
                 )
            ;
    }

    rule<Iterator> start;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // the token type to be used, 'int' is available as the type of the token
    // value and no lexer state is supported
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<int>, boost::mpl::false_
    > token_type;

    // lexer type
    typedef lexertl_lexer<token_type> lexer_type;

    // iterator type exposed by the lexer
    typedef
        lexer_iterator<print_numbers_tokens<lexer_type> >::type
    iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    print_numbers_tokens<lexer_type> print_tokens;  // Our token definition
    print_numbers_grammar<iterator_type> def;       // Our grammar definition

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    std::string str (read_from_file(1 == argc ? "print_numbers.input" : argv[1]));
    base_iterator_type first = str.begin();
    bool r = tokenize_and_parse(first, str.end(), make_lexer(print_tokens),
        make_parser(def));

    if (r) {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else {
        std::string rest(first, str.end());
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
13 example/lex/static_lexer/Jamfile Normal file
@@ -0,0 +1,13 @@
#==============================================================================
# Copyright (c) 2001-2007 Joel de Guzman
# Copyright (c) 2001-2008 Hartmut Kaiser
#
# Distributed under the Boost Software License, Version 1.0. (See accompanying
# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================

project spirit-static-lexer-example ;

exe generate_tables : generate_tables.cpp ;
exe word_count_static : word_count_static.cpp ;
42 example/lex/static_lexer/word_count_generate.cpp Normal file
@@ -0,0 +1,42 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// The purpose of this example is to show how it is possible to use a lexer
// token definition for two purposes:
//
//   . To generate C++ code implementing a static lexical analyzer capable
//     of recognizing all defined tokens (this file)
//   . To integrate the generated C++ lexer into the /Spirit/ framework.
//     (see the file: word_count_static.cpp)

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/lex/lexer/lexertl/lexertl_generate_static.hpp>

#include <fstream>

#include "word_count_tokens.hpp"

using namespace boost::spirit;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
//[wc_static_generate_main
int main(int argc, char* argv[])
{
    // create the lexer object instance needed to invoke the generator
    word_count_tokens<lexertl_lexer<> > word_count; // the token definition

    // open the output file, where the generated tokenizer function will be
    // written to
    std::ofstream out(argc < 2 ? "word_count_static.hpp" : argv[1]);

    // invoke the generator, passing the token definition, the output stream
    // and the name prefix of the tokenizing function to be generated
    char const* function_name = (argc < 3 ? "" : argv[2]);
    return generate_static(make_lexer(word_count), out, function_name) ? 0 : -1;
}
//]
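// A typical workflow (illustrative): running this program writes the
// generated next_token() function into word_count_static.hpp, which is then
// #included by word_count_static.cpp to build the static analyzer.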
118 example/lex/static_lexer/word_count_static.cpp Normal file
@@ -0,0 +1,118 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// The purpose of this example is to show how it is possible to use a lexer
// token definition for two purposes:
//
//   . To generate C++ code implementing a static lexical analyzer capable
//     of recognizing all defined tokens
//   . To integrate the generated C++ lexer into the /Spirit/ framework.
//

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE

#include <boost/spirit/include/qi.hpp>
//[wc_static_include
#include <boost/spirit/include/lex_lexer_static_lexertl.hpp>
//]
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

#include <iostream>
#include <string>

#include "../example.hpp"
#include "word_count_tokens.hpp"          // token definition

#include "word_count_static.hpp"          // generated tokenizer

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wc_static_grammar
// This is an ordinary grammar definition following the rules defined by
// Spirit.Qi. There is nothing specific about it, except it gets the token
// definition class instance passed to the constructor to allow accessing the
// embedded token_def<> instances.
template <typename Iterator>
struct word_count_grammar : grammar_def<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : c(0), w(0), l(0)
    {
        using boost::spirit::arg_names::_1;
        using boost::phoenix::ref;
        using boost::phoenix::size;

        // associate the defined tokens with the lexer, at the same time
        // defining the actions to be executed
        start = *(   tok.word [++ref(w), ref(c) += size(_1)]
                 |   char_('\n') [++ref(l), ++ref(c)]
                 |   token(IDANY) [++ref(c)]
                 )
            ;
    }

    std::size_t c, w, l;    // counter for characters, words, and lines
    rule<Iterator> start;
};
//]
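// In the semantic actions above, ref(c) += size(_1) uses Phoenix to add the
// length of the matched word (its std::string attribute) to the character
// counter, while ++ref(w) and ++ref(l) bump the word and line counters.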

///////////////////////////////////////////////////////////////////////////////
//[wc_static_main
int main(int argc, char* argv[])
{
    // Define the token type to be used: 'std::string' is available as the type
    // of the token value.
    typedef lexertl_token<
        char const*, boost::mpl::vector<std::string>
    > token_type;

    // Define the lexer type to be used as the base class for our token
    // definition.
    //
    // This is the only place where the code is different from an equivalent
    // dynamic lexical analyzer. We use the `lexertl_static_lexer<>` instead of
    // the `lexertl_lexer<>` as the base class for our token definition type.
    //
    typedef lexertl_static_lexer<token_type> lexer_type;

    // Define the iterator type exposed by the lexer.
    typedef lexer_iterator<word_count_tokens<lexer_type> >::type iterator_type;

    // Now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process.
    word_count_tokens<lexer_type> word_count;           // Our token definition
    word_count_grammar<iterator_type> def (word_count); // Our grammar definition

    // Read in the file into memory.
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    bool r = tokenize_and_parse(first, last, make_lexer(word_count),
        make_parser(def));

    if (r) {    // success
        std::cout << "lines: " << def.l << ", words: " << def.w
                  << ", characters: " << def.c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
111 example/lex/static_lexer/word_count_static.hpp Normal file
@@ -0,0 +1,111 @@
// Copyright (c) 2008 Ben Hanson
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// Auto-generated by boost::lexer
#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_Feb_13_2008_12_01_20)
#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_Feb_13_2008_12_01_20

#include <boost/detail/iterator.hpp>
#include <boost/spirit/support/detail/lexer/char_traits.hpp>

// the generated table of state names and the tokenizer have to be
// defined in the boost::spirit::lex::static_ namespace
namespace boost { namespace spirit { namespace lex { namespace static_ {

// this table defines the names of the lexer states
char const* const lexer_state_names[1] =
{
    "INITIAL",
};

template<typename Iterator>
std::size_t next_token (std::size_t &start_state_, Iterator const& start_,
    Iterator &start_token_, Iterator const& end_)
{
    enum {end_state_index, id_index, state_index, bol_index, eol_index,
        dead_state_index, dfa_offset};
    static const std::size_t npos = static_cast<std::size_t>(~0);
    static const std::size_t lookup_[256] = {8, 8, 8, 8, 8, 8, 8, 8,
        8, 7, 6, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        7, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8};
    static const std::size_t dfa_alphabet_ = 9;
    static const std::size_t dfa_[45] = {0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 3,
        4, 2, 1, 65536, 0, 0, 0, 0,
        0, 0, 2, 1, 65537, 0, 0, 0,
        0, 0, 0, 0, 1, 65538, 0, 0,
        0, 0, 0, 0, 0};

    if (start_token_ == end_) return 0;

    const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
    Iterator curr_ = start_token_;
    bool end_state_ = *ptr_ != 0;
    std::size_t id_ = *(ptr_ + id_index);
    Iterator end_token_ = start_token_;

    while (curr_ != end_)
    {
        std::size_t const state_ =
            ptr_[lookup_[static_cast<unsigned char>
                (*curr_++)]];

        if (state_ == 0) break;

        ptr_ = &dfa_[state_ * dfa_alphabet_];

        if (*ptr_)
        {
            end_state_ = true;
            id_ = *(ptr_ + id_index);
            end_token_ = curr_;
        }
    }

    if (end_state_)
    {
        // return longest match
        start_token_ = end_token_;
    }
    else
    {
        id_ = npos;
    }

    return id_;
}

}}}} // namespace boost::spirit::lex::static_

#endif
40 example/lex/static_lexer/word_count_tokens.hpp Normal file
@@ -0,0 +1,40 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#if !defined(SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM)
#define SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM

///////////////////////////////////////////////////////////////////////////////
// Token definition: We keep the base class for the token definition as a
//                   template parameter to allow this class to be used for
//                   both the code generation and the lexical analysis
///////////////////////////////////////////////////////////////////////////////
//[wc_static_tokenids
enum tokenids
{
    IDANY = boost::spirit::lex::min_token_id + 1
};
//]

//[wc_static_tokendef
// This token definition class can be used without any change for all three
// possible use cases: a dynamic lexical analyzer, a code generator, and a
// static lexical analyzer.
template <typename BaseLexer>
struct word_count_tokens : boost::spirit::lex::lexer_def<BaseLexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens and associate them with the lexer
        word = "[^ \t\n]+";
        self = word | '\n' | token_def<>(".", IDANY);
    }

    boost::spirit::lex::token_def<std::string> word;
};
//]

#endif
164 example/lex/strip_comments.cpp Normal file
@@ -0,0 +1,164 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is the equivalent to the following lex program:
//
//     %{
//     /* INITIAL is the default start state. COMMENT is our new */
//     /* state where we remove comments.                        */
//     %}
//
//     %s COMMENT
//     %%
//     <INITIAL>"//".*    ;
//     <INITIAL>"/*"      BEGIN COMMENT;
//     <INITIAL>.         ECHO;
//     <INITIAL>[\n]      ECHO;
//     <COMMENT>"*/"      BEGIN INITIAL;
//     <COMMENT>.         ;
//     <COMMENT>[\n]      ;
//     %%
//
//     main()
//     {
//         yylex();
//     }
//
// Its purpose is to strip comments out of C code.
//
// Additionally this example demonstrates the use of lexer states to structure
// the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::arg_names;

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
//                   lexer type.
///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
    IDANY = lex::min_token_id + 10
};

template <typename Lexer>
struct strip_comments_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens and associate them with the lexer
        cppcomment = "//.*\n";
        ccomment = "/\\*";
        endcomment = "\\*/";

        // The following tokens are associated with the default lexer state
        // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
        // strictly optional.
        self.add
            (cppcomment)    // no explicit token id is associated
            (ccomment)
            (".", IDANY)    // IDANY is the token id associated with this token
                            // definition
        ;

        // The following tokens are associated with the lexer state "COMMENT".
        // We switch lexer states from inside the parsing process using the
        // in_state("COMMENT")[] parser component as shown below.
        self("COMMENT").add
            (endcomment)
            (".", IDANY)
        ;
    }

    token_def<> cppcomment, ccomment, endcomment;
};

///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct strip_comments_grammar : grammar_def<Iterator>
{
    template <typename TokenDef>
    strip_comments_grammar(TokenDef const& tok)
    {
        // The in_state("COMMENT")[...] parser component switches the lexer
        // state to be 'COMMENT' during the matching of the embedded parser.
        start = *(   tok.ccomment
                     >> in_state("COMMENT")
                        [
                            // the lexer is in the 'COMMENT' state during
                            // matching of the following parser components
                            *token(IDANY) >> tok.endcomment
                        ]
                 |   tok.cppcomment
                 |   token(IDANY)
                 )
            ;
    }

    rule<Iterator> start;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // lexer type
    typedef lexertl_lexer<lexertl_token<base_iterator_type> > lexer_type;

    // iterator type exposed by the lexer
    typedef
        lexer_iterator<strip_comments_tokens<lexer_type> >::type
    iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    strip_comments_tokens<lexer_type> strip_comments;           // Our token definition
    strip_comments_grammar<iterator_type> def (strip_comments); // Our grammar definition

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
    base_iterator_type first = str.begin();
    bool r = tokenize_and_parse(first, str.end(), make_lexer(strip_comments),
        make_parser(def));

    if (r) {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else {
        std::string rest(first, str.end());
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
121 example/lex/strip_comments_lexer.cpp Normal file
@@ -0,0 +1,121 @@
// Copyright (c) 2001-2008 Hartmut Kaiser
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// This example is the equivalent to the following lex program:
|
||||
//
|
||||
// %{
|
||||
// /* INITIAL is the default start state. COMMENT is our new */
|
||||
// /* state where we remove comments. */
|
||||
// %}
|
||||
//
|
||||
// %s COMMENT
|
||||
// %%
|
||||
// <INITIAL>"//".* ;
|
||||
// <INITIAL>"/*" BEGIN COMMENT;
|
||||
// <INITIAL>. ECHO;
|
||||
// <INITIAL>[\n] ECHO;
|
||||
// <COMMENT>"*/" BEGIN INITIAL;
|
||||
// <COMMENT>. ;
|
||||
// <COMMENT>[\n] ;
|
||||
// %%
|
||||
//
|
||||
// main()
|
||||
// {
|
||||
// yylex();
|
||||
// }
|
||||
//
|
||||
// Its purpose is to strip comments out of C code.
|
||||
//
|
||||
// Additionally this example demonstrates the use of lexer states to structure
|
||||
// the lexer definition.
|
||||
|
||||
// #define BOOST_SPIRIT_LEXERTL_DEBUG
|
||||
|
||||
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
|
||||
#include <boost/spirit/lex/lexer/lexer_actions.hpp>
|
||||
#include <boost/spirit/include/phoenix_operator.hpp>
|
||||
#include <boost/spirit/include/phoenix_statement.hpp>
|
||||
#include <boost/spirit/include/phoenix_core.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "example.hpp"
|
||||
|
||||
using namespace boost::spirit;
|
||||
using namespace boost::spirit::lex;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Token definition: We use the lexertl based lexer engine as the underlying
|
||||
// lexer type.
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
enum tokenids
|
||||
{
|
||||
IDANY = lex::min_token_id + 10,
|
||||
IDEOL = lex::min_token_id + 11
|
||||
};
|
||||
|
||||
template <typename Lexer>
|
||||
struct strip_comments_tokens : lexer_def<Lexer>
|
||||
{
|
||||
template <typename Self>
|
||||
void def (Self& self)
|
||||
{
|
||||
// define tokens and associate them with the lexer
|
||||
cppcomment = "//[^\n]*";
|
||||
ccomment = "/\\*";
|
||||
endcomment = "\\*/";
|
||||
any = ".";
|
||||
eol = "\n";
|
||||
|
||||
// The following tokens are associated with the default lexer state
|
||||
// (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
|
||||
// strictly optional.
|
||||
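        // (Annotation, not in the original file: the assignment below could
        //  equivalently name the default state explicitly, i.e.
        //  self("INITIAL") = cppcomment | ... ;)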
        self = cppcomment
             | ccomment [ set_state("COMMENT") ]
             | eol [ echo_input(std::cout) ]
             | any [ echo_input(std::cout) ]
             ;

        // The following tokens are associated with the lexer state 'COMMENT'.
        self("COMMENT")
            = endcomment [ set_state("INITIAL") ]
            | eol
            | any
            ;
    }

    token_def<> cppcomment, ccomment, endcomment, any, eol;
};

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // lexer type
    typedef lexertl_actor_lexer<lexertl_token<base_iterator_type> > lexer_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    strip_comments_tokens<lexer_type> strip_comments;    // Our token definition

    // Parsing is done based on the token stream, not the character
    // stream read from the input.
    std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
    base_iterator_type first = str.begin();
    bool r = tokenize(first, str.end(), make_lexer(strip_comments));

    if (!r) {
        std::string rest(first, str.end());
        std::cerr << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
172
example/lex/word_count.cpp
Normal file
@@ -0,0 +1,172 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is the equivalent to the following lex program:
/*
//[wcp_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    word   [^ \t\n]+
    eol    \n
    %%
    {word} { ++w; c += yyleng; }
    {eol}  { ++c; ++l; }
    .      { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
//  Its purpose is to do the word count function of the wc command in UNIX. It
//  prints the number of lines, words and characters in a file.
//
//  The example additionally demonstrates how to use the add_pattern(...)(...)
//  syntax to define lexer patterns. These patterns are essentially parameter-
//  less 'macros' for regular expressions, allowing you to simplify their
//  definitions.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE

//[wcp_includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"

//[wcp_namespaces
using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//  lexer type.
///////////////////////////////////////////////////////////////////////////////
//[wcp_token_ids
enum tokenids
{
    IDANY = lex::min_token_id + 10
};
//]

//[wcp_token_definition
template <typename Lexer>
struct word_count_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define patterns (lexer macros) to be used during token definition
        // below
        self.add_pattern
            ("WORD", "[^ \t\n]+")
        ;
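        // (Illustrative aside, not part of the original example: a second
        //  pattern, say self.add_pattern("DIGITS", "[0-9]+");, could then be
        //  referenced from a token definition as "{DIGITS}".)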

        // define tokens and associate them with the lexer
        word = "{WORD}";    // reference the pattern 'WORD' as defined above

        // this lexer will recognize 3 token types: words, newlines, and
        // everything else
        self.add
            (word)          // no token id is needed here
            ('\n')          // characters are usable as tokens as well
            (".", IDANY)
        ;
    }

    token_def<std::string> word;
};
//]

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wcp_grammar_definition
template <typename Iterator>
struct word_count_grammar : grammar_def<Iterator>
{
    template <typename TokenDef>
    word_count_grammar(TokenDef const& tok)
      : c(0), w(0), l(0)
    {
        using boost::phoenix::ref;
        using boost::phoenix::size;

        // As documented in the Spirit.Qi documentation, any placeholders
        // (_1 et al.) used in semantic actions inside a grammar need to be
        // imported from the namespace boost::spirit::arg_names, and not from
        // the corresponding namespace in Phoenix.
        using boost::spirit::arg_names::_1;

        start = *(   tok.word      [++ref(w), ref(c) += size(_1)]
                 |   char_('\n')   [++ref(c), ++ref(l)]
                 |   token(IDANY)  [++ref(c)]
                 )
            ;
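        // Illustrative trace (an annotation, not from the original file):
        // for the input "ab cd\n" this rule yields w = 2, c = 6, l = 1.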
    }

    std::size_t c, w, l;
    rule<Iterator> start;
};
//]

///////////////////////////////////////////////////////////////////////////////
//[wcp_main
int main(int argc, char* argv[])
{
/*< define the token type to be used: `std::string` is available as the
     type of the token value
>*/ typedef lexertl_token<
        char const*, boost::mpl::vector<std::string>
    > token_type;

/*< define the lexer type to use implementing the state machine
>*/ typedef lexertl_lexer<token_type> lexer_type;

/*< define the iterator type exposed by the lexer type
>*/ typedef lexer_iterator<word_count_tokens<lexer_type> >::type iterator_type;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    word_count_tokens<lexer_type> word_count;            // Our token definition
    word_count_grammar<iterator_type> def (word_count);  // Our grammar definition

    // read the file into memory
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

    // Parsing is done based on the token stream, not the character
    // stream read from the input. The function `tokenize_and_parse()` wraps
    // the passed iterator range `[first, last)` by the lexical analyzer and
    // uses its exposed iterators to parse the token stream.
    bool r = tokenize_and_parse(first, last, make_lexer(word_count),
        make_parser(def));

    if (r) {
        std::cout << "lines: " << def.l << ", words: " << def.w
                  << ", characters: " << def.c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cerr << "Parsing failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
184
example/lex/word_count_functor.cpp
Normal file
@@ -0,0 +1,184 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is the equivalent to the following flex program:
/*
//[wcf_flex_version
    %{
        #define ID_WORD 1000
        #define ID_EOL  1001
        #define ID_CHAR 1002
        int c = 0, w = 0, l = 0;
    %}
    %%
    [^ \t\n]+ { return ID_WORD; }
    \n        { return ID_EOL; }
    .         { return ID_CHAR; }
    %%
    bool count(int tok)
    {
        switch (tok) {
            case ID_WORD: ++w; c += yyleng; break;
            case ID_EOL:  ++l; ++c; break;
            case ID_CHAR: ++c; break;
            default:
                return false;
        }
        return true;
    }
    void main()
    {
        int tok = EOF;
        do {
            tok = yylex();
            if (!count(tok))
                break;
        } while (EOF != tok);
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
//  Its purpose is to do the word count function of the wc command in UNIX. It
//  prints the number of lines, words and characters in a file.
//
//  This example shows how to use the tokenize() function together with a
//  simple functor, which gets executed whenever a token got matched in the
//  input sequence.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

//[wcf_includes
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"

//[wcf_namespaces
using namespace boost::spirit;
using namespace boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
//  Token id definitions
///////////////////////////////////////////////////////////////////////////////
//[wcf_token_ids
enum token_ids
{
    ID_WORD = 1000,
    ID_EOL,
    ID_CHAR
};
//]

//[wcf_token_definition
/*` The template `word_count_tokens` defines three different tokens:
    `ID_WORD`, `ID_EOL`, and `ID_CHAR`, representing a word (anything except
    a whitespace or a newline), a newline character, and any other character
    (`ID_WORD`, `ID_EOL`, and `ID_CHAR` are enum values representing the token
    ids, but could be anything else convertible to an integer as well).
    The direct base class of any token definition class needs to be the
    template `lexer_def<>`, where the corresponding template parameter (here:
    `lexertl_lexer<BaseIterator>`) defines which underlying lexer engine has
    to be used to provide the required state machine functionality. In this
    example we use the Lexertl based lexer engine as the underlying lexer type.
*/
template <typename Lexer>
struct word_count_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens (the regular expression to match and the corresponding
        // token id) and add them to the lexer
        self.add
            ("[^ \t\n]+", ID_WORD) // words (anything except ' ', '\t' or '\n')
            ("\n", ID_EOL)         // newline characters
            (".", ID_CHAR)         // anything else is a plain character
        ;
    }
};
//]

//[wcf_functor
/*` In this example the struct 'counter' is used as a functor counting the
    characters, words and lines in the analyzed input sequence by identifying
    the matched tokens as passed from the /Spirit.Lex/ library.
*/
struct counter
{
//<- this is an implementation detail and doesn't show up in the documentation
    typedef bool result_type;

//->
    // the function operator gets called for each of the matched tokens
    // c, l, w are references to the counters used to keep track of the numbers
    template <typename Token>
    bool operator()(Token const& t, std::size_t& c, std::size_t& w, std::size_t& l) const
    {
        switch (t.id()) {
        case ID_WORD:       // matched a word
            // since we're using a default token type in this example, every
            // token instance contains an `iterator_range<BaseIterator>` as its
            // token value pointing to the matched character sequence in the input
            ++w; c += t.value().size();
            break;
        case ID_EOL:        // matched a newline character
            ++l; ++c;
            break;
        case ID_CHAR:       // matched something else
            ++c;
            break;
        }
        return true;        // always continue to tokenize
    }
};
//]
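// Usage note (an annotation, not part of the original file): lex::tokenize
// invokes a unary callable for every matched token; in main() below,
// boost::bind adapts counter's four-argument operator() to that shape, and
// boost::ref keeps c, w and l shared by reference instead of being copied
// into the binder.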

///////////////////////////////////////////////////////////////////////////////
//[wcf_main
/*` The main function simply loads the given file into memory (as a
    `std::string`), instantiates an instance of the token definition template
    using the correct iterator type (`word_count_tokens<char const*>`),
    and finally calls `lex::tokenize`, passing an instance of the counter functor
    defined above. The return value of `lex::tokenize` will be `true` if the
    whole input sequence has been successfully tokenized, and `false` otherwise.
*/
int main(int argc, char* argv[])
{
    // these variables are used to count characters, words and lines
    std::size_t c = 0, w = 0, l = 0;

    // read input from the given file
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));

    // create the token definition instance needed to invoke the lexical analyzer
    word_count_tokens<lexertl_lexer<> > word_count_functor;

    // tokenize the given string, the bound functor gets invoked for each of
    // the matched tokens
    char const* first = str.c_str();
    char const* last = &first[str.size()];
    bool r = lex::tokenize(first, last, make_lexer(word_count_functor),
        boost::bind(counter(), _1, boost::ref(c), boost::ref(w), boost::ref(l)));

    // print results
    if (r) {
        std::cout << "lines: " << l << ", words: " << w
                  << ", characters: " << c << "\n";
    }
    else {
        std::string rest(first, last);
        std::cout << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
1571
example/lex/word_count_functor_flex.cpp
Normal file
File diff suppressed because it is too large
138
example/lex/word_count_lexer.cpp
Normal file
@@ -0,0 +1,138 @@
//  Copyright (c) 2001-2008 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is the equivalent to the following lex program:
/*
//[wcl_flex_version
    %{
        int c = 0, w = 0, l = 0;
    %}
    %%
    [^ \t\n]+ { ++w; c += yyleng; }
    \n        { ++c; ++l; }
    .         { ++c; }
    %%
    main()
    {
        yylex();
        printf("%d %d %d\n", l, w, c);
    }
//]
*/
//  Its purpose is to do the word count function of the wc command in UNIX. It
//  prints the number of lines, words and characters in a file.
//
//  This example shows how to use semantic actions associated with token
//  definitions to directly attach actions to tokens. These get executed
//  whenever the corresponding token got matched in the input sequence. Note
//  how this example implements all functionality directly in the lexer
//  definition without any need for a parser.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

//[wcl_includes
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_algorithm.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"

//[wcl_namespaces
using namespace boost::spirit;
using namespace boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//  lexer type.
//
//  Note that the lexer type used below is the 'lexertl_actor_lexer'
//  template, which is necessary to be able to use lexer semantic actions.
///////////////////////////////////////////////////////////////////////////////
//[wcl_token_definition
template <typename Lexer>
struct word_count_tokens : lexer_def<Lexer>
{
    word_count_tokens()
      : c(0), w(0), l(0),
        word("[^ \t\n]+"), eol("\n"), any(".")    // define tokens
    {}

    template <typename Self>
    void def (Self& self)
    {
        using boost::phoenix::ref;
        using boost::phoenix::distance;

        // Note that all placeholders used in lexer semantic actions in
        // conjunction with functors created based on Phoenix2 need to be from
        // the namespace boost::phoenix::arg_names (not spirit::arg_names).
        // Using the wrong placeholders leads to subtle compilation errors
        // which are difficult to trace back to their cause.
        using boost::phoenix::arg_names::_1;

        // associate tokens with the lexer
        self = word  [++ref(w), ref(c) += distance(_1)]
             | eol   [++ref(c), ++ref(l)]
             | any   [++ref(c)]
             ;
    }

    std::size_t c, w, l;
    token_def<> word, eol, any;
};
//]

///////////////////////////////////////////////////////////////////////////////
//[wcl_main
int main(int argc, char* argv[])
{
    // read input from the given file
    std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));

    // Specifying 'omitted' as the token value type generates a token class not
    // holding any token value at all (not even the iterator_range of the
    // matched input sequence), thereby optimizing the token, the lexer, and
    // possibly the parser implementation as much as possible.
    //
    // Specifying mpl::false_ as the 3rd template parameter generates a token
    // type and an iterator, both holding no lexer state, allowing for even more
    // aggressive optimizations.
    //
    // As a result the token instances contain the token ids as the only data
    // member.
    typedef lexertl_token<char const*, omitted, boost::mpl::false_> token_type;
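    // (Comparison, an annotation not in the original file: the other
    //  word_count examples use a value-carrying token instead, e.g.
    //  lexertl_token<char const*>, whose default token value -- assuming the
    //  behavior described in word_count_functor.cpp -- is an iterator_range
    //  over the matched input.)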

    // lexer type
    typedef lexertl_actor_lexer<token_type> lexer_type;

    // create the lexer object instance needed to invoke the lexical analysis
    word_count_tokens<lexer_type> word_count_lexer;

    // tokenize the given string, all generated tokens are discarded
    char const* first = str.c_str();
    char const* last = &first[str.size()];
    bool r = tokenize(first, last, make_lexer(word_count_lexer));

    if (r) {
        std::cout << "lines: " << word_count_lexer.l
                  << ", words: " << word_count_lexer.w
                  << ", characters: " << word_count_lexer.c
                  << "\n";
    }
    else {
        std::string rest(first, last);
        std::cout << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
46
example/qi/Jamfile
Normal file
@@ -0,0 +1,46 @@
#==============================================================================
#   Copyright (c) 2001-2007 Joel de Guzman
#
#   Distributed under the Boost Software License, Version 1.0. (See accompanying
#   file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
project spirit-qi-example ;

exe sum : sum.cpp ;
exe complex_number : complex_number.cpp ;
exe employee : employee.cpp ;
exe roman : roman.cpp ;
exe mini_xml1 : mini_xml1.cpp ;
exe mini_xml2 : mini_xml2.cpp ;
exe num_list : num_list.cpp ;
exe num_list2 : num_list2.cpp ;
exe num_list3 : num_list3.cpp ;

exe calc1 : calc1.cpp ;
exe calc2 : calc2.cpp ;
exe calc3 : calc3.cpp ;
exe calc4 : calc4.cpp ;
exe calc5 : calc5.cpp ;

exe calc6 :
    calc6/calc6.cpp
    calc6/calc6a.cpp
    calc6/calc6b.cpp
    calc6/calc6c.cpp
    ;

exe calc7 :
    calc7/calc7.cpp
    calc7/calc7a.cpp
    calc7/calc7b.cpp
    calc7/calc7c.cpp
    ;

exe mini_c :
    mini_c/mini_c.cpp
    mini_c/mini_ca.cpp
    mini_c/mini_cb.cpp
    mini_c/mini_cc.cpp
    mini_c/mini_cd.cpp
    ;
104
example/qi/calc1.cpp
Normal file
@@ -0,0 +1,104 @@
/*=============================================================================
    Copyright (c) 2001-2007 Joel de Guzman

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
//  Plain calculator example demonstrating the grammar. The parser is a
//  syntax checker only and does not do any semantic evaluation.
//
//  [ JDG May 10, 2002 ]    spirit1
//  [ JDG March 4, 2007 ]   spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;

///////////////////////////////////////////////////////////////////////////////
//  Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, space_type>
{
    calculator()
    {
        expression =
            term
            >> *(   ('+' >> term)
                |   ('-' >> term)
                )
            ;

        term =
            factor
            >> *(   ('*' >> factor)
                |   ('/' >> factor)
                )
            ;

        factor =
            uint_
            |   '(' >> expression >> ')'
            |   ('-' >> factor)
            |   ('+' >> factor)
            ;
    }

    rule<Iterator, space_type> expression, term, factor;
};
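// (Annotation, not part of the original file: the three-level rule structure
//  encodes operator precedence -- expression handles '+'/'-', term handles
//  '*'/'/', and factor binds tightest, covering literals, parenthesized
//  expressions, and unary signs.)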

///////////////////////////////////////////////////////////////////////////////
//  Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef calculator<iterator_type> calculator;

    calculator def;                                 // Our grammar definition
    grammar<calculator> calc(def, def.expression);  // Our grammar

    std::string str;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        bool r = phrase_parse(iter, end, calc, space);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "-------------------------\n";
        }
        else
        {
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \"" << rest << "\"\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
123
example/qi/calc2.cpp
Normal file
@@ -0,0 +1,123 @@
/*=============================================================================
    Copyright (c) 2001-2007 Joel de Guzman

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
//  A Calculator example demonstrating the grammar and semantic actions
//  using phoenix to "bind" plain functions. The parser prints code suitable
//  for a stack based virtual machine.
//
//  [ JDG May 10, 2002 ]    spirit1
//  [ JDG March 4, 2007 ]   spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>

#include <iostream>
#include <string>

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;

using boost::phoenix::bind;

///////////////////////////////////////////////////////////////////////////////
//  Semantic actions
///////////////////////////////////////////////////////////////////////////////
namespace
{
    void do_int(int n)  { std::cout << "push " << n << std::endl; }
    void do_add()       { std::cout << "add\n"; }
    void do_subt()      { std::cout << "subtract\n"; }
    void do_mult()      { std::cout << "mult\n"; }
    void do_div()       { std::cout << "divide\n"; }
    void do_neg()       { std::cout << "negate\n"; }
}
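// Illustrative trace (not part of the original file): parsing "1+2" with the
// grammar below prints
//     push 1
//     push 2
//     add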

///////////////////////////////////////////////////////////////////////////////
//  Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, space_type>
{
    calculator()
    {
        expression =
            term
            >> *(   ('+' >> term    [bind(&do_add)])
                |   ('-' >> term    [bind(&do_subt)])
                )
            ;

        term =
            factor
            >> *(   ('*' >> factor  [bind(&do_mult)])
                |   ('/' >> factor  [bind(&do_div)])
                )
            ;

        factor =
            uint_                   [bind(&do_int, _1)]
            |   '(' >> expression >> ')'
            |   ('-' >> factor      [bind(&do_neg)])
            |   ('+' >> factor)
            ;
    }

    rule<Iterator, space_type> expression, term, factor;
};

///////////////////////////////////////////////////////////////////////////////
//  Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef calculator<iterator_type> calculator;

    calculator def;                                 // Our grammar definition
    grammar<calculator> calc(def, def.expression);  // Our grammar

    std::string str;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        bool r = phrase_parse(iter, end, calc, space);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "-------------------------\n";
        }
        else
        {
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \"" << rest << "\"\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
110
example/qi/calc3.cpp
Normal file
@@ -0,0 +1,110 @@
/*=============================================================================
    Copyright (c) 2001-2007 Joel de Guzman

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
//  A calculator example demonstrating the grammar and semantic actions
//  using phoenix to do the actual expression evaluation. The parser is
//  essentially an "interpreter" that evaluates expressions on the fly.
//
//  [ JDG June 29, 2002 ]   spirit1
//  [ JDG March 5, 2007 ]   spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <string>

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;

///////////////////////////////////////////////////////////////////////////////
//  Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, int(), space_type>
{
    calculator()
    {
        expression =
            term                    [_val = _1]
            >> *(   ('+' >> term    [_val += _1])
                |   ('-' >> term    [_val -= _1])
                )
            ;

        term =
            factor                  [_val = _1]
            >> *(   ('*' >> factor  [_val *= _1])
                |   ('/' >> factor  [_val /= _1])
                )
            ;

        factor =
            uint_                   [_val = _1]
            |   '(' >> expression   [_val = _1] >> ')'
            |   ('-' >> factor      [_val = -_1])
            |   ('+' >> factor      [_val = _1])
            ;
    }

    rule<Iterator, int(), space_type> expression, term, factor;
};
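// (Annotation, not part of the original file: attributes flow bottom-up here.
//  For input "2*3+1", factor synthesizes 2 and 3, term computes 6 via
//  _val *= _1, and expression computes 7 via _val += _1.)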

///////////////////////////////////////////////////////////////////////////////
//  Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef calculator<iterator_type> calculator;

    calculator def;                                 // Our grammar definition
    grammar<calculator> calc(def, def.expression);  // Our grammar

    std::string str;
    int result;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        bool r = phrase_parse(iter, end, calc, result, space);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "result = " << result << std::endl;
            std::cout << "-------------------------\n";
        }
        else
        {
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \"" << rest << "\"\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
201
example/qi/calc3_lexer.cpp
Normal file
@@ -0,0 +1,201 @@
/*=============================================================================
    Copyright (c) 2001-2007 Joel de Guzman
    Copyright (c) 2001-2008 Hartmut Kaiser

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
//  A calculator example demonstrating the grammar and semantic actions
//  using phoenix to do the actual expression evaluation. The parser is
//  essentially an "interpreter" that evaluates expressions on the fly.
//
//  Additionally this example shows how to build and use a lexer based on
//  Ben Hanson's Lexertl (http://www.benhanson.net/lexertl.html). This way the
//  parser matches the grammar against the tokens generated by the lexer
//  component and not against the input character stream.
//
//  Even if the benefits of using a lexer for this small calculator grammar may
//  not outweigh the corresponding overhead, we provide this example because
//  it allows one to concentrate on the essentials without first having to
//  understand the semantics.
//
//  [ JDG June 29, 2002 ]   spirit1
//  [ JDG March 5, 2007 ]   spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>

#include <iostream>
#include <string>

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;

///////////////////////////////////////////////////////////////////////////////
//  Our token definition
//  This class is used to define all the tokens to be recognized by the lexer.
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct calculator_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // unsigned integer token definition
        ui = "[1-9][0-9]*";

        // whitespace token definitions
        ws = "[ \\t\\f\\v]+";
        c_comment = "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/";

        // build token set
        skipper = ws | c_comment;   // += is allowed as well

        // associate the tokens and the token set with the lexer
        // default lexer state
        self = token_def<>('+') | '-' | '*' | '/' | '(' | ')';
        self += ui;                 // still default state

        // The token_set 'skipper' gets assigned to a separate lexer state,
        // which allows it to be used separately from the main tokenization
        // (it is used as the skipper parser below)
        self("SKIPPER") = skipper;  // lexer state "SKIPPER"
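        // (Usage pointer, an annotation not in the original file: main()
        //  below passes this token set to phrase_parse as the skip parser,
        //  i.e. phrase_parse(iter, end, calc, result, tokens.skipper).)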
    }

    // These are the tokens to be recognized by the lexer.
    token_def<unsigned int> ui;     // matched tokens will have an unsigned int
    token_def<> ws, c_comment;      // attribute will not be used

    // This is the only token set explicitly defined by this lexer because it
    // needs to be accessible from the outside (used as skip parser below).
    typename Lexer::token_set skipper;
};

///////////////////////////////////////////////////////////////////////////////
//  Our calculator grammar
//
//  The difference to the original example (calc3.cpp) is that we are
//  specifying a second template parameter referring to the lexer. Further, we
//  use the token set defined above as the skip parser.
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct calculator : grammar_def<Iterator, int(), typename Lexer::token_set>
{
    template <typename TokenDef>
    calculator(TokenDef const& tok)
    {
        // grammar
        expression =
            term                    [_val = _1]
            >> *(   ('+' >> term    [_val += _1])
                |   ('-' >> term    [_val -= _1])
                )
            ;

        term =
            factor                  [_val = _1]
            >> *(   ('*' >> factor  [_val *= _1])
                |   ('/' >> factor  [_val /= _1])
                )
            ;

        factor =
            tok.ui                  [_val = _1]
            |   '(' >> expression   [_val = _1] >> ')'
            |   ('-' >> factor      [_val = -_1])
            |   ('+' >> factor      [_val = _1])
            ;
    }

    rule<Iterator, int(), typename Lexer::token_set> expression, term, factor;
};

///////////////////////////////////////////////////////////////////////////////
//  Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    // iterator type used to expose the underlying input stream
    typedef std::string::const_iterator base_iterator_type;

    // This is the lexer token type to use. The second template parameter lists
    // all attribute types used for token_def's during token definition (see
    // calculator_tokens<> above). Here we use the predefined lexertl token
    // type, but any compatible token type may be used.
    typedef lexertl_token<
        base_iterator_type, boost::mpl::vector<unsigned int>
    > token_type;

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lexertl_lexer<base_iterator_type, token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef calculator_tokens<lexer_type> calculator_tokens;

    // this is the iterator type exposed by the lexer
    typedef lexer<calculator_tokens>::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef calculator<iterator_type, lexer_type> calculator;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    calculator_tokens tokens;                       // Our token definition
    calculator def (tokens);                        // Our grammar definition

    lexer<calculator_tokens> lex(tokens);           // Our lexer
    grammar<calculator> calc(def, def.expression);  // Our grammar

    // get input line by line and feed the parser to evaluate the expressions
    // read in from the input
    std::string str;
    int result;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        // At this point we generate the iterator pair used to expose the
        // tokenized input stream.
        iterator_type iter = lex.begin(str.begin(), str.end());
        iterator_type end = lex.end();

        // Parsing is done based on the token stream, not the character
        // stream read from the input.
        // Note how we use the token_set defined above as the skip parser.
        bool r = phrase_parse(iter, end, calc, result, tokens.skipper);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "result = " << result << std::endl;
            std::cout << "-------------------------\n";
        }
        else
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
126
example/qi/calc4.cpp
Normal file
@@ -0,0 +1,126 @@
/*=============================================================================
    Copyright (c) 2001-2007 Joel de Guzman

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
///////////////////////////////////////////////////////////////////////////////
//
//  This time, we'll incorporate error handling and reporting.
//
//  [ JDG June 29, 2002 ]   spirit1
//  [ JDG March 5, 2007 ]   spirit2
//
///////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>

#include <iostream>
#include <string>

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::ascii;
using namespace boost::spirit::arg_names;

using boost::phoenix::val;
using boost::phoenix::construct;

///////////////////////////////////////////////////////////////////////////////
//  Our calculator grammar
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct calculator : grammar_def<Iterator, int(), space_type>
{
    calculator()
    {
        expression =
            term                    [_val = _1]
            >> *(   ('+' > term     [_val += _1])
                |   ('-' > term     [_val -= _1])
                )
            ;

        term =
            factor                  [_val = _1]
            >> *(   ('*' > factor   [_val *= _1])
                |   ('/' > factor   [_val /= _1])
                )
            ;

        factor =
            uint_                   [_val = _1]
            |   '(' > expression    [_val = _1] > ')'
            |   ('-' > factor       [_val = -_1])
            |   ('+' > factor       [_val = _1])
            ;

        expression.name("expression");
        term.name("term");
        factor.name("factor");

        on_error<fail>
        (
            expression
          , std::cout
                << val("Error! Expecting ")
                << _4                               // what failed?
                << val(" here: \"")
                << construct<std::string>(_3, _2)   // iterators to error-pos, end
                << val("\"")
                << std::endl
        );
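        // (Annotation, not part of the original file: the '>' used in the
        //  rules above is Qi's expectation operator -- unlike '>>' it raises
        //  an expectation failure on mismatch, which the on_error<fail>
        //  handler receives with _1/_2 as the begin/end of the input, _3 as
        //  the error position, and _4 as a description of what was expected.)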
    }

    rule<Iterator, int(), space_type> expression, term, factor;
};

///////////////////////////////////////////////////////////////////////////////
//  Main program
///////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef calculator<iterator_type> calculator;

    calculator def;                                 // Our grammar definition
    grammar<calculator> calc(def, def.expression);  // Our grammar

    std::string str;
    int result;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        bool r = phrase_parse(iter, end, calc, result, space);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "result = " << result << std::endl;
            std::cout << "-------------------------\n";
        }
        else
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}
Some files were not shown because too many files have changed in this diff