// Copyright (c) 2001-2008 Hartmut Kaiser
// Copyright (c) 2001-2007 Joel de Guzman
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// Simple lexer/parser to test the Spirit installation.
//
// This example shows how to create a simple lexer recognizing four different
// tokens, and how to use a single token definition as the skip parser during
// parsing. Additionally, it demonstrates how to use one of the defined tokens
// as a parser component in the grammar.
//
// The grammar recognizes a simple input structure, for instance:
//
//      {
//          hello world, hello it is me
//      }
//
// Any number of simple sentences (optionally comma separated) inside a pair
// of curly braces will be matched.
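//
// For the sample input above the lexer delivers the token sequence
//
//      '{' identifier identifier ',' identifier identifier identifier identifier '}'
//
// with the whitespace in between consumed by the skip parser.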

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexer_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;
using namespace boost::spirit::qi;
using namespace boost::spirit::lex;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example1_tokens : lexer_def<Lexer>
{
    template <typename Self>
    void def (Self& self)
    {
        // define tokens and associate them with the lexer
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
        self = token_def<>(',') | '{' | '}' | identifier;
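        // (the character literals ',', '{' and '}' match themselves, while
        // 'identifier' matches the regular expression defined above)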

        // any token definition to be used as the skip parser during parsing
        // has to be associated with a separate lexer state (here 'WS')
        white_space = "[ \\t\\n]+";
        self("WS") = white_space;
    }

    token_def<> identifier, white_space;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct example1_grammar
  : grammar_def<Iterator, in_state_skipper<token_def<> > >
{
    template <typename TokenDef>
    example1_grammar(TokenDef const& tok)
    {
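        // the grammar: an opening '{', any number of identifiers each
        // optionally followed by a ',', and a closing '}'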
        start = '{' >> *(tok.identifier >> -char_(',')) >> '}';
    }

    rule<Iterator, in_state_skipper<token_def<> > > start;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lexertl_token<base_iterator_type> token_type;

    // This is the lexer type to use to tokenize the input.
    // We use the lexertl based lexer engine.
    typedef lexertl_lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example1_tokens<lexer_type> example1_tokens;

    // This is the iterator type exposed by the lexer
    typedef lexer<example1_tokens>::iterator_type iterator_type;

    // This is the type of the grammar to parse
    typedef example1_grammar<iterator_type> example1_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example1_tokens tokens;                 // Our token definition
    example1_grammar def (tokens);          // Our grammar definition

    lexer<example1_tokens> lex(tokens);     // Our lexer
    grammar<example1_grammar> calc(def);    // Our parser

    std::string str (read_from_file("example1.input"));
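    // (read_from_file is a small helper supplied by example.hpp, reading the
    // whole input file into a string)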

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = lex.begin(it, str.end());
    iterator_type end = lex.end();

    // Parsing is done based on the token stream, not the character stream
    // read from the input.
    // Note how we use the token_def defined above as the skip parser. It must
    // be explicitly wrapped inside a state directive, switching the lexer
    // state for the duration of skipping whitespace.
    bool r = phrase_parse(iter, end, calc, in_state("WS")[tokens.white_space]);
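    // phrase_parse() returns whether the grammar matched; 'iter' is advanced
    // to the first token not consumed by the parse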

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        // note: 'iter' is a token iterator, not a character iterator, so we
        // cannot simply build a std::string from [iter, end) to show the
        // remaining input here
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}