// NOTE: The following lines are repository-browser metadata that leaked into
// this copy of the file; kept here as a comment so the file stays valid C++.
//   mirror of https://github.com/boostorg/spirit.git synced 2026-01-19 04:42:11 +00:00
//   spirit/example/lex/example2.cpp
//   Nikita Kniazev f44479bcd3 Remove boost/config/warning_disable.hpp usage
//   "It is better to manage warnings on our side to know what warnings we need
//   to fix or suppress, and the only thing that header does is disabling
//   deprecation warnings on MSVC and ICC which we would prefer to not show to
//   users."
//   2021-08-24 03:14:12 +03:00
//   169 lines, 6.0 KiB, C++

// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// This example shows how to create a simple lexer recognizing a couple of
// different tokens and how to use this with a grammar. This example has a
// heavily backtracking grammar which makes it a candidate for lexer based
// parsing (all tokens are scanned and generated only once, even if
// backtracking is required) which speeds up the overall parsing process
// considerably, outweighing the overhead needed for setting up the lexer.
// Additionally it demonstrates how to use one of the defined tokens as a
// parser component in the grammar.
//
// The grammar recognizes a simple input structure: any number of English
// simple sentences (statements, questions and commands) are recognized and
// are being counted separately.
// #define BOOST_SPIRIT_DEBUG
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include "example.hpp"
using namespace boost::spirit;
using namespace boost::spirit::ascii;
using boost::phoenix::ref;
///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
// Token definitions for the sentence lexer. One explicit 'word' token is
// declared; all punctuation and whitespace characters are registered as
// single-character tokens so the Qi grammar below can refer to them as
// character literals.
template <typename Lexer>
struct example2_tokens : lex::lexer<Lexer>
{
// Registers all token definitions with the lexer on construction.
example2_tokens()
{
// A 'word' is comprised of one or more letters and an optional
// apostrophe. If it contains an apostrophe, there may only be one and
// the apostrophe must be preceded and succeeded by at least 1 letter.
// For example, "I'm" and "doesn't" meet the definition of 'word' we
// define below.
word = "[a-zA-Z]+('[a-zA-Z]+)?";
// Associate the tokens and the token set with the lexer. Note that
// single character token definitions as used below always get
// interpreted literally and never as special regex characters. This is
// done to be able to assign single characters the id of their character
// code value, allowing to reference those as literals in Qi grammars.
this->self = lex::token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word;
}
// The 'word' token definition is a public member so the grammar can use
// it directly as a parser component (tok.word).
lex::token_def<> word;
};
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
// Grammar operating on the token stream: recognizes paragraphs made of
// commands ('!'), questions ('?') and statements ('.'), counting each kind
// separately via phoenix semantic actions.
template <typename Iterator>
struct example2_grammar : qi::grammar<Iterator>
{
// Wires up all rules; 'tok' gives access to the token definitions
// (in particular tok.word). Counters start at zero.
template <typename TokenDef>
example2_grammar(TokenDef const& tok)
: example2_grammar::base_type(story)
, paragraphs(0), commands(0), questions(0), statements(0)
{
// The start rule: a story is one or more paragraphs.
story
= +paragraph
;
// A paragraph is one or more sentences followed by optional trailing
// spaces and at least one newline. Each recognized sentence increments
// the matching counter; the whole match increments 'paragraphs'.
paragraph
= ( +( command [ ++ref(commands) ]
| question [ ++ref(questions) ]
| statement [ ++ref(statements) ]
)
>> *char_(' ') >> +char_('\n')
)
[ ++ref(paragraphs) ]
;
// The three sentence kinds share the same body (words, spaces,
// commas) and differ only in the terminating punctuation — this is
// why the grammar backtracks heavily and benefits from a lexer.
command
= +(tok.word | ' ' | ',') >> '!'
;
question
= +(tok.word | ' ' | ',') >> '?'
;
statement
= +(tok.word | ' ' | ',') >> '.'
;
// Debug hooks; active only when BOOST_SPIRIT_DEBUG is defined.
BOOST_SPIRIT_DEBUG_NODE(story);
BOOST_SPIRIT_DEBUG_NODE(paragraph);
BOOST_SPIRIT_DEBUG_NODE(command);
BOOST_SPIRIT_DEBUG_NODE(question);
BOOST_SPIRIT_DEBUG_NODE(statement);
}
qi::rule<Iterator> story, paragraph, command, question, statement;
// Counters updated by the semantic actions above; read by main() after
// parsing.
int paragraphs, commands, questions, statements;
};
///////////////////////////////////////////////////////////////////////////////
int main()
{
// iterator type used to expose the underlying input stream
typedef std::string::iterator base_iterator_type;
// This is the token type to return from the lexer iterator
typedef lex::lexertl::token<base_iterator_type> token_type;
// This is the lexer type to use to tokenize the input.
// Here we use the lexertl based lexer engine.
typedef lex::lexertl::lexer<token_type> lexer_type;
// This is the token definition type (derived from the given lexer type).
typedef example2_tokens<lexer_type> example2_tokens;
// this is the iterator type exposed by the lexer
typedef example2_tokens::iterator_type iterator_type;
// this is the type of the grammar to parse
typedef example2_grammar<iterator_type> example2_grammar;
// now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process
example2_tokens tokens; // Our lexer
example2_grammar calc(tokens); // Our parser
std::string str (read_from_file("example2.input"));
// At this point we generate the iterator pair used to expose the
// tokenized input stream.
std::string::iterator it = str.begin();
iterator_type iter = tokens.begin(it, str.end());
iterator_type end = tokens.end();
// Parsing is done based on the token stream, not the character
// stream read from the input.
bool r = qi::parse(iter, end, calc);
if (r && iter == end)
{
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
std::cout << "There were "
<< calc.commands << " commands, "
<< calc.questions << " questions, and "
<< calc.statements << " statements.\n";
std::cout << "-------------------------\n";
}
else
{
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "-------------------------\n";
}
std::cout << "Bye... :-) \n\n";
return 0;
}