//
// C++ Lexer implemented with Spirit (http://spirit.sourceforge.net/)
//
// Copyright (c) 2002 Juan Carlos Arevalo-Baeza, All rights reserved
// email: jcab@JCABs-Rumblings.com
// Created: 8-Nov-2002
//

#include "cpp_lexer.hpp"

#include <iostream>
#include <fstream>

#include <stdio.h>

#include <boost/spirit/core.hpp>
#include <boost/spirit/utility/functor_parser.hpp>
#include <boost/spirit/attribute.hpp>
#include <boost/spirit/symbols.hpp>

#include <boost/phoenix/primitives.hpp>
#include <boost/phoenix/casts.hpp>
#include <boost/phoenix/binders.hpp>

///////////////////////////////////////////////////////////////////////////////
// used namespaces

using namespace boost::spirit;
//using namespace phoenix;
//using namespace std;

using std::stringstream;
using std::string;
using std::cout;
using std::cerr;
using std::endl;

using phoenix::var;
using phoenix::val;
using phoenix::value;
using phoenix::actor;
using phoenix::arg1;
using phoenix::arg2;
using phoenix::construct_;
using phoenix::function_ptr;

///////////////////////////////////////////////////////////////////////////////
// Utility parsers for debugging and error handling.

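// The trace_<> functor and the trace_p() overloads below are small debugging
// aids: they wrap a message (a Phoenix actor, a std::string or a C string) in
// a functor_parser that prints the current token's file position plus the
// message to stdout and then succeeds with a zero-length match, so it can be
// spliced into a rule without affecting the parse. Hypothetical usage sketch
// (the rule name "declaration" is illustrative only, not part of this file):
//
//     rule_t r = declaration >> trace_p("after declaration") >> ';';
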
namespace {

template <typename ErrorDescrT>
class trace_ {

public:
    typedef nil_t result_t;

    trace_(ErrorDescrT const& what): info(what) {}

    template <typename ScannerT>
    int
    operator()(ScannerT const& scan, result_t& result) const {
        file_position lc = (*scan).filePos;
        cout << lc << "Trace: " << info() << "\n";
        return 0;
    }

private:

    ErrorDescrT info;
};

template < class ActorT >
static
functor_parser<trace_<ActorT> >
trace_p(ActorT const& str) {
    return trace_<ActorT>(str);
}

static
functor_parser<trace_<actor<value<std::string> > > >
trace_p(std::string const& str) {
    return trace_<actor<value<std::string> > >(val(str));
}

static
functor_parser<trace_<actor<value<char const*> > > >
trace_p(char const* str) {
    return trace_<actor<value<char const*> > >(val(str));
}

}

///////////////////////////////////////////////////////////////////////////////
// The C++ lexer grammars.

namespace {

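// result_closure<T> equips a grammar with a closure whose single member,
// result_, carries the grammar's synthesized value; the nil_t specialization
// falls back to the plain parser_context for grammars that return nothing.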
template < typename ResultT >
struct result_closure: closure<result_closure<ResultT>, ResultT> {
    member1 result_;
};

template <>
struct result_closure<nil_t> {
    typedef parser_context context_t;
};

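// IDENTIFIER: a letter, '_' or '$' followed by any number of alphanumerics,
// '_' or '$'; the matched text is stored in the closure's result_.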
struct IDENTIFIER:
    grammar<IDENTIFIER, result_closure<std::string>::context_t>
{
    template < typename ScannerT >
    struct definition {
        typedef rule<ScannerT> rule_t;
        rule_t main;

        rule_t const& start() const {
            return main;
        }

        definition(IDENTIFIER const& self) {
            main = (
                lexeme_d[
                    ((alpha_p | '_' | '$') >> *(alnum_p | '_' | '$'))
                    [self.result_ = construct_<std::string>(arg1, arg2)]
                ]
            );
        }
    };
} IDENTIFIER;

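// STRING_LITERAL: an optionally L-prefixed, double-quoted literal. Escaped
// backslashes and escaped quotes are kept verbatim, and any immediately
// following string literals are appended to result_, so adjacent literals end
// up concatenated.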
struct STRING_LITERAL:
    grammar<STRING_LITERAL, result_closure<std::string>::context_t>
{
    template < typename ScannerT >
    struct definition {
        typedef rule<ScannerT> rule_t;
        rule_t main;

        rule_t const& start() const {
            return main;
        }

        definition(STRING_LITERAL const& self) {
            bool is_wchar = false;

            main = (
                lexeme_d[
                    (
                        !(nocase_d[chlit<>('L')] [var(is_wchar) = true])
                        >> '\"'
                        >> *(str_p("\\\\") | "\\\"" | anychar_p - '\"' )
                    )
                    [self.result_ = construct_<std::string>(arg1, arg2)]
                    >> chlit<>('\"')
                ] >> *lexeme_d[
                    !nocase_d[chlit<>('L')] >> '\"'
                    >> ( *( str_p("\\\\") | "\\\"" | anychar_p - '\"' ) )
                    [self.result_ += construct_<std::string>(arg1, arg2)]
                    >> chlit<>('\"')
                ]
            );
        }
    };
} STRING_LITERAL;

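// CHARACTER_LITERAL: an optionally L-prefixed, single-quoted literal with at
// least one character; escaped backslashes and quotes are kept verbatim.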
struct CHARACTER_LITERAL:
    grammar<CHARACTER_LITERAL, result_closure<std::string>::context_t>
{
    template < typename ScannerT >
    struct definition {
        typedef rule<ScannerT> rule_t;
        rule_t main;

        rule_t const& start() const {
            return main;
        }

        definition(CHARACTER_LITERAL const& self) {
            bool is_wchar = false;

            main = (
                lexeme_d[
                    (
                        !(nocase_d[chlit<>('L')] [var(is_wchar) = true])
                        >> '\''
                        >> +(str_p("\\\\") | "\\\'" | anychar_p - '\'' )
                    )
                    [self.result_ = construct_<std::string>(arg1, arg2)]
                    >> chlit<>('\'')
                ]
            );
        }
    };
} CHARACTER_LITERAL;

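// INT_CONSTANT: a hexadecimal (0x...), octal (leading 0) or decimal integer,
// or a character constant, each allowing a single optional 'l'/'u' suffix
// (case insensitive); the full matched text is stored in result_.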
struct INT_CONSTANT:
    grammar<INT_CONSTANT, result_closure<std::string>::context_t>
{
    template < typename ScannerT >
    struct definition {
        typedef rule<ScannerT> rule_t;
        rule_t main;

        rule_t const& start() const {
            return main;
        }

        definition(INT_CONSTANT const& self) {
            subrule<0> submain;
            subrule<1> hex_int;
            subrule<2> oct_int;
            subrule<3> dec_int;
            subrule<4> char_int;
            subrule<5> suffix_part;

            main = (
                (
                    submain =
                        (hex_int | oct_int | dec_int | char_int) [
                            self.result_ =
                                construct_<std::string>(arg1, arg2)
                        ],

                    hex_int =
                        lexeme_d[
                            '0' >> nocase_d[chlit<>('x')] // prefix
                            >> +xdigit_p                  // the number
                            >> suffix_part                // suffix
                        ],

                    oct_int =
                        lexeme_d[
                            '0'                           // prefix
                            >> +range<>('0', '7')         // the number
                            >> suffix_part                // suffix
                        ],

                    dec_int =
                        lexeme_d[
                            +digit_p                      // the number
                            >> suffix_part                // suffix
                        ],

                    char_int = CHARACTER_LITERAL,

                    suffix_part = !nocase_d[chlit<>('l') | chlit<>('u')]
                )
            );
        }
    };
} INT_CONSTANT;

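// FLOAT_CONSTANT: a floating-point literal, either ".digits" or digits
// followed by a fractional part and/or an exponent (e[+|-]digits), plus an
// optional 'l'/'f' suffix (case insensitive).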
struct FLOAT_CONSTANT:
    grammar<FLOAT_CONSTANT, result_closure<std::string>::context_t>
{
    template < typename ScannerT >
    struct definition {
        typedef rule<ScannerT> rule_t;
        rule_t main;

        rule_t const& start() const {
            return main;
        }

        definition(FLOAT_CONSTANT const& self) {
            subrule<0> submain;
            subrule<1> exponent_part;

            main = (
                (
                    submain =
                        lexeme_d[
                            (
                                chlit<>('.') >> +digit_p >> !exponent_part
                                | +digit_p >> (
                                    (chlit<>('.') >> *digit_p)
                                    || exponent_part
                                )
                            ) >> !nocase_d[chlit<>('l') | chlit<>('f')]
                        ]
                        [
                            self.result_ =
                                construct_<std::string>(arg1, arg2)
                        ],

                    exponent_part =
                        nocase_d[chlit<>('e')]
                        >> !(chlit<>('+') | chlit<>('-')) >> +digit_p
                )
            );
        }
    };
} FLOAT_CONSTANT;

}

///////////////////////////////////////////////////////////////////////////////
// TokenID type tag.

// Internal transfer. Defined in cpp_lexer_tokens.cpp.
extern boost::spirit::parser<boost::spirit::symbols<TokenID> > const& cpp_operator_p;

namespace {

struct get_file_position_parser {
    file_position& filePos;
    get_file_position_parser(file_position& filePos_):
        filePos(filePos_)
    {}
    typedef nil_t result_t;
    template < typename ScannerT >
    int operator()(ScannerT const& scan, result_t& result) const {
        filePos = scan.first.get_position();
        return 0;
    }
};

functor_parser<get_file_position_parser>
get_file_position_p(file_position& filePos)
{
    return get_file_position_parser(filePos);
}

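// token_lexer: the grammar that produces one Token per invocation. It skips
// horizontal whitespace and line continuations, records the current position
// via get_file_position_p, then recognizes end-of-line (and, per the TODO
// below, preprocessor directives collapsed into an EOL token), comments,
// character and string literals, floating and integer constants, identifiers
// and operators, storing the resulting Token in the grammar's closure.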
struct token_lexer:
    grammar<token_lexer, result_closure<Token>::context_t>
{
    template < typename ScannerT >
    struct definition {
        typedef rule<ScannerT> rule_t;
        rule_t main;

        rule_t const& start() const {
            return main;
        }

        Token token;
        file_position filePos;

        definition(token_lexer const& self);
    };
} token_lexer;

template < typename ScannerT >
token_lexer::definition<ScannerT>::definition(token_lexer const& self)
{
    subrule<0> submain;
    subrule<1> skip_until_eol;
    subrule<2> singleline_comment;
    subrule<3> multiline_comment;
    subrule<4> directive;

    main = (
        submain =
            *(blank_p | ('\\' >> eol_p))
            >> get_file_position_p(filePos)
            >> (
                eol_p >> (
                    // TODO: Don't ignore directives like this.
                    directive            [SetEOLToken       (token, filePos)]
                    | epsilon_p          [SetEOLToken       (token, filePos)]
                )
                | singleline_comment     [SetCommentToken   (token, filePos)]
                | multiline_comment      [SetCommentToken   (token, filePos)]
                | CHARACTER_LITERAL      [SetStringToken    (token, filePos)]
                | STRING_LITERAL         [SetStringToken    (token, filePos)]
                | FLOAT_CONSTANT         [SetFloatingToken  (token, filePos)]
                | INT_CONSTANT           [SetIntegerToken   (token, filePos)]
                | IDENTIFIER             [SetIdentifierToken(token, filePos)]
                | cpp_operator_p         [SetOperatorToken  (token, filePos)]
            )
            >> epsilon_p [self.result_ = var(token)],

        skip_until_eol = *(('\\' >> eol_p) | (anychar_p - eol_p)),

        singleline_comment = "//" >> skip_until_eol,

        multiline_comment = "/*" >> *(anychar_p - "*/") >> "*/",

        directive = *blank_p >> '#' >> skip_until_eol
    );
}

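// lex_input_interface_iterator: a reference-counted lex_input_interface that
// owns a scanner over [first, last) and advances it by one token per get()
// call, returning the shared eof() token once token_lexer no longer matches.
// The object deletes itself when dec_ref() drops the count to zero.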
template < typename IteratorT >
struct lex_input_interface_iterator: lex_input_interface {
public:
    typedef Token result_type;
    unsigned refCount;
    IteratorT first;
    scanner<IteratorT> scan;

    lex_input_interface_iterator(IteratorT const& first_,
                                 IteratorT const& last_):
        refCount(1),
        first(first_),
        scan (first, last_)
    {}

    virtual void add_ref() {
        ++refCount;
    }
    virtual void dec_ref() {
        --refCount;
        if (refCount == 0) {
            delete this;
        }
    }

    virtual Token get() {
        Token result;
        if (token_lexer[assign(result)].parse(scan)) {
            return result;
        } else {
            return eof();
        }
    }

    virtual boost::spirit::file_position get_position() {
        return scan.first.get_position();
    }
};

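// NewLexerImpl wraps the raw character iterators in position_iterator so that
// every token carries file/line/column information, then heap-allocates the
// iterator-based lexer; presumably the caller releases it via dec_ref().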
template < typename IteratorT >
lex_input_interface*
NewLexerImpl(IteratorT const& first,
             IteratorT const& last,
             char const* fname = "<filename>")
{
    typedef position_iterator<IteratorT> Iterator;

    Iterator pfirst(first, last, fname);
    Iterator plast;

    return new lex_input_interface_iterator<Iterator>(pfirst, plast);
}

}

Token const& lex_input_interface::eof()
{
    static Token const result = Token(file_position(), "", EOF_token);
    return result;
}

lex_input_interface*
NewLexer(char const* first, char const* last, char const* fname)
{
    return NewLexerImpl(first, last, fname);
}

Token const lex_input_policy::eof
    = Token(boost::spirit::file_position(), "", EOF_token);