From 9a56d467171fc0ee9ca8dfa7cd03354e4b691771 Mon Sep 17 00:00:00 2001
From: Hartmut Kaiser
Date: Thu, 30 Nov 2006 13:22:39 +0000
Subject: [PATCH] Added the support_option_insert_whitespace language option.

[SVN r36211]
---
 ChangeLog                                      |  7 ++
 include/boost/wave/cpp_context.hpp             |  3 +-
 .../wave/cpplexer/re2clex/cpp_re2c_lexer.hpp   | 10 +-
 include/boost/wave/language_support.hpp        |  5 +-
 include/boost/wave/util/cpp_iterator.hpp       |  4 +-
 .../wave/util/insert_whitespace_detection.hpp  | 15 ++-
 samples/cpp_tokens/slex/cpp_slex_lexer.hpp     |  8 +-
 .../list_includes/lexertl/lexertl_lexer.hpp    | 91 ++++--------------
 samples/waveidl/idllexer/idl_re2c_lexer.hpp    |  4 +-
 test/testwave/testwave_app.cpp                 |  4 +-
 tool/cpp.cpp                                   | 29 +++++-
 11 files changed, 84 insertions(+), 96 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 8ccd6ec..020669c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -74,6 +74,13 @@ CHANGELOG
   lexer for Wave.
 - Changed the list_includes sample to use a lexer which is based on the
   lexertl library written by Ben Hanson (http://www.benhanson.net/lexertl.html).
+- Added a new support_option: insert_whitespace, which allows switching off
+  the whitespace insertion normally (by default) used to disambiguate C++
+  tokens which would otherwise form a different token in the output.
+- Added a new command line option to the Wave applet: --disambiguate, which
+  controls whitespace insertion. The default, --disambiguate=1, preserves the
+  previous behaviour; specifying --disambiguate=0 suppresses whitespace
+  insertion altogether.
 
 Boost V1.34.0 - Wave Version 1.2.4
 
diff --git a/include/boost/wave/cpp_context.hpp b/include/boost/wave/cpp_context.hpp
index 071a3d0..3dadbd9 100644
--- a/include/boost/wave/cpp_context.hpp
+++ b/include/boost/wave/cpp_context.hpp
@@ -83,7 +83,7 @@ class context : private boost::noncopyable
 public:
 
 // concept checks
-// the given iterator shall be at least a forward iterator type
+// the given iterator should be at least a forward iterator type
     BOOST_CLASS_REQUIRE(IteratorT, boost, ForwardIteratorConcept);
 
 // public typedefs
@@ -133,6 +133,7 @@ public:
 #if BOOST_WAVE_EMIT_PRAGMA_DIRECTIVES != 0
             | support_option_emit_pragma_directives
 #endif
+            | support_option_insert_whitespace
           ))
     ,   hooks(hooks_)
     {
diff --git a/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp b/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp
index 2c39bcb..5ca44eb 100644
--- a/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp
+++ b/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp
@@ -171,7 +171,7 @@ lexer::get()
     // test identifier characters for validity (throws if invalid chars found)
         value = string_type((char const *)scanner.tok,
             scanner.cur-scanner.tok);
-        if (!(language & support_option_no_character_validation))
+        if (!boost::wave::need_no_character_validation(language))
             impl::validate_identifier_name(value, actline, scanner.column,
                 filename);
         break;
@@ -180,9 +180,9 @@ lexer::get()
     // test literal characters for validity (throws if invalid chars found)
         value = string_type((char const *)scanner.tok,
             scanner.cur-scanner.tok);
-        if (language & support_option_convert_trigraphs)
+        if (boost::wave::need_convert_trigraphs(language))
             value = impl::convert_trigraphs(value);
-        if (!(language & support_option_no_character_validation))
+        if (!boost::wave::need_no_character_validation(language))
            impl::validate_literal(value, actline, scanner.column,
                filename);
        break;
@@ -244,7 +244,7 @@ lexer::get()
     case T_RIGHTBRACKET_TRIGRAPH:
     case T_COMPL_TRIGRAPH:
     case T_POUND_TRIGRAPH:
-        if (language & support_option_convert_trigraphs) {
+        if (boost::wave::need_convert_trigraphs(language)) {
             value = cache.get_token_value(BASEID_FROM_TOKEN(id));
         }
         else {
@@ -254,7 +254,7 @@
         break;
 
     case T_ANY_TRIGRAPH:
-        if (language & support_option_convert_trigraphs) {
+        if (boost::wave::need_convert_trigraphs(language)) {
             value = impl::convert_trigraph(
                 string_type((char const *)scanner.tok));
         }
diff --git a/include/boost/wave/language_support.hpp b/include/boost/wave/language_support.hpp
index 012dafd..41f327e 100644
--- a/include/boost/wave/language_support.hpp
+++ b/include/boost/wave/language_support.hpp
@@ -35,7 +35,8 @@ enum language_support {
     support_c99 = support_option_variadics | support_option_long_long | 0x08,
 #endif
 
-    support_option_mask = 0xFF00,
+    support_option_mask = 0xFF80,
+    support_option_insert_whitespace = 0x0080,
     support_option_preserve_comments = 0x0100,
     support_option_no_character_validation = 0x0200,
     support_option_convert_trigraphs = 0x0400,
@@ -151,6 +152,7 @@ set_support_options(language_support language, language_support option)
 
 ///////////////////////////////////////////////////////////////////////////////
 BOOST_WAVE_OPTION(long_long)                 // support_option_long_long
+BOOST_WAVE_OPTION(no_character_validation)   // support_option_no_character_validation
 BOOST_WAVE_OPTION(preserve_comments)         // support_option_preserve_comments
 BOOST_WAVE_OPTION(prefer_pp_numbers)         // support_option_prefer_pp_numbers
 BOOST_WAVE_OPTION(emit_line_directives)      // support_option_emit_line_directives
@@ -165,6 +167,7 @@ BOOST_WAVE_OPTION(variadics)                 // support_option_variadics
 #if BOOST_WAVE_EMIT_PRAGMA_DIRECTIVES != 0
 BOOST_WAVE_OPTION(emit_pragma_directives)    // support_option_emit_pragma_directives
 #endif
+BOOST_WAVE_OPTION(insert_whitespace)         // support_option_insert_whitespace
 
 #undef BOOST_WAVE_NEED_OPTION
 #undef BOOST_WAVE_ENABLE_OPTION
diff --git a/include/boost/wave/util/cpp_iterator.hpp b/include/boost/wave/util/cpp_iterator.hpp
index c44b117..278e761 100644
--- a/include/boost/wave/util/cpp_iterator.hpp
+++ b/include/boost/wave/util/cpp_iterator.hpp
@@ -262,7 +262,8 @@ public:
             pos_.get_file().c_str()
         )),
         seen_newline(true), must_emit_line_directive(false),
-        act_pos(ctx_.get_main_pos())
+        act_pos(ctx_.get_main_pos()),
+        whitespace(boost::wave::need_insert_whitespace(ctx.get_language()))
     {
         act_pos.set_file(pos_.get_file());
 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
@@ -501,6 +502,7 @@ pp_iterator_functor::operator()()
     }
 
 // cleanup of certain tokens required
+    seen_newline = false;
     switch (static_cast<unsigned int>(id)) {
     case T_NONREPLACABLE_IDENTIFIER:
         act_token.set_token_id(T_IDENTIFIER);
diff --git a/include/boost/wave/util/insert_whitespace_detection.hpp b/include/boost/wave/util/insert_whitespace_detection.hpp
index d5ff160..81d6e71 100644
--- a/include/boost/wave/util/insert_whitespace_detection.hpp
+++ b/include/boost/wave/util/insert_whitespace_detection.hpp
@@ -205,13 +205,17 @@ namespace impl {
 class insert_whitespace_detection
 {
 public:
-    insert_whitespace_detection()
-    :   prev(boost::wave::T_EOF), beforeprev(boost::wave::T_EOF)
+    insert_whitespace_detection(bool insert_whitespace_ = true)
+    :   insert_whitespace(insert_whitespace_),
+        prev(boost::wave::T_EOF), beforeprev(boost::wave::T_EOF)
     {}
 
     template <typename StringT>
    bool must_insert(boost::wave::token_id current, StringT const &value)
    {
+        if (!insert_whitespace)
+            return false;   // skip whitespace insertion altogether
+
        using namespace boost::wave;
        switch (static_cast<unsigned int>(current)) {
        case T_NONREPLACABLE_IDENTIFIER:
@@ -330,11 +334,14 @@ public:
     }
 
     void shift_tokens (boost::wave::token_id next_id)
     {
-        beforeprev = prev;
-        prev = next_id;
+        if (insert_whitespace) {
+            beforeprev = prev;
+            prev = next_id;
+        }
     }
 
 private:
+    bool insert_whitespace;              // enable this component
     boost::wave::token_id prev;          // the previous analyzed token
     boost::wave::token_id beforeprev;    // the token before the previous
 };
diff --git a/samples/cpp_tokens/slex/cpp_slex_lexer.hpp b/samples/cpp_tokens/slex/cpp_slex_lexer.hpp
index f9ba857..3e398d5 100644
--- a/samples/cpp_tokens/slex/cpp_slex_lexer.hpp
+++ b/samples/cpp_tokens/slex/cpp_slex_lexer.hpp
@@ -602,7 +602,7 @@ public:
             case T_IDENTIFIER:
             // test identifier characters for validity (throws if
             // invalid chars found)
-                if (!(language & support_option_no_character_validation)) {
+                if (!boost::wave::need_no_character_validation(language)) {
                     using boost::wave::cpplexer::impl::validate_identifier_name;
                     validate_identifier_name(token_val, pos.get_line(),
                         pos.get_column(), pos.get_file());
@@ -613,11 +613,11 @@ public:
             case T_CHARLIT:
             // test literal characters for validity (throws if invalid
             // chars found)
-                if (language & support_option_convert_trigraphs) {
+                if (boost::wave::need_convert_trigraphs(language)) {
                     using boost::wave::cpplexer::impl::convert_trigraphs;
                     token_val = convert_trigraphs(token_val);
                 }
-                if (!(language & support_option_no_character_validation)) {
+                if (!boost::wave::need_no_character_validation(language)) {
                     using boost::wave::cpplexer::impl::validate_literal;
                     validate_literal(token_val, pos.get_line(),
                         pos.get_column(), pos.get_file());
@@ -666,7 +666,7 @@ public:
             case T_COMPL_TRIGRAPH:
             case T_POUND_TRIGRAPH:
             case T_ANY_TRIGRAPH:
-                if (language & support_option_convert_trigraphs)
+                if (boost::wave::need_convert_trigraphs(language))
                 {
                     using boost::wave::cpplexer::impl::convert_trigraph;
                     token_val = convert_trigraph(token_val);
diff --git a/samples/list_includes/lexertl/lexertl_lexer.hpp b/samples/list_includes/lexertl/lexertl_lexer.hpp
index d8089cf..a487e11 100644
--- a/samples/list_includes/lexertl/lexertl_lexer.hpp
+++ b/samples/list_includes/lexertl/lexertl_lexer.hpp
@@ -89,67 +89,10 @@ private:
 
 ///////////////////////////////////////////////////////////////////////////////
 // token regex definitions
-#define OR "|"
-#define Q(c) "\\" c
-#define TRI(c) Q("?") Q("?") c
-
-// definition of some sub-token regexp's to simplify the regex definitions
-#define BLANK "[ \\t]"
-#define CCOMMENT \
-    Q("/") Q("*") "[^*]*" Q("*") "+" "(" "[^/*][^*]*" Q("*") "+" ")*" Q("/")
-
-#define PPSPACE "(" BLANK OR CCOMMENT ")*"
-
-#define OCTALDIGIT "[0-7]"
-#define DIGIT "[0-9]"
-#define HEXDIGIT "[0-9a-fA-F]"
-#define OPTSIGN "[-+]?"
-#define EXPSTART "[eE]" "[-+]"
-#define EXPONENT "(" "[eE]" OPTSIGN "[0-9]+" ")"
-#define NONDIGIT "[a-zA-Z_]"
-
-#define INTEGER \
-    "(" "(0x|0X)" HEXDIGIT "+" OR "0" OCTALDIGIT "*" OR "[1-9]" DIGIT "*" ")"
-
-#define INTEGER_SUFFIX "(" "[uU][lL]?|[lL][uU]?" ")"
-#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
-#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \
-    "(" "[lL][lL]" ")" "[uU]" "?" OR \
-    "i64" \
-    ")"
-#else
-#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \
-    "(" "[lL][lL]" ")" "[uU]" "?" ")"
-#endif
-#define FLOAT_SUFFIX "(" "[fF][lL]?|[lL][fF]?" ")"
-#define CHAR_SPEC "L?"
-
-#define BACKSLASH "(" Q("\\") OR TRI(Q("/")) ")"
-#define ESCAPESEQ BACKSLASH "(" \
-    "[abfnrtv?'\"]" OR \
-    BACKSLASH OR \
-    "x" HEXDIGIT "+" OR \
-    OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
-    ")"
-#define HEXQUAD HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
-#define UNIVERSALCHAR BACKSLASH "(" \
-    "u" HEXQUAD OR \
-    "U" HEXQUAD HEXQUAD \
-    ")"
-
-#define POUNDDEF "(" "#" OR TRI("=") OR Q("%:") ")"
-#define NEWLINEDEF "(" "\\n" OR "\\r" OR "\\r\\n" ")"
-
-#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
-#define INCLUDEDEF "(include|include_next)"
-#else
-#define INCLUDEDEF "include"
-#endif
-
-#define PP_NUMBERDEF Q(".") "?" DIGIT "(" DIGIT OR NONDIGIT OR EXPSTART OR Q(".") ")*"
-
-// helper for initializing macro definitions
+// helper for initializing token data and macro definitions
+#define Q(c) "\\" c
 #define MACRO_DATA(name, macro) { name, macro }
+#define TOKEN_DATA(id, regex) { id, regex }
 
 // lexertl macro definitions
 template 
@@ -191,11 +134,6 @@ lexertl::init_macro_data[INIT_MACRO_DATA_SIZE] =
     MACRO_DATA(NULL, NULL)      // should be the last entry
 };
 
-#undef MACRO_DATA
-
-// helper for initializing token data
-#define TOKEN_DATA(id, regex) { id, regex }
-
 // common C++/C99 token definitions
 template 
 typename lexertl::lexer_data const
@@ -218,7 +156,7 @@ lexertl::init_data[INIT_DATA_SIZE] =
     TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
     TOKEN_DATA(T_DIVIDE, Q("/")),
     TOKEN_DATA(T_DOT, Q(".")),
-    TOKEN_DATA(T_ELLIPSIS, Q(".") Q(".") Q(".")),
+    TOKEN_DATA(T_ELLIPSIS, Q(".") "{3}"),
     TOKEN_DATA(T_EQUAL, "=="),
     TOKEN_DATA(T_GREATER, ">"),
     TOKEN_DATA(T_GREATEREQUAL, ">="),
@@ -233,16 +171,16 @@ lexertl::init_data[INIT_DATA_SIZE] =
     TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, "{TRI}" Q("(")),
     TOKEN_DATA(T_MINUS, Q("-")),
     TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
-    TOKEN_DATA(T_MINUSMINUS, Q("-") Q("-")),
+    TOKEN_DATA(T_MINUSMINUS, Q("-") "{2}"),
     TOKEN_DATA(T_PERCENT, Q("%")),
     TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
     TOKEN_DATA(T_NOT, "!"),
     TOKEN_DATA(T_NOTEQUAL, "!="),
-    TOKEN_DATA(T_OROR, Q("|") Q("|")),
-    TOKEN_DATA(T_OROR_TRIGRAPH, "({TRI}!\\|)|(\\|{TRI}!)|({TRI}!{TRI}!)"),
+    TOKEN_DATA(T_OROR, Q("|") "{2}"),
+    TOKEN_DATA(T_OROR_TRIGRAPH, "{TRI}!\\||\\|{TRI}!|{TRI}!{TRI}!"),
     TOKEN_DATA(T_PLUS, Q("+")),
     TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
-    TOKEN_DATA(T_PLUSPLUS, Q("+") Q("+")),
+    TOKEN_DATA(T_PLUSPLUS, Q("+") "{2}"),
     TOKEN_DATA(T_ARROW, Q("->")),
     TOKEN_DATA(T_QUESTION_MARK, Q("?")),
     TOKEN_DATA(T_RIGHTBRACE, Q("}")),
@@ -361,7 +299,7 @@ lexertl::init_data[INIT_DATA_SIZE] =
     TOKEN_DATA(T_LONGINTLIT, "{INTEGER}{LONGINTEGER_SUFFIX}"),
     TOKEN_DATA(T_INTLIT, "{INTEGER}{INTEGER_SUFFIX}?"),
     TOKEN_DATA(T_FLOATLIT,
-        "(" "{DIGIT}*" Q(".") "{DIGIT}+|{DIGIT}+" Q(".") "){EXPONENT}?{FLOAT_SUFFIX}?|"
+//      "(" "{DIGIT}*" Q(".") "{DIGIT}+|{DIGIT}+" Q(".") "){EXPONENT}?{FLOAT_SUFFIX}?|"
+        "({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?{FLOAT_SUFFIX}?|"
         "{DIGIT}+{EXPONENT}{FLOAT_SUFFIX}?"),
 #if BOOST_WAVE_USE_STRICT_LEXER != 0
     TOKEN_DATA(T_IDENTIFIER,
@@ -425,7 +364,9 @@ lexertl::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
     { token_id(0) }       // this should be the last entry
 };
 
+#undef MACRO_DATA
 #undef TOKEN_DATA
+#undef Q
 
 ///////////////////////////////////////////////////////////////////////////////
 // initialize lexertl lexer from C++ token regex's
@@ -557,7 +498,7 @@ public:
             case T_IDENTIFIER:
             // test identifier characters for validity (throws if
             // invalid chars found)
-                if (!(language & support_option_no_character_validation)) {
+                if (!boost::wave::need_no_character_validation(language)) {
                     using boost::wave::cpplexer::impl::validate_identifier_name;
                     validate_identifier_name(token_val, pos.get_line(),
                         pos.get_column(), pos.get_file());
@@ -568,11 +509,11 @@ public:
             case T_CHARLIT:
             // test literal characters for validity (throws if invalid
             // chars found)
-                if (language & support_option_convert_trigraphs) {
+                if (boost::wave::need_convert_trigraphs(language)) {
                     using wave::cpplexer::impl::convert_trigraphs;
                     token_val = convert_trigraphs(token_val);
                 }
-                if (!(language & support_option_no_character_validation)) {
+                if (!boost::wave::need_no_character_validation(language)) {
                     using wave::cpplexer::impl::validate_literal;
                     validate_literal(token_val, pos.get_line(),
                         pos.get_column(), pos.get_file());
@@ -621,7 +562,7 @@ public:
             case T_COMPL_TRIGRAPH:
             case T_POUND_TRIGRAPH:
             case T_ANY_TRIGRAPH:
-                if (language & support_option_convert_trigraphs)
+                if (boost::wave::need_convert_trigraphs(language))
                 {
                     using wave::cpplexer::impl::convert_trigraph;
                     token_val = convert_trigraph(token_val);
diff --git a/samples/waveidl/idllexer/idl_re2c_lexer.hpp b/samples/waveidl/idllexer/idl_re2c_lexer.hpp
index 22490bd..99137e5 100644
--- a/samples/waveidl/idllexer/idl_re2c_lexer.hpp
+++ b/samples/waveidl/idllexer/idl_re2c_lexer.hpp
@@ -140,14 +140,14 @@ lexer::get()
     if (T_IDENTIFIER == id) {
     // test identifier characters for validity (throws if invalid chars found)
-        if (!(language & support_option_no_character_validation)) {
+        if (!boost::wave::need_no_character_validation(language)) {
             boost::wave::cpplexer::impl::validate_identifier_name(value,
                 scanner.line, -1, filename);
         }
     }
     else if (T_STRINGLIT == id || T_CHARLIT == id) {
     // test literal characters for validity (throws if invalid chars found)
-        if (!(language & support_option_no_character_validation)) {
+        if (!boost::wave::need_no_character_validation(language)) {
             boost::wave::cpplexer::impl::validate_literal(value,
                 scanner.line, -1, filename);
         }
diff --git a/test/testwave/testwave_app.cpp b/test/testwave/testwave_app.cpp
index 6bb85ab..e0e6e68 100644
--- a/test/testwave/testwave_app.cpp
+++ b/test/testwave/testwave_app.cpp
@@ -528,7 +528,8 @@ testwave_app::extract_special_information(std::string const& filename,
         boost::wave::support_option_variadics |
         boost::wave::support_option_long_long |
         boost::wave::support_option_no_character_validation |
-        boost::wave::support_option_convert_trigraphs);
+        boost::wave::support_option_convert_trigraphs |
+        boost::wave::support_option_insert_whitespace);
 
     position_type pos(filename.c_str());
     lexer_type it = lexer_type(instr.begin(), instr.end(), pos, lang_opts);
@@ -754,6 +755,7 @@ testwave_app::initialise_options(Context& ctx, po::variables_map const& vm,
 #if BOOST_WAVE_EMIT_PRAGMA_DIRECTIVES != 0
                 | boost::wave::support_option_emit_pragma_directives
 #endif
+                | boost::wave::support_option_insert_whitespace
             ));
     }
     else if (vm.count("variadics")) {
diff --git a/tool/cpp.cpp b/tool/cpp.cpp
index 072d590..de976fe 100644
--- a/tool/cpp.cpp
+++ b/tool/cpp.cpp
@@ -659,6 +659,7 @@ int error_count = 0;
 #if BOOST_WAVE_EMIT_PRAGMA_DIRECTIVES != 0
                 | boost::wave::support_option_emit_pragma_directives
 #endif
+                | boost::wave::support_option_insert_whitespace
             ));
     }
     else if (vm.count("variadics")) {
@@ -690,9 +691,28 @@ int error_count = 0;
     // control the generation of #line directives
         if (vm.count("line")) {
+            int lineopt = vm["line"].as<int>();
+            if (0 != lineopt && 1 != lineopt) {
+                cerr << "wave: bogus value for --line command line option: "
+                    << lineopt << endl;
+                return -1;
+            }
             ctx.set_language(
                 boost::wave::enable_emit_line_directives(ctx.get_language(),
-                    vm["line"].as<int>() != 0));
+                    lineopt != 0));
+        }
+
+    // control whether whitespace should be inserted to disambiguate output
+        if (vm.count("disambiguate")) {
+            int disambiguateopt = vm["disambiguate"].as<int>();
+            if (0 != disambiguateopt && 1 != disambiguateopt) {
+                cerr << "wave: bogus value for --disambiguate command line option: "
+                    << disambiguateopt << endl;
+                return -1;
+            }
+            ctx.set_language(
+                boost::wave::enable_insert_whitespace(ctx.get_language(),
+                    disambiguateopt != 0));
         }
 
     // add include directories to the system include search paths
@@ -1029,8 +1049,13 @@ main (int argc, char *argv[])
             "2: all whitespace is preserved")
         ("line,L", po::value<int>()->default_value(1),
             "control the generation of #line directives\n"
-            "0: no #line directives are generated\n"
+            "0: no #line directives are generated,\n"
            "1: #line directives will be emitted (default)")
+        ("disambiguate", po::value<int>()->default_value(1),
+            "control whitespace insertion to disambiguate\n"
+            "consecutive tokens\n"
+            "0: no additional whitespace is generated,\n"
+            "1: whitespace is used to disambiguate output (default)")
        ("extended,x", "enable the #pragma wave system() directive")
 #if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
        ("noguard,G", "disable include guard detection")
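
Usage sketch (illustrative only, not part of the diff above): the context and
lexer typedefs and the sample input below follow the usual Wave quick-start
setup and are assumptions; enable_insert_whitespace() and the --disambiguate
option are exactly what this patch introduces. Error handling is omitted.

    #include <iostream>
    #include <string>

    #include <boost/wave.hpp>
    #include <boost/wave/cpplexer/cpp_lex_token.hpp>
    #include <boost/wave/cpplexer/cpp_lex_iterator.hpp>

    int main()
    {
        typedef boost::wave::cpplexer::lex_iterator<
                boost::wave::cpplexer::lex_token<> >
            lex_iterator_type;
        typedef boost::wave::context<std::string::iterator, lex_iterator_type>
            context_type;

        std::string input("#define PLUS +\n1 PLUS+2\n");   // arbitrary sample input
        context_type ctx(input.begin(), input.end(), "sample.cpp");

        // switch off the whitespace insertion added by this patch; leaving the
        // option at its default (enabled) keeps the previous behaviour of
        // separating adjacent tokens which would otherwise merge in the output
        ctx.set_language(
            boost::wave::enable_insert_whitespace(ctx.get_language(), false));

        for (context_type::iterator_type it = ctx.begin(), end = ctx.end();
             it != end; ++it)
        {
            std::cout << (*it).get_value();
        }
        return 0;
    }

The wave tool exposes the same switch on its command line: wave --disambiguate=0
suppresses the extra whitespace, while the default --disambiguate=1 keeps it.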