From e83d5ff8d7bd7213e9d54f4bd1bb0abdef9d38e1 Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Tue, 18 Jan 2005 15:44:51 +0000 Subject: [PATCH] Performance improvements, minor bits fixed. [SVN r2446] --- ChangeLog | 11 +++ .../wave/cpplexer/re2clex/cpp_re2c_lexer.hpp | 10 ++- include/boost/wave/cpplexer/token_cache.hpp | 2 +- include/boost/wave/util/cpp_macromap.hpp | 71 ++----------------- include/boost/wave/wave_config.hpp | 1 + include/boost/wave/wave_version.hpp | 4 +- samples/cpp_tokens/cpp_tokens.cpp | 5 +- samples/cpp_tokens/slex_iterator.hpp | 4 +- samples/list_includes/list_includes.cpp | 10 +-- samples/waveidl/idl.cpp | 10 +-- samples/waveidl/idllexer/idl.re.cpp | 2 +- src/cpplexer/re2clex/cpp.re.cpp | 2 +- tool/cpp.cpp | 37 ++++++---- 13 files changed, 64 insertions(+), 105 deletions(-) diff --git a/ChangeLog b/ChangeLog index f104e7b..748d82b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -23,6 +23,17 @@ TODO (known issues): CHANGELOG +Version 1.1.12 +- A rough performance analysis showed that 30% of the time is spent parsing + the input for pp directives (cpp_grammar), 35% of the time is spent inside + the flex_string code, mainly in the copy constructor and assignment + operator, 15% of the time is spent inside the list and vector member + functions, 10% is spent for memory allocation but only 1% of the time is + spent in the re2c lexer. +- Identified a performance problem, where the token_cache was instantiated + for every created lexer object, but needed to be initialized only once. +- Added #include to wave_config.hpp + Version 1.1.11 - Updated copyrights. - Fixed some bugs introduced by the latest refactoring. 
diff --git a/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp b/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp index aa3be21..c250d1e 100644 --- a/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp +++ b/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp @@ -82,7 +82,7 @@ private: string_type value; bool at_eof; - token_cache token_cache; + static token_cache const cache; }; /////////////////////////////////////////////////////////////////////////////// @@ -187,7 +187,7 @@ lexer::get() value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok); } else { - value = token_cache.get_token_value(id); + value = cache.get_token_value(id); } break; } @@ -245,6 +245,12 @@ private: lexer lexer; }; +/////////////////////////////////////////////////////////////////////////////// +template +token_cache::string_type> const + lexer::cache = + token_cache::string_type>(); + } // namespace re2clex /////////////////////////////////////////////////////////////////////////////// diff --git a/include/boost/wave/cpplexer/token_cache.hpp b/include/boost/wave/cpplexer/token_cache.hpp index 4239878..3b518ba 100644 --- a/include/boost/wave/cpplexer/token_cache.hpp +++ b/include/boost/wave/cpplexer/token_cache.hpp @@ -33,7 +33,7 @@ public: } } - StringT const &get_token_value(token_id id) + StringT const &get_token_value(token_id id) const { return cache[BASEID_FROM_TOKEN(id) - T_FIRST_TOKEN]; } diff --git a/include/boost/wave/util/cpp_macromap.hpp b/include/boost/wave/util/cpp_macromap.hpp index 6c8c5e1..daa2e20 100644 --- a/include/boost/wave/util/cpp_macromap.hpp +++ b/include/boost/wave/util/cpp_macromap.hpp @@ -127,14 +127,6 @@ protected: unput_queue_iterator const &last, bool expand_operator_defined); -// Expand all macros in the given replacement list and continue argument -// collection from the given input stream - template - void expand_replacementlist_tokensequence( - ContainerT &expanded, ContainerT &replacement_list, - IteratorT &first, 
IteratorT const &last, - bool expand_operator_defined); - // Collect all arguments supplied to a macro invocation template typename std::vector::size_type collect_arguments ( @@ -561,8 +553,6 @@ macromap::collect_arguments (token_type const curr_token, { using namespace boost::wave; -//on_exit::reset on_exit_next(next.get_allow_continuation(), true); - arguments.push_back(ContainerT()); // collect the actual arguments @@ -728,51 +718,6 @@ ContainerT pending_queue; BOOST_ASSERT(pending_queue.empty()/* && unput_queue.empty()*/); } -/////////////////////////////////////////////////////////////////////////////// -// -// expand_replacementlist_tokensequence -// -// fully expands a given replacement list and continues argument -// collection from the given input stream if necessary -// -template -template -inline void -macromap::expand_replacementlist_tokensequence(ContainerT &expanded, - ContainerT &replacement_list, IteratorT &first, IteratorT const &last, - bool expand_operator_defined) -{ - typedef impl::gen_unput_queue_iterator - gen_type; - typedef typename gen_type::return_type iterator_type; - -iterator_type last_it = gen_type::generate(last, false); -iterator_type first_it = gen_type::generate(replacement_list, first, false); - -on_exit::assign on_exit(first, first_it); -bool was_whitespace = false; -ContainerT pending_queue; - - while (!pending_queue.empty() || !first_it.get_unput_queue().empty()) { - token_type t = expand_tokensequence_worker(pending_queue, first_it, - last_it, expand_operator_defined); - bool is_whitespace = IS_CATEGORY(t, WhiteSpaceTokenType) && - T_PLACEHOLDER != token_id(t); - - if (!was_whitespace || !is_whitespace) { - if (is_whitespace && T_SPACE != token_id(t)) { - t.set_token_id(T_SPACE); - t.set_value(" "); - } - expanded.push_back(t); - } - was_whitespace = is_whitespace; - } - -// should have returned all expanded tokens - BOOST_ASSERT(pending_queue.empty()); -} - 
/////////////////////////////////////////////////////////////////////////////// // // expand_argument @@ -1005,8 +950,6 @@ macromap::rescan_replacement_list(token_type const &curr_token, // expansion isn't available as an expandable macro on_exit::reset on_exit(macro_def.is_available_for_replacement, false); -// expand_replacementlist_tokensequence(expanded, replacement_list, -// nfirst, nlast, expand_operator_defined); expand_whole_tokensequence(expanded, replacement_list.begin(), replacement_list.end(), expand_operator_defined); @@ -1422,7 +1365,7 @@ macromap::is_valid_concat(string_type new_value, lexer_type it = lexer_type(value_to_test.begin(), value_to_test.end(), pos, ctx.get_language()); lexer_type end = lexer_type(); - for (/**/; it != end; ++it) + for (/**/; it != end && T_EOF != token_id(*it); ++it) rescanned.push_back(*it); #if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0 @@ -1432,8 +1375,8 @@ macromap::is_valid_concat(string_type new_value, // test if the newly generated token sequence contains more than 1 token // the second one is the T_EOF token - BOOST_ASSERT(T_EOF == token_id(rescanned.back())); - return 2 == rescanned.size(); +// BOOST_ASSERT(T_EOF == token_id(rescanned.back())); + return 1 == rescanned.size(); } /////////////////////////////////////////////////////////////////////////////// @@ -1549,10 +1492,10 @@ macromap::concat_tokensequence(ContainerT &expanded) // replace the old token (pointed to by *prev) with the retokenized // sequence - typename ContainerT::reverse_iterator rit = rescanned.rbegin(); - - BOOST_ASSERT(rit != rescanned.rend()); - rescanned.erase((++rit).base()); +// typename ContainerT::reverse_iterator rit = rescanned.rbegin(); +// +// BOOST_ASSERT(rit != rescanned.rend()); +// rescanned.erase((++rit).base()); expanded.splice(next, rescanned); // the last token of the inserted sequence is the new previous diff --git a/include/boost/wave/wave_config.hpp b/include/boost/wave/wave_config.hpp index 47c4225..4d2d171 100644 
--- a/include/boost/wave/wave_config.hpp +++ b/include/boost/wave/wave_config.hpp @@ -16,6 +16,7 @@ #include #include #include +#include /////////////////////////////////////////////////////////////////////////////// // Define the maximal include nesting depth allowed. If this value isn't diff --git a/include/boost/wave/wave_version.hpp b/include/boost/wave/wave_version.hpp index 3e349cd..61c8399 100644 --- a/include/boost/wave/wave_version.hpp +++ b/include/boost/wave/wave_version.hpp @@ -16,11 +16,11 @@ // BOOST_WAVE_VERSION & 0x0000FF is the sub-minor version // BOOST_WAVE_VERSION & 0x00FF00 is the minor version // BOOST_WAVE_VERSION & 0xFF0000 is the major version -#define BOOST_WAVE_VERSION 0x010111 +#define BOOST_WAVE_VERSION 0x010112 // The following defines contain the same information as above #define BOOST_WAVE_VERSION_MAJOR 1 #define BOOST_WAVE_VERSION_MINOR 1 -#define BOOST_WAVE_VERSION_SUBMINOR 11 +#define BOOST_WAVE_VERSION_SUBMINOR 12 #endif // !defined(WAVE_VERSION_H_9D79ABDB_AC54_4C0A_89B1_F70A2DCFE21E_INCLUDED) diff --git a/samples/cpp_tokens/cpp_tokens.cpp b/samples/cpp_tokens/cpp_tokens.cpp index 3e0b479..1575d45 100644 --- a/samples/cpp_tokens/cpp_tokens.cpp +++ b/samples/cpp_tokens/cpp_tokens.cpp @@ -71,8 +71,9 @@ main(int argc, char *argv[]) // which depends on the lexer type (provided by the second template // parameter). Our lexer type 'slex_iterator<>' depends on a custom token type // 'slex_token<>'. Our custom token type differs from the original one povided -// by the Wave library only by defining an addition operator<<, which is used -// to dump the token information carried by a given token (see loop below). +// by the Wave library only by defining an additional operator<<(), which is +// used to dump the token information carried by a given token (see loop +// below). 
typedef boost::wave::cpp_token_sample::slex_token<> token_type; typedef boost::wave::cpp_token_sample::slex_iterator lex_iterator_type; diff --git a/samples/cpp_tokens/slex_iterator.hpp b/samples/cpp_tokens/slex_iterator.hpp index 7b2673c..f1e446d 100644 --- a/samples/cpp_tokens/slex_iterator.hpp +++ b/samples/cpp_tokens/slex_iterator.hpp @@ -95,7 +95,9 @@ private: // eof token equivalent // - the lexer should implement a constructor taking two iterators // pointing to the beginning and the end of the input stream and -// a third parameter containing the name of the parsed input file +// a third parameter containing the name of the parsed input file, +// the 4th parameter contains the information about the mode the +// preprocessor is used in (C99/C++ mode etc.) // /////////////////////////////////////////////////////////////////////////////// diff --git a/samples/list_includes/list_includes.cpp b/samples/list_includes/list_includes.cpp index 97d6f8b..2e1cce3 100644 --- a/samples/list_includes/list_includes.cpp +++ b/samples/list_includes/list_includes.cpp @@ -21,12 +21,6 @@ /////////////////////////////////////////////////////////////////////////////// // include required boost libraries #include - -/////////////////////////////////////////////////////////////////////////////// -// This sample requires the program_options library written by Vladimir Prus, -// which is currently under Boost review. 
-// It is available here: http://boost-sandbox.sourceforge.net/program_options -// #include /////////////////////////////////////////////////////////////////////////////// @@ -44,9 +38,9 @@ /////////////////////////////////////////////////////////////////////////////// // include lexer specifics, import lexer names -#if !defined(BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION) +#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION == 0 #include -#endif // !defined(BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION) +#endif /////////////////////////////////////////////////////////////////////////////// // import required names diff --git a/samples/waveidl/idl.cpp b/samples/waveidl/idl.cpp index ce4ba6a..65fb671 100644 --- a/samples/waveidl/idl.cpp +++ b/samples/waveidl/idl.cpp @@ -12,12 +12,6 @@ #include "idl.hpp" // global configuration -/////////////////////////////////////////////////////////////////////////////// -// This sample requires the program_options library written by Vladimir Prus, -// which is already accepted into Boost, but not included with the -// distribution yet. -// It is available here: http://boost-sandbox.sourceforge.net/program_options. -// #include #include @@ -250,8 +244,8 @@ boost::wave::util::file_position_type current_position; istreambuf_iterator()); #endif - // This sample uses the lex_iterator and lex_token types predefined with - // the Wave library, but it is possible to use your own types. + // This sample uses the lex_token type predefined in the Wave library, but + // uses a custom lexer type. 
typedef boost::wave::idllexer::lex_iterator< boost::wave::cpplexer::lex_token<> > lex_iterator_type; diff --git a/samples/waveidl/idllexer/idl.re.cpp b/samples/waveidl/idllexer/idl.re.cpp index 787d605..b3e180d 100644 --- a/samples/waveidl/idllexer/idl.re.cpp +++ b/samples/waveidl/idllexer/idl.re.cpp @@ -1,4 +1,4 @@ -/* Generated by re2c 0.5 on Wed Jul 28 20:33:41 2004 */ +/* Generated by re2c 0.5 on Tue Jan 18 13:42:17 2005 */ #line 1 "c:\\Cvs\\wave\\libs\\wave\\samples\\waveidl\\idllexer\\idl.re" /*============================================================================= Wave: A Standard compliant C++ preprocessor library diff --git a/src/cpplexer/re2clex/cpp.re.cpp b/src/cpplexer/re2clex/cpp.re.cpp index f8722dc..3982971 100644 --- a/src/cpplexer/re2clex/cpp.re.cpp +++ b/src/cpplexer/re2clex/cpp.re.cpp @@ -1,4 +1,4 @@ -/* Generated by re2c 0.5 on Tue Aug 10 22:01:15 2004 */ +/* Generated by re2c 0.5 on Tue Jan 18 13:40:44 2005 */ #line 1 "c:\\Cvs\\wave\\libs\\wave\\src\\cpplexer\\re2clex\\cpp.re" /*============================================================================= Wave: A Standard compliant C++ preprocessor library diff --git a/tool/cpp.cpp b/tool/cpp.cpp index 4c83d86..a7c4ac5 100644 --- a/tool/cpp.cpp +++ b/tool/cpp.cpp @@ -275,21 +275,25 @@ boost::wave::util::file_position_type current_position; istreambuf_iterator()); #endif - // This sample uses the lex_iterator and lex_token types predefined with - // the Wave library, but it is possible to use your own types. + // This application uses the lex_iterator and lex_token types predefined + // with the Wave library, but it is possible to use your own types. + // + // You may want to have a look at the other samples to see how this is + // possible to achieve. typedef boost::wave::cpplexer::lex_iterator< boost::wave::cpplexer::lex_token<> > lex_iterator_type; + + // The C++ preprocessor iterators shouldn't be constructed directly. 
They + // are to be generated through a boost::wave::context<> object. This + // boost::wave::context object is additionally to be used to initialize and + // define different parameters of the actual preprocessing. typedef boost::wave::context< std::string::iterator, lex_iterator_type, boost::wave::iteration_context_policies::load_file_to_string, trace_macro_expansion> context_type; - // The C++ preprocessor iterators shouldn't be constructed directly. They - // are to be generated through a boost::wave::context<> object. This - // boost::wave::context object is additionally to be used to initialize and - // define different parameters of the actual preprocessing. // The preprocessing of the input stream is done on the fly behind the // scenes during iteration over the context_type::iterator_type stream. std::ofstream traceout; @@ -316,7 +320,10 @@ boost::wave::util::file_position_type current_position; traceout.clear(cerr.rdstate()); static_cast &>(traceout).rdbuf(cerr.rdbuf()); } - + + // This is the central piece of the Wave library, it provides you with + // the iterators to get the preprocessed tokens and allows you to configure + // the preprocessing stage in advance. context_type ctx (instring.begin(), instring.end(), file_name.c_str(), trace_macro_expansion(traceout, enable_trace)); @@ -442,7 +449,7 @@ boost::wave::util::file_position_type current_position; // preprocess the required include files if (vm.count("forceinclude")) { // add the filenames to force as include files in _reverse_ order - // the second parameter 'is_last' for the force_include function should + // the second parameter 'is_last' of the force_include function should // be set to true for the last (first given) file. 
vector const &force = vm["forceinclude"].as >(); @@ -454,8 +461,9 @@ boost::wave::util::file_position_type current_position; first.force_include(filename.c_str(), ++cit == rend); } } - - // loop over all generated tokens outputing the generated text + + // >>>>>>>>>>>>> Here the actual preprocessing happens. <<<<<<<<<<<<<<<<<<< + // loop over all generated tokens outputting the generated text while (first != last) { // print out the string representation of this token (skip comments) using namespace boost::wave; @@ -626,11 +634,10 @@ main (int argc, char *argv[]) inserter(arguments, arguments.end()), cmd_line_util::is_argument()); // if there is no input file given, then exit - if (0 == arguments.size() || 0 == arguments[0].value.size()) { -// cerr << "wave: no input file given, " -// << "use --help to get a hint." << endl; -// return 5; - // preprocess the given input file + if (0 == arguments.size() || 0 == arguments[0].value.size() || + arguments[0].value[0] == "-") + { + // preprocess the given input from stdin return do_actual_work("stdin", std::cin, vm); } else {