From e83d5ff8d7bd7213e9d54f4bd1bb0abdef9d38e1 Mon Sep 17 00:00:00 2001
From: Hartmut Kaiser <hartmut.kaiser@gmail.com>
Date: Tue, 18 Jan 2005 15:44:51 +0000
Subject: [PATCH] Performance improvements, minor bits fixed.

[SVN r2446]
---
 ChangeLog                                     | 11 +++
 .../wave/cpplexer/re2clex/cpp_re2c_lexer.hpp  | 10 ++-
 include/boost/wave/cpplexer/token_cache.hpp   |  2 +-
 include/boost/wave/util/cpp_macromap.hpp      | 71 ++-----------------
 include/boost/wave/wave_config.hpp            |  1 +
 include/boost/wave/wave_version.hpp           |  4 +-
 samples/cpp_tokens/cpp_tokens.cpp             |  5 +-
 samples/cpp_tokens/slex_iterator.hpp          |  4 +-
 samples/list_includes/list_includes.cpp       | 10 +--
 samples/waveidl/idl.cpp                       | 10 +--
 samples/waveidl/idllexer/idl.re.cpp           |  2 +-
 src/cpplexer/re2clex/cpp.re.cpp               |  2 +-
 tool/cpp.cpp                                  | 37 ++++++----
 13 files changed, 64 insertions(+), 105 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index f104e7b..748d82b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -23,6 +23,17 @@ TODO (known issues):
 
 CHANGELOG
 
+Version 1.1.12
+- A rough performance analysis showed that 30% of the time is spent parsing 
+  the input for pp directives (cpp_grammar), 35% of the time is spent inside
+  the flex_string code, mainly in the copy constructor and assignment 
+  operator, 15% of the time is spent inside the list and vector member 
+  functions, 10% is spent for memory allocation but only 1% of the time is 
+  spent in the re2c lexer.
+- Identified a performance problem, where the token_cache was instantiated
+  for every created lexer object, but needed to be initialised only once.
+- Added #include <wave_version.hpp> to wave_config.hpp
+
 Version 1.1.11
 - Updated copyrights.
 - Fixed some bugs introduced by the latest refactoring.
diff --git a/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp b/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp
index aa3be21..c250d1e 100644
--- a/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp
+++ b/include/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp
@@ -82,7 +82,7 @@ private:
     string_type value;
     bool at_eof;
     
-    token_cache<string_type> token_cache;
+    static token_cache<string_type> const cache;
 };
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -187,7 +187,7 @@ lexer<IteratorT, PositionT>::get()
             value = string_type((char const *)scanner.tok, scanner.cur-scanner.tok);
         }
         else {
-            value = token_cache.get_token_value(id);
+            value = cache.get_token_value(id);
         }
         break;
     }
@@ -245,6 +245,12 @@ private:
     lexer<IteratorT, PositionT> lexer;
 };
 
+///////////////////////////////////////////////////////////////////////////////
+template <typename IteratorT, typename PositionT>
+token_cache<typename lexer<IteratorT, PositionT>::string_type> const
+    lexer<IteratorT, PositionT>::cache = 
+        token_cache<typename lexer<IteratorT, PositionT>::string_type>();
+    
 }   // namespace re2clex
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/include/boost/wave/cpplexer/token_cache.hpp b/include/boost/wave/cpplexer/token_cache.hpp
index 4239878..3b518ba 100644
--- a/include/boost/wave/cpplexer/token_cache.hpp
+++ b/include/boost/wave/cpplexer/token_cache.hpp
@@ -33,7 +33,7 @@ public:
         }
     }
 
-    StringT const &get_token_value(token_id id)
+    StringT const &get_token_value(token_id id) const
     {
         return cache[BASEID_FROM_TOKEN(id) - T_FIRST_TOKEN];
     }
diff --git a/include/boost/wave/util/cpp_macromap.hpp b/include/boost/wave/util/cpp_macromap.hpp
index 6c8c5e1..daa2e20 100644
--- a/include/boost/wave/util/cpp_macromap.hpp
+++ b/include/boost/wave/util/cpp_macromap.hpp
@@ -127,14 +127,6 @@ protected:
         unput_queue_iterator<IteratorT, token_type, ContainerT> const &last, 
         bool expand_operator_defined);
 
-// Expand all macros in the given replacement list and continue argument 
-// collection from the given input stream
-    template <typename IteratorT, typename ContainerT>
-    void expand_replacementlist_tokensequence(
-        ContainerT &expanded, ContainerT &replacement_list,
-        IteratorT &first, IteratorT const &last, 
-        bool expand_operator_defined);
-
 //  Collect all arguments supplied to a macro invocation
     template <typename IteratorT, typename ContainerT, typename SizeT>
     typename std::vector<ContainerT>::size_type collect_arguments (
@@ -561,8 +553,6 @@ macromap<ContextT>::collect_arguments (token_type const curr_token,
 {
     using namespace boost::wave;
 
-//on_exit::reset<bool> on_exit_next(next.get_allow_continuation(), true);
-
     arguments.push_back(ContainerT());
     
 // collect the actual arguments
@@ -728,51 +718,6 @@ ContainerT pending_queue;
     BOOST_ASSERT(pending_queue.empty()/* && unput_queue.empty()*/);
 }
 
-///////////////////////////////////////////////////////////////////////////////
-// 
-//  expand_replacementlist_tokensequence
-//
-//      fully expands a given replacement list and continues argument 
-//      collection from the given input stream if necessary
-//
-template <typename ContextT>
-template <typename IteratorT, typename ContainerT>
-inline void
-macromap<ContextT>::expand_replacementlist_tokensequence(ContainerT &expanded, 
-    ContainerT &replacement_list, IteratorT &first, IteratorT const &last, 
-    bool expand_operator_defined)
-{
-    typedef impl::gen_unput_queue_iterator<IteratorT, token_type, ContainerT> 
-        gen_type;
-    typedef typename gen_type::return_type iterator_type;
-
-iterator_type last_it = gen_type::generate(last, false);
-iterator_type first_it = gen_type::generate(replacement_list, first, false);
-
-on_exit::assign<IteratorT, iterator_type> on_exit(first, first_it);
-bool was_whitespace = false;
-ContainerT pending_queue;
-    
-    while (!pending_queue.empty() || !first_it.get_unput_queue().empty()) {
-    token_type t = expand_tokensequence_worker(pending_queue, first_it, 
-                    last_it, expand_operator_defined);
-    bool is_whitespace = IS_CATEGORY(t, WhiteSpaceTokenType) &&
-        T_PLACEHOLDER != token_id(t);
-
-        if (!was_whitespace || !is_whitespace) {
-            if (is_whitespace && T_SPACE != token_id(t)) {
-                t.set_token_id(T_SPACE);
-                t.set_value(" ");
-            }
-            expanded.push_back(t);
-        }
-        was_whitespace = is_whitespace;
-    }
-
-// should have returned all expanded tokens
-    BOOST_ASSERT(pending_queue.empty());
-}
-
 ///////////////////////////////////////////////////////////////////////////////
 // 
 //  expand_argument
@@ -1005,8 +950,6 @@ macromap<ContextT>::rescan_replacement_list(token_type const &curr_token,
     // expansion isn't available as an expandable macro
     on_exit::reset<bool> on_exit(macro_def.is_available_for_replacement, false);
 
-//        expand_replacementlist_tokensequence(expanded, replacement_list, 
-//            nfirst, nlast, expand_operator_defined);     
         expand_whole_tokensequence(expanded, replacement_list.begin(), 
             replacement_list.end(), expand_operator_defined);     
         
@@ -1422,7 +1365,7 @@ macromap<ContextT>::is_valid_concat(string_type new_value,
     lexer_type it = lexer_type(value_to_test.begin(), value_to_test.end(), pos, 
         ctx.get_language());
     lexer_type end = lexer_type();
-    for (/**/; it != end; ++it) 
+    for (/**/; it != end && T_EOF != token_id(*it); ++it) 
         rescanned.push_back(*it);
 
 #if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0
@@ -1432,8 +1375,8 @@ macromap<ContextT>::is_valid_concat(string_type new_value,
         
 // test if the newly generated token sequence contains more than 1 token
 // the second one is the T_EOF token
-    BOOST_ASSERT(T_EOF == token_id(rescanned.back()));
-    return 2 == rescanned.size();
+//    BOOST_ASSERT(T_EOF == token_id(rescanned.back()));
+    return 1 == rescanned.size();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -1549,10 +1492,10 @@ macromap<ContextT>::concat_tokensequence(ContainerT &expanded)
 
             // replace the old token (pointed to by *prev) with the retokenized
             // sequence
-            typename ContainerT::reverse_iterator rit = rescanned.rbegin();
-
-                BOOST_ASSERT(rit != rescanned.rend());
-                rescanned.erase((++rit).base());
+//            typename ContainerT::reverse_iterator rit = rescanned.rbegin();
+//
+//                BOOST_ASSERT(rit != rescanned.rend());
+//                rescanned.erase((++rit).base());
                 expanded.splice(next, rescanned);
 
             // the last token of the inserted sequence is the new previous
diff --git a/include/boost/wave/wave_config.hpp b/include/boost/wave/wave_config.hpp
index 47c4225..4d2d171 100644
--- a/include/boost/wave/wave_config.hpp
+++ b/include/boost/wave/wave_config.hpp
@@ -16,6 +16,7 @@
 #include <boost/config.hpp>
 #include <boost/version.hpp>
 #include <boost/spirit/version.hpp>
+#include <boost/wave/wave_version.hpp>
 
 ///////////////////////////////////////////////////////////////////////////////
 //  Define the maximal include nesting depth allowed. If this value isn't 
diff --git a/include/boost/wave/wave_version.hpp b/include/boost/wave/wave_version.hpp
index 3e349cd..61c8399 100644
--- a/include/boost/wave/wave_version.hpp
+++ b/include/boost/wave/wave_version.hpp
@@ -16,11 +16,11 @@
 //  BOOST_WAVE_VERSION & 0x0000FF is the sub-minor version
 //  BOOST_WAVE_VERSION & 0x00FF00 is the minor version
 //  BOOST_WAVE_VERSION & 0xFF0000 is the major version
-#define BOOST_WAVE_VERSION                 0x010111
+#define BOOST_WAVE_VERSION                 0x010112
 
 //  The following defines contain the same information as above
 #define BOOST_WAVE_VERSION_MAJOR           1
 #define BOOST_WAVE_VERSION_MINOR           1
-#define BOOST_WAVE_VERSION_SUBMINOR       11
+#define BOOST_WAVE_VERSION_SUBMINOR       12
 
 #endif // !defined(WAVE_VERSION_H_9D79ABDB_AC54_4C0A_89B1_F70A2DCFE21E_INCLUDED)
diff --git a/samples/cpp_tokens/cpp_tokens.cpp b/samples/cpp_tokens/cpp_tokens.cpp
index 3e0b479..1575d45 100644
--- a/samples/cpp_tokens/cpp_tokens.cpp
+++ b/samples/cpp_tokens/cpp_tokens.cpp
@@ -71,8 +71,9 @@ main(int argc, char *argv[])
 //  which depends on the lexer type (provided by the second template 
 //  parameter). Our lexer type 'slex_iterator<>' depends on a custom token type
 //  'slex_token<>'. Our custom token type differs from the original one povided 
-//  by the Wave library only by defining an addition operator<<, which is used
-//  to dump the token information carried by a given token (see loop below).
+//  by the Wave library only by defining an additional operator<<(), which is 
+//  used to dump the token information carried by a given token (see loop 
+//  below).
     typedef boost::wave::cpp_token_sample::slex_token<> token_type;
     typedef boost::wave::cpp_token_sample::slex_iterator<token_type> 
         lex_iterator_type;
diff --git a/samples/cpp_tokens/slex_iterator.hpp b/samples/cpp_tokens/slex_iterator.hpp
index 7b2673c..f1e446d 100644
--- a/samples/cpp_tokens/slex_iterator.hpp
+++ b/samples/cpp_tokens/slex_iterator.hpp
@@ -95,7 +95,9 @@ private:
 //            eof token equivalent
 //          - the lexer should implement a constructor taking two iterators
 //            pointing to the beginning and the end of the input stream and
-//            a third parameter containing the name of the parsed input file 
+//            a third parameter containing the name of the parsed input file,
+//            the 4th parameter contains the information about the mode the 
+//            preprocessor is used in (C99/C++ mode etc.)
 //
 ///////////////////////////////////////////////////////////////////////////////
 
diff --git a/samples/list_includes/list_includes.cpp b/samples/list_includes/list_includes.cpp
index 97d6f8b..2e1cce3 100644
--- a/samples/list_includes/list_includes.cpp
+++ b/samples/list_includes/list_includes.cpp
@@ -21,12 +21,6 @@
 ///////////////////////////////////////////////////////////////////////////////
 //  include required boost libraries
 #include <boost/assert.hpp>
-
-///////////////////////////////////////////////////////////////////////////////
-//  This sample requires the program_options library written by Vladimir Prus,
-//  which is currently under Boost review. 
-//  It is available here: http://boost-sandbox.sourceforge.net/program_options
-//
 #include <boost/program_options.hpp>
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -44,9 +38,9 @@
 
 ///////////////////////////////////////////////////////////////////////////////
 //  include lexer specifics, import lexer names
-#if !defined(BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION)
+#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION == 0
 #include <boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp>
-#endif // !defined(BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION)
+#endif 
 
 ///////////////////////////////////////////////////////////////////////////////
 //  import required names
diff --git a/samples/waveidl/idl.cpp b/samples/waveidl/idl.cpp
index ce4ba6a..65fb671 100644
--- a/samples/waveidl/idl.cpp
+++ b/samples/waveidl/idl.cpp
@@ -12,12 +12,6 @@
 
 #include "idl.hpp"                  // global configuration
 
-///////////////////////////////////////////////////////////////////////////////
-//  This sample requires the program_options library written by Vladimir Prus,
-//  which is already accepted into Boost, but not included with the 
-//  distribution yet. 
-//  It is available here: http://boost-sandbox.sourceforge.net/program_options.
-//
 #include <boost/program_options.hpp>
 #include <boost/filesystem/path.hpp>
 
@@ -250,8 +244,8 @@ boost::wave::util::file_position_type current_position;
                           istreambuf_iterator<char>());
 #endif 
 
-    //  This sample uses the lex_iterator and lex_token types predefined with 
-    //  the Wave library, but it is possible to use your own types.
+    //  This sample uses the lex_token type predefined in the Wave library, 
+    //  but uses a custom lexer type.
         typedef boost::wave::idllexer::lex_iterator<
                 boost::wave::cpplexer::lex_token<> >
             lex_iterator_type;
diff --git a/samples/waveidl/idllexer/idl.re.cpp b/samples/waveidl/idllexer/idl.re.cpp
index 787d605..b3e180d 100644
--- a/samples/waveidl/idllexer/idl.re.cpp
+++ b/samples/waveidl/idllexer/idl.re.cpp
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.5 on Wed Jul 28 20:33:41 2004 */
+/* Generated by re2c 0.5 on Tue Jan 18 13:42:17 2005 */
 #line 1 "c:\\Cvs\\wave\\libs\\wave\\samples\\waveidl\\idllexer\\idl.re"
 /*=============================================================================
     Wave: A Standard compliant C++ preprocessor library
diff --git a/src/cpplexer/re2clex/cpp.re.cpp b/src/cpplexer/re2clex/cpp.re.cpp
index f8722dc..3982971 100644
--- a/src/cpplexer/re2clex/cpp.re.cpp
+++ b/src/cpplexer/re2clex/cpp.re.cpp
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.5 on Tue Aug 10 22:01:15 2004 */
+/* Generated by re2c 0.5 on Tue Jan 18 13:40:44 2005 */
 #line 1 "c:\\Cvs\\wave\\libs\\wave\\src\\cpplexer\\re2clex\\cpp.re"
 /*=============================================================================
     Wave: A Standard compliant C++ preprocessor library
diff --git a/tool/cpp.cpp b/tool/cpp.cpp
index 4c83d86..a7c4ac5 100644
--- a/tool/cpp.cpp
+++ b/tool/cpp.cpp
@@ -275,21 +275,25 @@ boost::wave::util::file_position_type current_position;
                           istreambuf_iterator<char>());
 #endif 
 
-    //  This sample uses the lex_iterator and lex_token types predefined with 
-    //  the Wave library, but it is possible to use your own types.
+    //  This application uses the lex_iterator and lex_token types predefined 
+    //  with the Wave library, but it is possible to use your own types.
+    //
+    //  You may want to have a look at the other samples to see how this 
+    //  can be achieved.
         typedef boost::wave::cpplexer::lex_iterator<
                 boost::wave::cpplexer::lex_token<> >
             lex_iterator_type;
+
+    // The C++ preprocessor iterators shouldn't be constructed directly. They 
+    // are to be generated through a boost::wave::context<> object. This 
+    // boost::wave::context object is additionally to be used to initialize and 
+    // define different parameters of the actual preprocessing.
         typedef boost::wave::context<
                 std::string::iterator, lex_iterator_type,
                 boost::wave::iteration_context_policies::load_file_to_string,
                 trace_macro_expansion> 
             context_type;
 
-    // The C++ preprocessor iterators shouldn't be constructed directly. They 
-    // are to be generated through a boost::wave::context<> object. This 
-    // boost::wave::context object is additionally to be used to initialize and 
-    // define different parameters of the actual preprocessing.
     // The preprocessing of the input stream is done on the fly behind the 
     // scenes during iteration over the context_type::iterator_type stream.
     std::ofstream traceout;
@@ -316,7 +320,10 @@ boost::wave::util::file_position_type current_position;
             traceout.clear(cerr.rdstate());
             static_cast<std::basic_ios<char> &>(traceout).rdbuf(cerr.rdbuf());
         }
-        
+
+    // This is the central piece of the Wave library: it provides you with 
+    // the iterators to get the preprocessed tokens and allows you to 
+    // configure the preprocessing stage in advance.
     context_type ctx (instring.begin(), instring.end(), file_name.c_str(),
         trace_macro_expansion(traceout, enable_trace));
 
@@ -442,7 +449,7 @@ boost::wave::util::file_position_type current_position;
     // preprocess the required include files 
         if (vm.count("forceinclude")) {
         // add the filenames to force as include files in _reverse_ order
-        // the second parameter 'is_last' for the force_include function should
+        // the second parameter 'is_last' of the force_include function should
         // be set to true for the last (first given) file.
             vector<string> const &force = 
                 vm["forceinclude"].as<vector<string> >();
@@ -454,8 +461,9 @@ boost::wave::util::file_position_type current_position;
                 first.force_include(filename.c_str(), ++cit == rend);
             }
         }
-        
-    // loop over all generated tokens outputing the generated text 
+
+    // >>>>>>>>>>>>> Here the actual preprocessing happens. <<<<<<<<<<<<<<<<<<<
+    // loop over all generated tokens outputting the generated text 
         while (first != last) {
         // print out the string representation of this token (skip comments)
             using namespace boost::wave;
@@ -626,11 +634,10 @@ main (int argc, char *argv[])
             inserter(arguments, arguments.end()), cmd_line_util::is_argument());
             
     // if there is no input file given, then exit
-        if (0 == arguments.size() || 0 == arguments[0].value.size()) {
-//            cerr << "wave: no input file given, "
-//                 << "use --help to get a hint." << endl;
-//            return 5;
-        // preprocess the given input file
+        if (0 == arguments.size() || 0 == arguments[0].value.size() ||
+            arguments[0].value[0] == "-") 
+        {
+        // preprocess the given input from stdin
             return do_actual_work("stdin", std::cin, vm);
         }
         else {