From 97ff7d1100525d8bfa00df4da033ff95d71806aa Mon Sep 17 00:00:00 2001 From: Oliver Kowalke Date: Sun, 15 Apr 2012 10:49:42 +0000 Subject: [PATCH] context: reorganice performance measurement [SVN r77987] --- performance/performance.cpp | 129 ++++++++++--------------- performance/performance_gcc_i386.hpp | 7 +- performance/performance_gcc_x86-64.hpp | 11 ++- 3 files changed, 65 insertions(+), 82 deletions(-) diff --git a/performance/performance.cpp b/performance/performance.cpp index 3820762..d345ec1 100644 --- a/performance/performance.cpp +++ b/performance/performance.cpp @@ -4,6 +4,8 @@ // (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) +#define BOOST_PP_LIMIT_MAG 10 + #include #include #include @@ -13,7 +15,7 @@ #include #include #include -#include +#include #ifndef BOOST_WINDOWS #include @@ -23,7 +25,12 @@ #include "performance.hpp" namespace ctx = boost::ctx; -namespace po = boost::program_options; + +#define CALL_UCONTEXT(z,n,unused) \ + ::swapcontext( & ucm, & uc); + +#define CALL_FCONTEXT(z,n,unused) \ + ctx::jump_fcontext( & fcm, & fc, 0); #ifndef BOOST_WINDOWS ucontext_t uc, ucm; @@ -45,111 +52,74 @@ static void f1( intptr_t) } #ifndef BOOST_WINDOWS -unsigned int test_ucontext( unsigned int iterations) +unsigned int test_ucontext() { - cycle_t total( 0); cycle_t overhead( get_overhead() ); std::cout << "overhead for rdtsc == " << overhead << " cycles" << std::endl; + ctx::stack_allocator alloc; + + ::getcontext( & uc); + uc.uc_stack.ss_sp = + static_cast< char * >( alloc.allocate(ctx::default_stacksize() ) ) + - ctx::default_stacksize(); + uc.uc_stack.ss_size = ctx::default_stacksize(); + ::makecontext( & uc, f2, 0); + // cache warum-up - { - ctx::stack_allocator alloc; +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~) - ::getcontext( & uc); - uc.uc_stack.ss_sp = - static_cast< char * >( alloc.allocate(ctx::default_stacksize() ) ) - - ctx::default_stacksize(); - uc.uc_stack.ss_size = ctx::default_stacksize(); - ::makecontext( & uc, f2, 0); - swapcontext( & ucm, & uc); - swapcontext( & ucm, & uc); - } + cycle_t start( get_cycles() ); +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~) + cycle_t total( get_cycles() - start); - for ( unsigned int i = 0; i < iterations; ++i) - { - cycle_t start( get_cycles() ); - swapcontext( & ucm, & uc); - cycle_t diff( get_cycles() - start); + // we have two jumps and two measuremt-overheads + total -= overhead; // overhead of measurement + total /= BOOST_PP_LIMIT_MAG; // per call + total /= 2; // 2x jump_to c1->c2 && c2->c1 - // we have two jumps and two measuremt-overheads - diff -= overhead; // overhead of measurement - diff /= 2; // 2x jump_to c1->c2 && c2->c1 - - BOOST_ASSERT( diff >= 0); - total += diff; - } - return total/iterations; + return total; } #endif -unsigned int test_fcontext( unsigned int iterations) +unsigned int test_fcontext() { - cycle_t total( 0); cycle_t overhead( get_overhead() ); std::cout << "overhead for rdtsc == " << overhead << " cycles" << std::endl; + ctx::stack_allocator alloc; + fc.fc_stack.base = alloc.allocate(ctx::default_stacksize()); + fc.fc_stack.limit = + static_cast< char * >( fc.fc_stack.base) - ctx::default_stacksize(); + ctx::make_fcontext( & fc, f1, 0); + + ctx::start_fcontext( & fcm, & fc); + // cache warum-up - { - ctx::stack_allocator alloc; +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) - fc.fc_stack.base = alloc.allocate(ctx::default_stacksize()); - fc.fc_stack.limit = - static_cast< char * >( fc.fc_stack.base) - ctx::default_stacksize(); - ctx::make_fcontext( & fc, f1, 0); - ctx::start_fcontext( & fcm, & fc); - ctx::jump_fcontext( & fcm, & fc, 0); - } + cycle_t start( get_cycles() ); +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) + cycle_t total( get_cycles() - start); - for ( unsigned int i = 0; i < iterations; ++i) - { - cycle_t start( get_cycles() ); - ctx::jump_fcontext( & fcm, & fc, 0); - cycle_t diff( get_cycles() - start); + // we have two jumps and two measuremt-overheads + total -= overhead; // overhead of measurement + total /= BOOST_PP_LIMIT_MAG; // per call + total /= 2; // 2x jump_to c1->c2 && c2->c1 - // we have two jumps and two measuremt-overheads - diff -= overhead; // overhead of measurement - diff /= 2; // 2x jump_to c1->c2 && c2->c1 - - BOOST_ASSERT( diff >= 0); - total += diff; - } - return total/iterations; + return total; } int main( int argc, char * argv[]) { try { - unsigned int iterations( 0); - - po::options_description desc("allowed options"); - desc.add_options() - ("help,h", "help message") - ("iterations,i", po::value< unsigned int >( & iterations), "iterations"); - - po::variables_map vm; - po::store( - po::parse_command_line( - argc, - argv, - desc), - vm); - po::notify( vm); - - if ( vm.count("help") ) - { - std::cout << desc << std::endl; - return EXIT_SUCCESS; - } - - if ( 0 >= iterations) throw std::invalid_argument("iterations must be greater than zero"); - bind_to_processor( 0); - unsigned int res = test_fcontext( iterations); + unsigned int res = test_fcontext(); std::cout << "fcontext: average of " << res << " cycles per switch" << std::endl; #ifndef BOOST_WINDOWS - res = test_ucontext( iterations); + res = test_ucontext(); std::cout << "ucontext: average of " << res << " cycles per switch" << std::endl; #endif @@ -161,3 +131,6 @@ int main( int argc, char * argv[]) { std::cerr << "unhandled exception" << std::endl; } return EXIT_FAILURE; } + +#undef CALL_FCONTEXT +#undef CALL_UCONTEXT diff --git a/performance/performance_gcc_i386.hpp b/performance/performance_gcc_i386.hpp index 65d5bd5..f995e5b 100644 --- a/performance/performance_gcc_i386.hpp +++ b/performance/performance_gcc_i386.hpp @@ -8,6 +8,7 @@ #define PERFORMANCE_GCC_I386_H #include +#include #include #include @@ -20,6 +21,9 @@ typedef boost::uint64_t cycle_t; inline cycle_t get_cycles() { +#if defined(__INTEL_COMPILER) || defined(__ICC) || defined(_ECC) || defined(__ICL) + return __rdtsc(); +#else boost::uint32_t res[2]; __asm__ __volatile__ ( @@ -35,6 +39,7 @@ cycle_t get_cycles() ); return * reinterpret_cast< cycle_t * >( res); +#endif } struct measure @@ -56,7 +61,7 @@ cycle_t get_overhead() overhead.begin(), overhead.end(), measure() ); BOOST_ASSERT( overhead.begin() != overhead.end() ); - return * std::min_element( overhead.begin(), overhead.end() ); + return std::accumulate( overhead.begin(), overhead.end(), 0) / iterations; } #endif // PERFORMANCE_GCC_I386_H diff --git a/performance/performance_gcc_x86-64.hpp b/performance/performance_gcc_x86-64.hpp index 69cf057..76cb59b 100644 --- a/performance/performance_gcc_x86-64.hpp +++ b/performance/performance_gcc_x86-64.hpp @@ -8,6 +8,7 @@ #define PERFORMANCE_GCC_X86_64_H #include +#include #include #include @@ -20,8 +21,11 @@ typedef boost::uint64_t cycle_t; inline cycle_t get_cycles() { +#if defined(__INTEL_COMPILER) || defined(__ICC) || defined(_ECC) || defined(__ICL) + return __rdtsc(); +#else boost::uint32_t res[2]; - + __asm__ __volatile__ ( "xorl %%eax, %%eax\n" "cpuid\n" @@ -33,8 +37,9 @@ cycle_t get_cycles() "cpuid\n" ::: "%rax", "%rbx", "%rcx", "%rdx" ); - + return * ( cycle_t *)res; +#endif } struct measure @@ -56,7 +61,7 @@ cycle_t get_overhead() overhead.begin(), overhead.end(), measure() ); BOOST_ASSERT( overhead.begin() != overhead.end() ); - return * std::min_element( overhead.begin(), overhead.end() ); + return std::accumulate( overhead.begin(), overhead.end(), 0) / iterations; } #endif // PERFORMANCE_GCC_X86_64_H