diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index 29626f7..bac2383 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -23,255 +23,374 @@ project boost/context shared:BOOST_CONTEXT_DYN_LINK=1 ; +local rule default_binary_format ( ) +{ + local tmp = elf ; + if [ os.name ] = "MACOSX" { tmp = mach-o ; } + if [ os.name ] = "NT" { tmp = pe ; } + return $(tmp) ; +} + +feature.feature binary-format + : elf + mach-o + pe + : propagated + ; +feature.set-default binary-format : [ default_binary_format ] ; + + +local rule default_abi ( ) +{ + local tmp = sysv ; + if [ os.name ] = "NT" { tmp = ms ; } + else if [ os.platform ] = "ARM" { tmp = aapcs ; } + else if [ os.platform ] = "MIPS" { tmp = o32 ; } + return $(tmp) ; +} + +feature.feature abi + : aapcs + eabi + ms + n32 + n64 + o32 + o64 + sysv + : propagated + ; +feature.set-default abi : [ default_abi ] ; + actions gas { - as -o "$(<)" "$(>)" + as -o "$(<)" "$(>)" } actions masm { - ml /c /Fo"$(<)" "$(>)" + ml /c /Fo"$(<)" "$(>)" } actions masm64 { - ml64 /c /Fo"$(<)" "$(>)" + ml64 /c /Fo"$(<)" "$(>)" } rule configure ( properties * ) { - local result ; + local result ; - return $(result) ; +# if ( ! 
( gcc in $(properties) +# || intel in $(properties) +# || msvc in $(properties) ) ) +# { +# result = no ; +# ECHO "toolset not supported" ; +# } + + return $(result) ; } # ARM alias asm_context_sources : asm/fcontext_arm_aapcs_elf_gas.S - : arm - gcc + : aapcs + arm + elf + gcc ; alias asm_context_sources : asm/fcontext_arm_aapcs_elf_gas.S - : arm - qcc + : aapcs + arm + elf + qcc ; alias asm_context_sources : [ make asm/fcontext_arm_aapcs_elf_gas.o : asm/fcontext_arm_aapcs_elf_gas.S : @gas ] - : arm + : aapcs + arm + elf ; # MIPS 32bit alias asm_context_sources : asm/fcontext_mips32_o32_elf_gas.S - : mips1 - gcc + : o32 + mips1 + elf + gcc ; alias asm_context_sources : asm/fcontext_mips32_o32_elf_gas.S - : mips1 - qcc + : o32 + mips1 + elf + qcc ; alias asm_context_sources : [ make asm/fcontext_mips32_o32_elf_gas.o : asm/fcontext_mips32_o32_elf_gas.S : @gas ] - : mips1 + : o32 + mips1 + elf ; # POWERPC 32bit alias asm_context_sources : asm/fcontext_ppc32_sysv_elf_gas.S - : 32 + : sysv + 32 power - gcc + elf + gcc ; alias asm_context_sources : asm/fcontext_ppc32_sysv_elf_gas.S - : 32 + : sysv + 32 power - qcc + elf + qcc ; alias asm_context_sources : [ make asm/fcontext_ppc32_sysv_elf_gas.o : asm/fcontext_ppc32_sysv_elf_gas.S : @gas ] - : 32 + : sysv + 32 power + elf ; # POWERPC 64bit alias asm_context_sources : asm/fcontext_ppc64_sysv_elf_gas.S - : 64 + : sysv + 64 power - gcc + elf + gcc ; alias asm_context_sources : asm/fcontext_ppc64_sysv_elf_gas.S - : 64 + : sysv + 64 power - qcc + elf + qcc ; alias asm_context_sources : [ make asm/fcontext_ppc64_sysv_elf_gas.o : asm/fcontext_ppc64_sysv_elf_gas.S : @gas ] - : 64 + : sysv + 64 power + elf ; # I386 alias asm_context_sources : asm/fcontext_i386_sysv_elf_gas.S - : 32 + : sysv + 32 x86 - gcc + elf + gcc ; alias asm_context_sources : asm/fcontext_i386_sysv_elf_gas.S - : 32 + : sysv + 32 x86 - qcc + elf + qcc ; alias asm_context_sources : asm/fcontext_i386_sysv_elf_gas.S - : 32 + : sysv + 32 x86 - intel + elf + intel 
; alias asm_context_sources : [ make asm/fcontext_i386_sysv_elf_gas.o : asm/fcontext_i386_sysv_elf_gas.S : @gas ] - : 32 + : sysv + 32 x86 + elf ; alias asm_context_sources : asm/fcontext_i386_sysv_macho_gas.S - : 32 + : sysv + 32 x86 + mach-o darwin - gcc + gcc ; alias asm_context_sources : asm/fcontext_i386_sysv_macho_gas.S - : 32 + : sysv + 32 x86 + mach-o darwin - intel + qcc + ; + +alias asm_context_sources + : asm/fcontext_i386_sysv_macho_gas.S + : sysv + 32 + x86 + mach-o + darwin + intel ; alias asm_context_sources : [ make asm/fcontext_i386_sysv_macho_gas.o : asm/fcontext_i386_sysv_macho_gas.S : @gas ] - : 32 + : sysv + 32 x86 + mach-o darwin ; alias asm_context_sources : asm/fcontext_i386_ms_pe_masm.asm - : 32 + : ms + 32 x86 + pe windows - intel + intel ; alias asm_context_sources : asm/fcontext_i386_ms_pe_masm.asm - : 32 + : ms + 32 x86 + pe windows - msvc + msvc ; alias asm_context_sources : [ make asm/fcontext_i386_ms_pe_masm.o : asm/fcontext_i386_ms_pe_masm.asm : @masm ] - : 32 + : ms + 32 x86 + pe windows ; - # X86_64 alias asm_context_sources : asm/fcontext_x86_64_sysv_elf_gas.S - : 64 + : sysv + 64 x86 - gcc + elf + gcc ; alias asm_context_sources : asm/fcontext_x86_64_sysv_elf_gas.S - : 64 + : sysv + 64 x86 - qcc + elf + qcc ; alias asm_context_sources : asm/fcontext_x86_64_sysv_elf_gas.S - : 64 + : sysv + 64 x86 - intel + elf + intel ; alias asm_context_sources : [ make asm/fcontext_x86_64_sysv_elf_gas.o : asm/fcontext_x86_64_sysv_elf_gas.S : @gas ] - : 64 + : sysv + 64 x86 - ; - - -alias asm_context_sources - : asm/fcontext_x86_64_sysv_macho_gas.S - : 64 - x86 - darwin - gcc + elf ; alias asm_context_sources : asm/fcontext_x86_64_sysv_macho_gas.S - : 64 + : sysv + 64 x86 + mach-o darwin - intel + gcc + ; + +alias asm_context_sources + : asm/fcontext_x86_64_sysv_macho_gas.S + : sysv + 64 + x86 + mach-o + darwin + intel ; alias asm_context_sources : [ make asm/fcontext_x86_64_sysv_macho_gas.o : asm/fcontext_x86_64_sysv_macho_gas.S : @gas ] - : 64 
+ : sysv + 64 x86 + mach-o darwin ; alias asm_context_sources : asm/fcontext_x86_64_ms_pe_masm.asm - : 64 + : ms + 64 x86 + pe windows - intel + intel ; alias asm_context_sources : asm/fcontext_x86_64_ms_pe_masm.asm - : 64 + : ms + 64 x86 + pe windows - msvc + msvc ; alias asm_context_sources : [ make asm/fcontext_x86_64_ms_pe_masm.o : asm/fcontext_x86_64_ms_pe_masm.asm : @masm64 ] - : 64 + : ms + 64 x86 + pe windows ; diff --git a/doc/config.qbk b/doc/config.qbk deleted file mode 100644 index 7716a48..0000000 --- a/doc/config.qbk +++ /dev/null @@ -1,60 +0,0 @@ -[/ - Copyright Oliver Kowalke 2009. - Distributed under the Boost Software License, Version 1.0. - (See accompanying file LICENSE_1_0.txt or copy at - http://www.boost.org/LICENSE_1_0.txt -] - -[section:installtion How to build and install] - -__boost_context__ must be built for the particular compiler(s) and CPU architecture(s)s -being targeted. __boost_context__ includes assembly code and, therefore, requires -GNU AS for supported POSIX systems, and MASM for Windows systems. - -[note The architecture, instruction set, and address model are optional __boost_build__ -properties that must be given on the bjam command line, as shown in the table below.] 
- -[table - [[][]] - [ - [ARM, UNIX, aapcs, elf] - [bjam toolset = gcc architecture = arm] - ] - [ - [MIPS (32bit), UNIX, o32, elf] - [bjam toolset = gcc architecture = mips1] - ] - [ - [I386, UNIX, sysv, elf] - [bjam toolset = gcc architecture = x86 instruction-set = i686 address-model = 32] - ] - [ - [I386, UNIX, sysv, elf] - [bjam toolset = intel architecture = x86 instruction-set = i686 address-model = 32] - ] - [ - [I386, Windows, ms, pe] - [bjam toolset = msvc-9.0 architecture = x86 instruction-set = i686 address-model = 32] - ] - [ - [PowerPc (32bit), UNIX, sysv, elf] - [bjam toolset = gcc architecture = power address-model = 32] - ] - [ - [PowerPc (64bit), UNIX, sysv, elf] - [bjam toolset = gcc architecture = power address-model = 64] - ] - [ - [X86_64, UNIX, sysv, elf] - [bjam toolset = gcc architecture = x86 instruction-set = yorksfield address-model = 64] - ] - [ - [X86_64, UNIX, sysv, elf] - [bjam toolset = intel architecture = x86 instruction-set = yorksfield address-model = 64] - ] - [ - [X86_64, Windows, ms, pe] - [bjam toolset = msvc-10.0 architecture = x86 instruction-set = yorksfield address-model = 64] - ] -] -[endsect] diff --git a/doc/fcontext.qbk b/doc/fcontext.qbk index c4b2067..3f1bc53 100644 --- a/doc/fcontext.qbk +++ b/doc/fcontext.qbk @@ -11,7 +11,8 @@ Each instance of __fcontext__ represents a context (CPU registers and stack space). Together with its related functions __jump_fcontext__ and __make_fcontext__ it provides a execution control transfer mechanism similar interface like -[@http://www.kernel.org/doc/man-pages/online/pages/man2/getcontext.2.html ucontext_t]. +[@http://www.kernel.org/doc/man-pages/online/pages/man2/getcontext.2.html +ucontext_t]. __fcontext__ and its functions are located in __context_ns__ and the functions are declared as extern "C". @@ -40,8 +41,8 @@ intptr_t as argument) must be initialized by function __make_fcontext__. 
// context fc uses f() as context function make_fcontext( & fc, f); -__fcontext__ requires a pointer to the top of the stack (__fc_base__) as well -as the size of the stack. +__fcontext__ requires a pointer to the stack (__fc_base__) as well as the size of the +stack. Calling __jump_fcontext__ invokes the __context_fn__ in a newly created context complete with registers, flags, stack and instruction pointers. When control diff --git a/doc/performance.qbk b/doc/performance.qbk index 2774bfa..c486c62 100644 --- a/doc/performance.qbk +++ b/doc/performance.qbk @@ -14,6 +14,9 @@ active, cache warm-up was accounted for, and the one running thread was pinned to a single CPU. The code was compiled using the build options, 'variant = release cxxflags = -DBOOST_DISABLE_ASSERTS'. +If `-DBOOST_USE_UCONTEXT` is added to cxxflags, the performance of `ucontext` +is measured too. + The numbers in the table are the number of cycles per iteration, based upon an average computed over 10 iterations. diff --git a/doc/requirements.qbk b/doc/requirements.qbk index aa5a656..719ad0e 100644 --- a/doc/requirements.qbk +++ b/doc/requirements.qbk @@ -12,7 +12,11 @@ architecture(s)s being targeted. __boost_context__ includes assembly code and, therefore, requires GNU AS for supported POSIX systems, and MASM for Windows systems. -[important Please note that address-model=64 must be given to bjam command line -on 64bit Windows (boost-build issue).] +[important Please note that `address-model=64` must be given on the bjam command line +on 64bit Windows for a 64bit build; otherwise 32bit code will be generated.] + +[important For cross-compiling the library you must specify certain additional +properties on the bjam command line: `target-os`, `abi`, `binary-format`, +`architecture` and `address-model`.] [endsect] diff --git a/doc/stack.qbk b/doc/stack.qbk index 306ca27..ab8d5fe 100644 --- a/doc/stack.qbk +++ b/doc/stack.qbk @@ -50,9 +50,9 @@ results in undefined behaviour.]
__boost_context__ provides a __stack_allocator__ `stack_allocator` which models the __stack_allocator_concept__ concept. -It appends a __guard_page__ to protect against exceeding the stack. If the guard -page is accessed (read or write operation) a segmentation fault/access violation -is generated by the operating system. +It creates a __guard_page__ at the end of the stack to protect against exceeding +the stack. If the guard page is accessed (read or write operation) a +segmentation fault/access violation is generated by the operating system. [endsect] @@ -77,12 +77,14 @@ the environment. [heading `std::size_t default_stacksize()`] [variablelist [[Returns:] [Returns a default stack size, which may be platform specific. -The present implementation returns a value of 256 kB.]] +If the stack is unbounded then the present implementation returns the maximum of +`256 kB` and `minimum_stacksize()`.]] ] [heading `std::size_t minimum_stacksize()`] [variablelist -[[Returns:] [Returns the minimum size in bytes of stack defined by the environment.]] +[[Returns:] [Returns the minimum size in bytes of stack defined by the +environment plus the size of one page (guard page).]] [[Throws:] [Nothing.]] ] diff --git a/example/exit.cpp b/example/exit.cpp index f58c979..22ff2f6 100644 --- a/example/exit.cpp +++ b/example/exit.cpp @@ -37,11 +37,11 @@ int main( int argc, char * argv[]) ctx::fcontext_t fcm; ctx::stack_allocator alloc; - fc1.fc_stack.base = alloc.allocate(ctx::minimum_stacksize()); + fc1.fc_stack.sp = alloc.allocate(ctx::minimum_stacksize()); fc1.fc_stack.size = ctx::minimum_stacksize(); ctx::make_fcontext( & fc1, f1); - fc2.fc_stack.base = alloc.allocate(ctx::minimum_stacksize()); + fc2.fc_stack.sp = alloc.allocate(ctx::minimum_stacksize()); fc2.fc_stack.size = ctx::minimum_stacksize(); ctx::make_fcontext( & fc2, f2); diff --git a/example/jump.cpp b/example/jump.cpp index d25ff20..57183dd 100644 --- a/example/jump.cpp +++ b/example/jump.cpp @@ -37,11 +37,11 @@ int main( int argc,
char * argv[]) { ctx::stack_allocator alloc1, alloc2; - fc1.fc_stack.base = alloc1.allocate(ctx::minimum_stacksize()); + fc1.fc_stack.sp = alloc1.allocate(ctx::minimum_stacksize()); fc1.fc_stack.size = ctx::minimum_stacksize(); ctx::make_fcontext( & fc1, f1); - fc2.fc_stack.base = alloc2.allocate(ctx::minimum_stacksize()); + fc2.fc_stack.sp = alloc2.allocate(ctx::minimum_stacksize()); fc2.fc_stack.size = ctx::minimum_stacksize(); ctx::make_fcontext( & fc2, f2); diff --git a/example/transfer.cpp b/example/transfer.cpp index a07079e..041a206 100644 --- a/example/transfer.cpp +++ b/example/transfer.cpp @@ -32,7 +32,7 @@ int main( int argc, char * argv[]) { ctx::stack_allocator alloc; - fc1.fc_stack.base = alloc.allocate(ctx::minimum_stacksize()); + fc1.fc_stack.sp = alloc.allocate(ctx::minimum_stacksize()); fc1.fc_stack.size = ctx::minimum_stacksize(); ctx::make_fcontext( & fc1, f1); diff --git a/include/boost/context/detail/fcontext_arm.hpp b/include/boost/context/detail/fcontext_arm.hpp index 4a56250..75b0680 100644 --- a/include/boost/context/detail/fcontext_arm.hpp +++ b/include/boost/context/detail/fcontext_arm.hpp @@ -27,11 +27,11 @@ extern "C" { struct stack_t { - void * base; + void * sp; std::size_t size; stack_t() : - base( 0), size( 0) + sp( 0), size( 0) {} }; diff --git a/include/boost/context/detail/fcontext_i386.hpp b/include/boost/context/detail/fcontext_i386.hpp index 62c4f9f..5869fc3 100644 --- a/include/boost/context/detail/fcontext_i386.hpp +++ b/include/boost/context/detail/fcontext_i386.hpp @@ -27,11 +27,11 @@ extern "C" { struct stack_t { - void * base; + void * sp; std::size_t size; stack_t() : - base( 0), size( 0) + sp( 0), size( 0) {} }; diff --git a/include/boost/context/detail/fcontext_i386_win.hpp b/include/boost/context/detail/fcontext_i386_win.hpp index 234937a..d503e7e 100644 --- a/include/boost/context/detail/fcontext_i386_win.hpp +++ b/include/boost/context/detail/fcontext_i386_win.hpp @@ -36,12 +36,12 @@ extern "C" { struct stack_t { 
- void * base; - void * limit; + void * sp; std::size_t size; + void * limit; stack_t() : - base( 0), limit( 0), size( 0) + sp( 0), size( 0), limit( 0) {} }; diff --git a/include/boost/context/detail/fcontext_mips.hpp b/include/boost/context/detail/fcontext_mips.hpp index 42029e3..431a367 100644 --- a/include/boost/context/detail/fcontext_mips.hpp +++ b/include/boost/context/detail/fcontext_mips.hpp @@ -29,11 +29,11 @@ extern "C" { struct stack_t { - void * base; + void * sp; std::size_t size; stack_t() : - base( 0), size( 0) + sp( 0), size( 0) {} }; diff --git a/include/boost/context/detail/fcontext_ppc.hpp b/include/boost/context/detail/fcontext_ppc.hpp index a2c6add..eeb6a7f 100644 --- a/include/boost/context/detail/fcontext_ppc.hpp +++ b/include/boost/context/detail/fcontext_ppc.hpp @@ -27,11 +27,11 @@ extern "C" { struct stack_t { - void * base; + void * sp; std::size_t size; stack_t() : - base( 0), size( 0) + sp( 0), size( 0) {} }; diff --git a/include/boost/context/detail/fcontext_x86_64.hpp b/include/boost/context/detail/fcontext_x86_64.hpp index 7f85420..403d5b8 100644 --- a/include/boost/context/detail/fcontext_x86_64.hpp +++ b/include/boost/context/detail/fcontext_x86_64.hpp @@ -27,11 +27,11 @@ extern "C" { struct stack_t { - void * base; + void * sp; std::size_t size; stack_t() : - base( 0), size( 0) + sp( 0), size( 0) {} }; diff --git a/include/boost/context/detail/fcontext_x86_64_win.hpp b/include/boost/context/detail/fcontext_x86_64_win.hpp index e828ae7..c60453d 100644 --- a/include/boost/context/detail/fcontext_x86_64_win.hpp +++ b/include/boost/context/detail/fcontext_x86_64_win.hpp @@ -36,12 +36,12 @@ extern "C" { struct stack_t { - void * base; - void * limit; + void * sp; std::size_t size; + void * limit; stack_t() : - base( 0), limit( 0), size( 0) + sp( 0), size( 0), limit( 0) {} }; diff --git a/performance/performance.cpp b/performance/performance.cpp index 98fe044..bee8d72 100644 --- a/performance/performance.cpp +++ 
b/performance/performance.cpp @@ -19,7 +19,7 @@ #include #include -#ifndef BOOST_WINDOWS +#ifdef BOOST_USE_UCONTEXT #include #endif @@ -32,40 +32,87 @@ namespace ctx = boost::ctx; -bool preserve_fpu = false; +bool pres_fpu = false; -#define CALL_FUNCTION(z,n,unused) \ - fn(); +#define CALL_FCONTEXT(z,n,unused) ctx::jump_fcontext( & fcm, & fc, 7, pres_fpu); -#define CALL_UCONTEXT(z,n,unused) \ - ::swapcontext( & ucm, & uc); - -#define CALL_FCONTEXT(z,n,unused) \ - ctx::jump_fcontext( & fcm, & fc, 7, preserve_fpu); - -#ifndef BOOST_WINDOWS -ucontext_t uc, ucm; +#ifdef BOOST_USE_UCONTEXT +# define CALL_UCONTEXT(z,n,unused) ::swapcontext( & ucm, & uc); #endif + +#define CALL_FUNCTION(z,n,unused) fn(); + + ctx::fcontext_t fc, fcm; -static void f3() -{ } - -#ifndef BOOST_WINDOWS -static void f2() -{ - while ( true) - ::swapcontext( & uc, & ucm); -} +#ifdef BOOST_USE_UCONTEXT +ucontext_t uc, ucm; #endif + static void f1( intptr_t) -{ - while ( true) - ctx::jump_fcontext( & fc, & fcm, 7, preserve_fpu); -} +{ while ( true) ctx::jump_fcontext( & fc, & fcm, 7, pres_fpu); } + +#ifdef BOOST_USE_UCONTEXT +static void f2() +{ while ( true) ::swapcontext( & uc, & ucm); } +#endif + +static void f3() +{} + #ifdef BOOST_CONTEXT_CYCLE +cycle_t test_fcontext_cycle( cycle_t ov) +{ + ctx::stack_allocator alloc; + fc.fc_stack.sp = alloc.allocate(ctx::default_stacksize()); + fc.fc_stack.size = ctx::default_stacksize(); + ctx::make_fcontext( & fc, f1); + + ctx::jump_fcontext( & fcm, & fc, 7, pres_fpu); + + // cache warum-up +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) + + cycle_t start( cycles() ); +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) + cycle_t total( cycles() - start); + + // we have two jumps and two measuremt-overheads + total -= ov; // overhead of measurement + total /= BOOST_PP_LIMIT_MAG; // per call + total /= 2; // 2x jump_to c1->c2 && c2->c1 + + return total; +} + +# ifdef BOOST_USE_UCONTEXT +cycle_t test_ucontext_cycle( cycle_t 
ov) +{ + ctx::stack_allocator alloc; + + ::getcontext( & uc); + uc.uc_stack.ss_sp = alloc.allocate(ctx::default_stacksize()); + uc.uc_stack.ss_size = ctx::default_stacksize(); + ::makecontext( & uc, f2, 7); + + // cache warum-up +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~) + + cycle_t start( cycles() ); +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~) + cycle_t total( cycles() - start); + + // we have two jumps and two measuremt-overheads + total -= ov; // overhead of measurement + total /= BOOST_PP_LIMIT_MAG; // per call + total /= 2; // 2x jump_to c1->c2 && c2->c1 + + return total; +} +# endif + cycle_t test_function_cycle( cycle_t ov) { boost::function< void() > fn( boost::bind( f3) ); @@ -83,25 +130,50 @@ BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FUNCTION, ~) return total; } +#endif -#ifndef BOOST_WINDOWS -cycle_t test_ucontext_cycle( cycle_t ov) + +#if _POSIX_C_SOURCE >= 199309L +zeit_t test_fcontext_zeit( zeit_t ov) +{ + ctx::stack_allocator alloc; + fc.fc_stack.sp = alloc.allocate(ctx::default_stacksize()); + fc.fc_stack.size = ctx::default_stacksize(); + ctx::make_fcontext( & fc, f1); + + ctx::jump_fcontext( & fcm, & fc, 7, pres_fpu); + + // cache warum-up +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) + + zeit_t start( zeit() ); +BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) + zeit_t total( zeit() - start); + + // we have two jumps and two measuremt-overheads + total -= ov; // overhead of measurement + total /= BOOST_PP_LIMIT_MAG; // per call + total /= 2; // 2x jump_to c1->c2 && c2->c1 + + return total; +} + +# ifdef BOOST_USE_UCONTEXT +zeit_t test_ucontext_zeit( zeit_t ov) { ctx::stack_allocator alloc; ::getcontext( & uc); - uc.uc_stack.ss_sp = - static_cast< char * >( alloc.allocate(ctx::default_stacksize() ) ) - - ctx::default_stacksize(); + uc.uc_stack.ss_sp = alloc.allocate(ctx::default_stacksize()); uc.uc_stack.ss_size = ctx::default_stacksize(); 
::makecontext( & uc, f2, 7); // cache warum-up BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~) - cycle_t start( cycles() ); + zeit_t start( zeit() ); BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~) - cycle_t total( cycles() - start); + zeit_t total( zeit() - start); // we have two jumps and two measuremt-overheads total -= ov; // overhead of measurement @@ -110,34 +182,8 @@ BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~) return total; } -#endif +# endif -cycle_t test_fcontext_cycle( cycle_t ov) -{ - ctx::stack_allocator alloc; - fc.fc_stack.base = alloc.allocate(ctx::default_stacksize()); - fc.fc_stack.size = ctx::default_stacksize(); - ctx::make_fcontext( & fc, f1); - - ctx::jump_fcontext( & fcm, & fc, 7, preserve_fpu); - - // cache warum-up -BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) - - cycle_t start( cycles() ); -BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) - cycle_t total( cycles() - start); - - // we have two jumps and two measuremt-overheads - total -= ov; // overhead of measurement - total /= BOOST_PP_LIMIT_MAG; // per call - total /= 2; // 2x jump_to c1->c2 && c2->c1 - - return total; -} -#endif - -#if _POSIX_C_SOURCE >= 199309L zeit_t test_function_zeit( zeit_t ov) { boost::function< void() > fn( boost::bind( f3) ); @@ -155,58 +201,6 @@ BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FUNCTION, ~) return total; } - -#ifndef BOOST_WINDOWS -zeit_t test_ucontext_zeit( zeit_t ov) -{ - ctx::stack_allocator alloc; - - ::getcontext( & uc); - uc.uc_stack.ss_sp = - static_cast< char * >( alloc.allocate(ctx::default_stacksize() ) ) - - ctx::default_stacksize(); - uc.uc_stack.ss_size = ctx::default_stacksize(); - ::makecontext( & uc, f2, 7); - - // cache warum-up -BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~) - - zeit_t start( zeit() ); -BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_UCONTEXT, ~) - zeit_t total( zeit() - start); - - // we 
have two jumps and two measuremt-overheads - total -= ov; // overhead of measurement - total /= BOOST_PP_LIMIT_MAG; // per call - total /= 2; // 2x jump_to c1->c2 && c2->c1 - - return total; -} -#endif - -zeit_t test_fcontext_zeit( zeit_t ov) -{ - ctx::stack_allocator alloc; - fc.fc_stack.base = alloc.allocate(ctx::default_stacksize()); - fc.fc_stack.size = ctx::default_stacksize(); - ctx::make_fcontext( & fc, f1); - - ctx::jump_fcontext( & fcm, & fc, 7, preserve_fpu); - - // cache warum-up -BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) - - zeit_t start( zeit() ); -BOOST_PP_REPEAT_FROM_TO( 0, BOOST_PP_LIMIT_MAG, CALL_FCONTEXT, ~) - zeit_t total( zeit() - start); - - // we have two jumps and two measuremt-overheads - total -= ov; // overhead of measurement - total /= BOOST_PP_LIMIT_MAG; // per call - total /= 2; // 2x jump_to c1->c2 && c2->c1 - - return total; -} #endif int main( int argc, char * argv[]) @@ -222,10 +216,10 @@ int main( int argc, char * argv[]) unsigned int res = test_fcontext_cycle( ov); std::cout << "fcontext: average of " << res << " cycles per switch" << std::endl; -#ifndef BOOST_WINDOWS +# ifdef BOOST_USE_UCONTEXT res = test_ucontext_cycle( ov); std::cout << "ucontext: average of " << res << " cycles per switch" << std::endl; -#endif +# endif res = test_function_cycle( ov); std::cout << "boost::function: average of " << res << " cycles per switch" << std::endl; } @@ -238,10 +232,10 @@ int main( int argc, char * argv[]) unsigned int res = test_fcontext_zeit( ov); std::cout << "fcontext: average of " << res << " ns per switch" << std::endl; -#ifndef BOOST_WINDOWS +# ifdef BOOST_USE_UCONTEXT res = test_ucontext_zeit( ov); std::cout << "ucontext: average of " << res << " ns per switch" << std::endl; -#endif +# endif res = test_function_zeit( ov); std::cout << "boost::function: average of " << res << " ns per switch" << std::endl; } diff --git a/src/asm/fcontext_arm_aapcs_elf_gas.S b/src/asm/fcontext_arm_aapcs_elf_gas.S index 
42755c9..cd00cd3 100644 --- a/src/asm/fcontext_arm_aapcs_elf_gas.S +++ b/src/asm/fcontext_arm_aapcs_elf_gas.S @@ -26,7 +26,7 @@ * ------------------------------------------------------------- * * | 0x2c| 0x30| | * * ------------------------------------------------------------- * - * |sbase|slimit| | * + * | sp | size| | * * ------------------------------------------------------------- * * ------------------------------------------------------------- * * | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | * @@ -79,6 +79,8 @@ jump_fcontext: make_fcontext: str a2, [a1,#40] @ save address of context function ldr a2, [a1,#44] @ load address of context stack base + ldr a3, [a1,#48] @ load size of context stack + add a2, a3 @ compute start address of context stack push {a1,lr} @ save pointer to fcontext_t mov a1, a2 @ context stack pointer as arg for align_stack diff --git a/src/asm/fcontext_i386_ms_pe_masm.asm b/src/asm/fcontext_i386_ms_pe_masm.asm index b288c41..8cec2c8 100644 --- a/src/asm/fcontext_i386_ms_pe_masm.asm +++ b/src/asm/fcontext_i386_ms_pe_masm.asm @@ -16,7 +16,7 @@ ; -------------------------------------------------------------- ; | 018h | 01ch | 020h | | ; -------------------------------------------------------------- -; | base | limit | size | | +; | sp | size | base | | ; -------------------------------------------------------------- ; -------------------------------------------------------------- ; | 9 | | @@ -61,9 +61,9 @@ jump_fcontext PROC EXPORT mov eax, [edx] ; load current SEH exception list mov [ecx+024h], eax ; save current exception list mov eax, [edx+04h] ; load current stack base - mov [ecx+018h], eax ; save current stack base + mov [ecx+020h], eax ; save current stack base mov eax, [edx+08h] ; load current stack limit - mov [ecx+01ch], eax ; save current stack limit + mov [ecx+018h], eax ; save current stack limit mov eax, [edx+010h] ; load fiber local storage mov [ecx+028h], eax ; save fiber local storage @@ -93,9 +93,9 @@ nxt: assume 
fs:error mov eax, [ecx+024h] ; load SEH exception list mov [edx], eax ; restore next SEH item - mov eax, [ecx+018h] ; load stack base + mov eax, [ecx+020h] ; load stack base mov [edx+04h], eax ; restore stack base - mov eax, [ecx+01ch] ; load stack limit + mov eax, [ecx+018h] ; load stack limit mov [edx+08h], eax ; restore stack limit mov eax, [ecx+028h] ; load fiber local storage mov [edx+010h], eax ; restore fiber local storage @@ -110,44 +110,51 @@ nxt: jump_fcontext ENDP make_fcontext PROC EXPORT - mov eax, [esp+04h] ; load address of the fcontext_t - mov ecx, [esp+08h] ; load address of the context function - mov [eax+014h], ecx ; save address of the context function - mov edx, [eax+018h] ; load address of context stack base - mov ecx, [eax+020h] ; load context stack size - neg ecx ; negate stacksize for LEA - lea ecx, [edx+ecx] ; compute context stack limit - mov [eax+01ch], ecx ; save context stack limit + push ebp ; save previous frame pointer; get the stack 16 byte aligned + mov ebp, esp ; set EBP to ESP + sub esp, 010h ; allocate stack space - push eax ; save pointer to fcontext_t - push edx ; context stack as arg for align_stack + mov eax, [ebp+08h] ; load address of fcontext_t + mov ecx, [ebp+0ch] ; load address of context function + mov [eax+014h], ecx ; save address of context function + mov edx, [eax+018h] ; load address of context stack (limit) + mov ecx, [eax+01ch] ; load context stack size + lea edx, [edx+ecx] ; compute top address of context stack (base) + mov [eax+020h], edx ; save top address of context stack (base) + + mov [esp+04h], eax ; save pointer to fcontext_t + mov [esp], edx ; context stack as arg for align_stack call align_stack ; call align_stack mov edx, eax ; begin of aligned context stack - pop eax ; remove arg for align_stack - pop eax ; restore pointer to fcontext_t - - lea edx, [edx-014h] ; reserve space for last frame on context stack, (ESP + 4) & 15 == 0 - mov [eax+010h], edx ; save the aligned stack - - mov ecx, seh_fcontext 
; set ECX to exception-handler - mov [edx+0ch], ecx ; save ECX as SEH handler - mov ecx, 0ffffffffh ; set ECX to -1 - mov [edx+08h], ecx ; save ECX as next SEH item - lea ecx, [edx+08h] ; load address of next SEH item - mov [eax+024h], ecx ; save next SEH - + mov eax, [esp+04h] ; restore pointer to fcontext_t + stmxcsr [eax+02ch] ; save MMX control word fnstcw [eax+030h] ; save x87 control word - mov ecx, finish ; address of finish - mov [edx], ecx + lea edx, [edx-01ch] ; reserve space for last frame and seh on context stack, (ESP - 0x4) % 16 == 0 + mov [eax+010h], edx ; save address in EDX as stack pointer for context stack + + mov ecx, seh_fcontext ; set ECX to exception-handler + mov [edx+018h], ecx ; save ECX as SEH handler + mov ecx, 0ffffffffh ; set ECX to -1 + mov [edx+014h], ecx ; save ECX as next SEH item + lea ecx, [edx+014h] ; load address of next SEH item + mov [eax+024h], ecx ; save next SEH + + mov ecx, finish ; abs address of finish + mov [edx], ecx ; save address of finish as return address for context function + ; entered after context function returns + + add esp, 010h ; deallocate stack space + pop ebp xor eax, eax ret finish: + ; ESP == stack pointer of context function + 0x4 xor eax, eax - push eax ; exit code is zero + mov [esp], eax ; exit code is zero call _exit ; exit application hlt make_fcontext ENDP diff --git a/src/asm/fcontext_i386_sysv_elf_gas.S b/src/asm/fcontext_i386_sysv_elf_gas.S index a813287..26a2099 100644 --- a/src/asm/fcontext_i386_sysv_elf_gas.S +++ b/src/asm/fcontext_i386_sysv_elf_gas.S @@ -19,7 +19,7 @@ * -------------------------------------------------------------- * * | 0x18 | 0x1c | | * * -------------------------------------------------------------- * - * | sbase | slimit | | * + * | sp | size | | * * -------------------------------------------------------------- * * -------------------------------------------------------------- * * | 8 | 9 | | * @@ -76,46 +76,54 @@ jump_fcontext: .align 2 .type 
make_fcontext,@function make_fcontext: - movl 0x4(%esp), %eax /* load address of the fcontext_t */ - movl 0x8(%esp), %ecx /* load address of the context function */ - movl %ecx, 0x14(%eax) /* save address of the context function */ - movl 0x18(%eax), %edx /* load address of context stack base */ + pushl %ebp /* save previous frame pointer; get the stack 16 byte aligned */ + movl %esp, %ebp /* set EBP to ESP */ + subl $0x10, %esp /* allocate stack space */ - pushl %eax /* save pointer to fcontext_t */ - pushl %ebx /* save EBX */ - pushl %edx /* context stack pointer as arg for align_stack */ + movl 0x8(%ebp), %eax /* load address of fcontext_t */ + movl 0xc(%ebp), %ecx /* load address of context function */ + movl %ecx, 0x14(%eax) /* save address of context function */ + movl 0x18(%eax), %edx /* load address of context stack base */ + movl 0x1c(%eax), %ecx /* load size of context stack */ + leal (%edx,%ecx), %edx /* compute top address of context stack */ + + movl %ebx, 0x8(%esp) /* save EBX */ + movl %eax, 0x4(%esp) /* save pointer to fcontext_t */ + movl %edx, (%esp) /* context stack pointer as arg for align_stack */ call 1f 1: popl %ebx /* address of label 1 */ addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx /* compute address of GOT and store it in EBX */ call align_stack@PLT /* call align_stack */ movl %eax, %edx /* begin of aligned context stack */ - popl %eax /* remove arg for align_stack */ - popl %ebx /* restore EBX */ - popl %eax /* restore pointer to fcontext_t */ - - leal -0x14(%edx), %edx /* reserve space for the last frame on context stack, (ESP + 4) % 16 == 0 */ - movl %edx, 0x10(%eax) /* save the aligned context stack base */ + movl 0x4(%esp), %eax /* restore pointer to fcontext_t */ + movl 0x8(%esp), %ebx /* restore EBX */ stmxcsr 0x20(%eax) /* save MMX control and status word */ fnstcw 0x24(%eax) /* save x87 control word */ - call 2f -2: popl %ecx /* address of label 2 */ - addl $finish-2b, %ecx /* helper code executed after context function returns */ - 
movl %ecx, (%edx) + leal -0xc(%edx), %edx /* reserve space for the last frame on context stack; (ESP - 0x4) % 16 == 0 */ + movl %edx, 0x10(%eax) /* save address in EDX as stack pointer for context function */ - xorl %eax, %eax + call 2f +2: popl %ecx /* address of label 2 */ + addl $finish-2b, %ecx /* compute abs address of label finish */ + movl %ecx, (%edx) /* save address of finish as return address for context functions */ + /* entered after context function returns */ + + addl $0x10, %esp /* deallocate stack space */ + pop %ebp + + xorl %eax, %eax ret finish: - leal -0xc(%esp), %esp - + /* ESP == stack pointer of context function + 0x4 */ call 3f 3: popl %ebx /* address of label 3 */ addl $_GLOBAL_OFFSET_TABLE_+[.-3b], %ebx /* compute address of GOT and store it in EBX */ - xorl %eax, %eax - pushl %eax /* exit code is zero */ + xorl %eax, %eax + movl %eax, (%esp) /* exit code is zero */ call _exit@PLT /* exit application */ hlt .size make_fcontext,.-make_fcontext diff --git a/src/asm/fcontext_i386_sysv_macho_gas.S b/src/asm/fcontext_i386_sysv_macho_gas.S index 5972a03..9799127 100644 --- a/src/asm/fcontext_i386_sysv_macho_gas.S +++ b/src/asm/fcontext_i386_sysv_macho_gas.S @@ -19,7 +19,7 @@ * -------------------------------------------------------------- * * | 0x18 | 0x1c | | * * -------------------------------------------------------------- * - * | sbase | slimit | | * + * | sp | size | | * * -------------------------------------------------------------- * * -------------------------------------------------------------- * * | 8 | 9 | | * @@ -73,37 +73,44 @@ _jump_fcontext: .globl _make_fcontext .align 2 _make_fcontext: - movl 0x4(%esp), %eax /* load address of the fcontext_t */ - movl 0x8(%esp), %ecx /* load address of the context function */ - movl %ecx, 0x14(%eax) /* save address of the context function */ - movl 0x18(%eax), %edx /* load address of context stack base */ + pushl %ebp /* save previous frame pointer; get the stack 16 byte aligned */ + movl 
%esp, %ebp /* set EBP to ESP */ + subl $0x10, %esp /* allocate stack space */ - pushl %eax /* save pointer to fcontext_t */ - pushl %ebx /* save EBX */ - pushl %edx /* context stack pointer as arg for align_stack */ + movl 0x8(%ebp), %eax /* load address of fcontext_t */ + movl 0xc(%ebp), %ecx /* load address of context function */ + movl %ecx, 0x14(%eax) /* save address of context function */ + movl 0x18(%eax), %edx /* load address of context stack base */ + movl 0x1c(%eax), %ecx /* load size of context stack */ + leal (%edx,%ecx), %edx /* compute top address of context stack */ + + movl %eax, 0x4(%esp) /* save pointer to fcontext_t */ + movl %edx, (%esp) /* context stack pointer as arg for align_stack */ call _align_stack /* call align_stack */ movl %eax, %edx /* begin of aligned context stack */ - popl %eax /* remove arg for align_stack */ - popl %ebx /* restore EBX */ - popl %eax /* restore pointer to fcontext_t */ - - leal -0x14(%edx), %edx /* reserve space for the last frame on context stack, (ESP + 4) % 16 == 0 */ - movl %edx, 0x10(%eax) /* save the aligned context stack base */ + movl 0x4(%esp), %eax /* restore pointer to fcontext_t */ stmxcsr 0x20(%eax) /* save MMX control and status word */ fnstcw 0x24(%eax) /* save x87 control word */ + leal -0xc(%edx), %edx /* reserve space for the last frame on context stack, (ESP - 0x4) % 16 == 0 */ + movl %edx, 0x10(%eax) /* save address in EDX as stack pointer for context function */ + call 1f -1: popl %ecx /* address of lable 1 */ - addl $finish-1b, %ecx /* helper code executed after context function returns */ - movl %ecx, (%edx) +1: popl %ecx /* address of label 1 */ + addl $finish-1b, %ecx /* compute abs address of label finish */ + movl %ecx, (%edx) /* save address of finish as return address for context function */ + /* entered after context function returns */ + + addl $0x10, %esp /* deallocate stack space */ + pop %ebp xorl %eax, %eax ret finish: - leal -0xc(%esp), %esp - xorl %eax, %eax - pushl %eax /* exit 
code is zero */ + /* ESP == stack pointer of context function + 0x4 */ + xorl %eax, %eax + movl %eax, (%esp) /* exit code is zero */ call _exit /* exit application */ hlt diff --git a/src/asm/fcontext_mips32_o32_elf_gas.S b/src/asm/fcontext_mips32_o32_elf_gas.S index df0bc52..9aef66f 100644 --- a/src/asm/fcontext_mips32_o32_elf_gas.S +++ b/src/asm/fcontext_mips32_o32_elf_gas.S @@ -26,7 +26,7 @@ * ------------------------------------------------------------- * * | 104 | 112 | | * * ------------------------------------------------------------- * - * |sbase|slimt| | * + * | sp | size| | * * ------------------------------------------------------------- * * ------------------------------------------------------------- * * | 15 | 16 | 17 | 18 | 19 | 20 | | * @@ -112,6 +112,8 @@ make_fcontext: sw $gp, 24($a0) # save global pointer sw $a1, 96($a0) # save address of context function lw $t0, 104($a0) # load address of context stack base + lw $t1, 112($a0) # load size of context stack + add $t0, $t1, $t0 # compute start address of context stack sub $sp, $sp, 28 sw $ra, 24($sp) diff --git a/src/asm/fcontext_ppc32_sysv_elf_gas.S b/src/asm/fcontext_ppc32_sysv_elf_gas.S index bdac7b8..978a27e 100644 --- a/src/asm/fcontext_ppc32_sysv_elf_gas.S +++ b/src/asm/fcontext_ppc32_sysv_elf_gas.S @@ -33,7 +33,7 @@ * ------------------------------------------------------------- * * | 92 | 96 | | * * ------------------------------------------------------------- * - * |sbase|slimt| | * + * | sp | size| | * * ------------------------------------------------------------- * * ------------------------------------------------------------- * * | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | * @@ -186,6 +186,8 @@ jump_fcontext: make_fcontext: stw %r4, 88(%r3) # save address of context function lwz %r0, 92(%r3) # load address of context stack base + lwz %r4, 96(%r3) # load size of context stack + add %r0, %r4, %r0 # compute start address of context stack li %r4, 28 subf %r1, %r4, %r1 # reserve 
space on stack diff --git a/src/asm/fcontext_ppc64_sysv_elf_gas.S b/src/asm/fcontext_ppc64_sysv_elf_gas.S index 89ad544..bb14819 100644 --- a/src/asm/fcontext_ppc64_sysv_elf_gas.S +++ b/src/asm/fcontext_ppc64_sysv_elf_gas.S @@ -47,7 +47,7 @@ * ------------------------------------------------------------- * * | 184 | 188 | 192 | 196 | | * * ------------------------------------------------------------- * - * | sbase | slimt | | * + * | sp | size | | * * ------------------------------------------------------------- * * ------------------------------------------------------------- * * | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | * @@ -214,6 +214,8 @@ make_fcontext: .make_fcontext: std %r4, 176(%r3) # save address of context function ld %r0, 184(%r3) # load address of context stack base + ld %r4, 192(%r3) # load size of context stack + add %r0, %r4, %r0 # compute start address of context stack li %r4, 56 subf %r1, %r4, %r1 # reserve space on stack diff --git a/src/asm/fcontext_x86_64_ms_pe_masm.asm b/src/asm/fcontext_x86_64_ms_pe_masm.asm index c42bf99..43a5255 100644 --- a/src/asm/fcontext_x86_64_ms_pe_masm.asm +++ b/src/asm/fcontext_x86_64_ms_pe_masm.asm @@ -30,7 +30,7 @@ ; ---------------------------------------------------------------------------------- ; | 0x50 | 0x54 | 0x58 | 0x5c | 0x60 | 0x64 | | ; ---------------------------------------------------------------------------------- -; | base | limit | size | | +; | sp | size | base | | ; ---------------------------------------------------------------------------------- ; ---------------------------------------------------------------------------------- ; | 26 | 27 | | @@ -101,9 +101,9 @@ jump_fcontext PROC EXPORT FRAME:seh_fcontext mov r10, gs:[030h] ; load NT_TIB mov rax, [r10+08h] ; load current stack base - mov [rcx+050h], rax ; save current stack base + mov [rcx+060h], rax ; save current stack base mov rax, [r10+010h] ; load current stack limit - mov [rcx+058h], rax ; save current stack limit + mov 
[rcx+050h], rax ; save current stack limit mov rax, [r10+018h] ; load fiber local storage mov [rcx+068h], rax ; save fiber local storage @@ -112,7 +112,7 @@ jump_fcontext PROC EXPORT FRAME:seh_fcontext stmxcsr [rcx+070h] ; save MMX control and status word fnstcw [rcx+074h] ; save x87 control word - mov r10, [rcx+078h] ; address of aligned XMM storage + mov r10, [rcx+078h] ; address of aligned XMM storage movaps [r10], xmm6 movaps [r10+010h], xmm7 movaps [r10+020h], xmm8 @@ -126,7 +126,7 @@ jump_fcontext PROC EXPORT FRAME:seh_fcontext ldmxcsr [rdx+070h] ; restore MMX control and status word fldcw [rdx+074h] ; restore x87 control word - mov r10, [rdx+078h] ; address of aligned XMM storage + mov r10, [rdx+078h] ; address of aligned XMM storage movaps xmm6, [r10] movaps xmm7, [r10+010h] movaps xmm8, [r10+020h] @@ -154,9 +154,9 @@ nxt: mov rbp, [rdx+038h] ; restore RBP mov r10, gs:[030h] ; load NT_TIB - mov rax, [rdx+050h] ; load stack base + mov rax, [rdx+060h] ; load stack base mov [r10+08h], rax ; restore stack base - mov rax, [rdx+058h] ; load stack limit + mov rax, [rdx+050h] ; load stack limit mov [r10+010h], rax ; restore stack limit mov rax, [rdx+068h] ; load fiber local storage mov [r10+018h], rax ; restore fiber local storage @@ -170,40 +170,44 @@ nxt: jmp r10 ; indirect jump to caller jump_fcontext ENDP -make_fcontext PROC EXPORT FRAME ; generate function table entry in .pdata and unwind information in E +make_fcontext PROC EXPORT FRAME ; generate function table entry in .pdata and unwind information in .endprolog ; .xdata for a function's structured exception handling unwind behavior - mov [rcx+048h], rdx ; save address of context function - mov rdx, [rcx+050h] ; load address of context stack base - mov r8, [rcx+060h] ; load context stack size - neg r8 ; negate r8 for LEA - lea r8, [rdx+r8] ; compute the address of context stack limit - mov [rcx+058h], r8 ; save the address of context stack limit + push rbp ; save previous frame pointer; get the stack 16 
byte aligned + mov rbp, rsp ; set RBP to RSP + sub rsp, 040h ; allocate shadow space - push rcx ; save pointer to fcontext_t - sub rsp, 028h ; reserve shadow space for align_stack + mov [rcx+048h], rdx ; save address of context function + mov rdx, [rcx+050h] ; load address of context stack pointer (limit) + mov r8, [rcx+058h] ; load context stack size + lea rdx, [rdx+r8] ; compute top address of context stack (base) + mov [rcx+060h], rdx ; save top address of context stack (base) + + mov [rbp-08h], rcx ; save pointer to fcontext_t mov rcx, rdx ; context stack pointer as arg for align_stack - mov [rsp+8], rcx call align_stack ; call align_stack mov rdx, rax ; begin of aligned context stack - add rsp, 028h - pop rcx ; restore pointer to fcontext_t - - lea rdx, [rdx-028h] ; reserve 32byte shadow space + return address on stack, (RSP + 8) % 16 == 0 - mov [rcx+040h], rdx ; save the address where the context stack begins + mov rcx, [rbp-08h] ; restore pointer to fcontext_t stmxcsr [rcx+070h] ; save MMX control and status word fnstcw [rcx+074h] ; save x87 control word - lea rax, finish ; helper code executed after fn() returns - mov [rdx], rax ; store address off the helper function as return address + lea rdx, [rdx-028h] ; reserve 32byte shadow space + return address on stack, (RSP - 0x8) % 16 == 0 + mov [rcx+040h], rdx ; save address in RDX as stack pointer for context function - xor rax, rax ; set RAX to zero + lea rax, finish ; compute abs address of label finish + mov [rdx], rax ; save address of finish as return address for context function + ; entered after context function returns + + add rsp, 040h ; deallocate shadow space + pop rbp ; restore previous frame pointer + + xor rax, rax ret finish: - xor rcx, rcx - mov [rsp+08h], rcx + ; RSP == stack pointer in fcontext + 0x8 + xor rcx, rcx ; exit code is zero call _exit ; exit application hlt make_fcontext ENDP diff --git a/src/asm/fcontext_x86_64_sysv_elf_gas.S b/src/asm/fcontext_x86_64_sysv_elf_gas.S index 
d5e7411..3d06311 100644 --- a/src/asm/fcontext_x86_64_sysv_elf_gas.S +++ b/src/asm/fcontext_x86_64_sysv_elf_gas.S @@ -26,7 +26,7 @@ * ---------------------------------------------------------------------------------- * * | 0x40 | 0x44 | 0x48 | 0x4c | | * * ---------------------------------------------------------------------------------- * - * | sbase | slimit | | * + * | sp | size | | * * ---------------------------------------------------------------------------------- * * ---------------------------------------------------------------------------------- * * | 20 | 21 | | * @@ -86,30 +86,41 @@ jump_fcontext: .type make_fcontext,@function .align 16 make_fcontext: - movq %rsi, 0x38(%rdi) /* save address of context function */ - movq 0x40(%rdi), %rdx /* load address of context stack base */ + pushq %rbp /* save previous frame pointer; get the stack 16 byte aligned */ + movq %rsp, %rbp /* set RBP to RSP */ + subq $0x10, %rsp /* allocate stack space */ - pushq %rdi /* save pointer to fcontext_t */ - movq %rdx, %rdi /* context stack pointer as arg for align_stack */ - call align_stack@PLT /* align context stack */ - movq %rax, %rdx /* begin of aligned context stack */ - popq %rdi /* restore pointer to fcontext_t */ + movq %rsi, 0x38(%rdi) /* save address of context function */ + movq 0x40(%rdi), %rdx /* load address of context stack base */ + movq 0x48(%rdi), %rax /* load size of context stack */ + leaq (%rdx,%rax), %rdx /* compute top address of context stack */ - leaq -0x8(%rdx), %rdx /* reserve space for the last frame on context stack, (RSP + 8) & 15 == 0 */ - movq %rdx, 0x30(%rdi) /* save the algined context stack base */ + movq %rdi, (%rsp) /* save pointer to fcontext_t */ + movq %rdx, %rdi /* context stack pointer as arg for align_stack */ + call align_stack@PLT /* align context stack */ + movq %rax, %rdx /* begin of aligned context stack */ + movq (%rsp), %rdi /* restore pointer to fcontext_t */ - stmxcsr 0x50(%rdi) /* save MMX control and status word */ - 
fnstcw 0x54(%rdi) /* save x87 control word */ + stmxcsr 0x50(%rdi) /* save MMX control and status word */ + fnstcw 0x54(%rdi) /* save x87 control word */ - leaq finish(%rip), %rcx /* address of finish; called after context function returns */ - movq %rcx, (%rdx) + leaq -0x8(%rdx), %rdx /* reserve space for the last frame on context stack, (RSP - 0x8) % 16 == 0 */ + movq %rdx, 0x30(%rdi) /* save address in RDX as stack pointer for context function */ - xorq %rax, %rax + leaq finish(%rip), %rcx /* compute abs address of label finish */ + movq %rcx, (%rdx) /* save address of finish as return address for context function */ + /* entered after context function returns */ + + addq $0x10, %rsp /* deallocate shadow space */ + popq %rbp /* restore previous frame pointer */ + + xorq %rax, %rax ret finish: - xorq %rdi, %rdi /* exit code is zero */ - call _exit@PLT /* exit application */ + /* RSP == stack pointer of context function + 0x8 */ + xorq %rdi, %rdi /* exit code is zero */ + call _exit@PLT /* exit application */ hlt .size make_fcontext,.-make_fcontext diff --git a/src/asm/fcontext_x86_64_sysv_macho_gas.S b/src/asm/fcontext_x86_64_sysv_macho_gas.S index 7f73066..ec30b3c 100644 --- a/src/asm/fcontext_x86_64_sysv_macho_gas.S +++ b/src/asm/fcontext_x86_64_sysv_macho_gas.S @@ -26,7 +26,7 @@ * ---------------------------------------------------------------------------------- * * | 0x40 | 0x44 | 0x48 | 0x4c | | * * ---------------------------------------------------------------------------------- * - * | sbase | slimit | | * + * | sp | size | | * * ---------------------------------------------------------------------------------- * * ---------------------------------------------------------------------------------- * * | 20 | 21 | | * @@ -83,28 +83,39 @@ _jump_fcontext: .globl _make_fcontext .align 8 _make_fcontext: - movq %rsi, 0x38(%rdi) /* save address of context function */ - movq 0x40(%rdi), %rdx /* load address of context stack base */ + pushq %rbp /* save previous 
frame pointer; get the stack 16 byte aligned */ + movq %rsp, %rbp /* set RBP to RSP */ + subq $0x10, %rsp /* allocate stack space */ - pushq %rdi /* save pointer to fcontext_t */ - movq %rdx, %rdi /* context stack pointer as arg for align_stack */ - call _align_stack /* call align_stack */ - movq %rax, %rdx /* begin of aligned context stack */ - popq %rdi /* restore pointer to fcontext_t */ + movq %rsi, 0x38(%rdi) /* save address of context function */ + movq 0x40(%rdi), %rdx /* load address of context stack base */ + movq 0x48(%rdi), %rax /* load size of context stack */ + leaq (%rdx,%rax), %rdx /* compute top address of context stack */ - leaq -0x8(%rdx), %rdx /* reserve space for the last frame on context stack, (RSP + 8) % 16 == 0 */ - movq %rdx, 0x30(%rdi) /* save the algined context stack base */ + movq %rdi, (%rsp) /* save pointer to fcontext_t */ + movq %rdx, %rdi /* context stack pointer as arg for align_stack */ + call _align_stack /* call align_stack */ + movq %rax, %rdx /* begin of aligned context stack */ + movq (%rsp), %rdi /* restore pointer to fcontext_t */ - stmxcsr 0x50(%rdi) /* save MMX control and status word */ - fnstcw 0x54(%rdi) /* save x87 control word */ + stmxcsr 0x50(%rdi) /* save MMX control and status word */ + fnstcw 0x54(%rdi) /* save x87 control word */ - leaq finish(%rip), %rcx /* helper code executed after context function returns */ - movq %rcx, (%rdx) + leaq -0x8(%rdx), %rdx /* reserve space for the last frame on context stack, (RSP - 0x8) % 16 == 0 */ + movq %rdx, 0x30(%rdi) /* save address in RDX as stack pointer for context function */ - xorq %rax, %rax /* set RAX to zero */ + leaq finish(%rip), %rcx /* compute abs address of label finish */ + movq %rcx, (%rdx) /* save address of finish as return address for context function */ + /* entered after context function returns */ + + addq $0x10, %rsp /* deallocate shadow space */ + popq %rbp /* restore previous frame pointer */ + + xorq %rax, %rax ret finish: - xorq %rdi, %rdi /* 
exit code is zero */ - call _exit /* exit application */ + /* RSP == stack pointer of context function + 0x8 */ + xorq %rdi, %rdi /* exit code is zero */ + call _exit /* exit application */ hlt diff --git a/src/stack_allocator_posix.cpp b/src/stack_allocator_posix.cpp index a373e56..6bc6a77 100644 --- a/src/stack_allocator_posix.cpp +++ b/src/stack_allocator_posix.cpp @@ -44,8 +44,10 @@ stack_allocator::allocate( std::size_t size) const boost::str( boost::format("invalid stack size: must not be larger than %d bytes") % maximum_stacksize() ) ); - const std::size_t pages( page_count( size) + 1); // add +1 for guard page - std::size_t size_ = pages * pagesize(); + const std::size_t pages( page_count( size) ); + BOOST_ASSERT( 2 <= pages); // one page is reserved for protection + const std::size_t size_( pages * pagesize() ); + BOOST_ASSERT( 0 < size && 0 < size_); const int fd( ::open("/dev/zero", O_RDONLY) ); BOOST_ASSERT( -1 != fd); @@ -61,7 +63,7 @@ stack_allocator::allocate( std::size_t size) const const int result( ::mprotect( limit, pagesize(), PROT_NONE) ); BOOST_ASSERT( 0 == result); - return static_cast< char * >( limit) + size_; + return limit; } void @@ -69,11 +71,10 @@ stack_allocator::deallocate( void * vp, std::size_t size) const { if ( vp) { - const std::size_t pages( page_count( size) + 1); // add +1 for guard page - std::size_t size_ = pages * pagesize(); + const std::size_t pages = page_count( size); + const std::size_t size_ = pages * pagesize(); BOOST_ASSERT( 0 < size && 0 < size_); - void * limit = static_cast< char * >( vp) - size_; - ::munmap( limit, size_); + ::munmap( vp, size_); } } diff --git a/src/stack_allocator_windows.cpp b/src/stack_allocator_windows.cpp index 9223272..5c02236 100644 --- a/src/stack_allocator_windows.cpp +++ b/src/stack_allocator_windows.cpp @@ -46,10 +46,11 @@ stack_allocator::allocate( std::size_t size) const boost::str( boost::format("invalid stack size: must not be larger than %d bytes") % maximum_stacksize() ) ); - 
const std::size_t pages( page_count( size) + 1); // add +1 for guard page - std::size_t size_ = pages * pagesize(); + const std::size_t pages( page_count( size) ); + BOOST_ASSERT( 2 <= pages); // one page is reserved for protection + const std::size_t size_ = pages * pagesize(); + BOOST_ASSERT( 0 < size && 0 < size_); -#ifndef BOOST_CONTEXT_FIBER void * limit = ::VirtualAlloc( 0, size_, MEM_COMMIT, PAGE_READWRITE); if ( ! limit) throw std::bad_alloc(); @@ -58,8 +59,7 @@ stack_allocator::allocate( std::size_t size) const limit, pagesize(), PAGE_READWRITE | PAGE_GUARD /*PAGE_NOACCESS*/, & old_options); BOOST_ASSERT( FALSE != result); - return static_cast< char * >( limit) + size_; -#endif + return limit; } void @@ -67,11 +67,10 @@ stack_allocator::deallocate( void * vp, std::size_t size) const { if ( vp) { - const std::size_t pages( page_count( size) + 1); // add +1 for guard page - std::size_t size_ = pages * pagesize(); + const std::size_t pages = page_count( size); + const std::size_t size_ = pages * pagesize(); BOOST_ASSERT( 0 < size && 0 < size_); - void * limit = static_cast< char * >( vp) - size_; - ::VirtualFree( limit, 0, MEM_RELEASE); + ::VirtualFree( vp, 0, MEM_RELEASE); } } diff --git a/src/stack_utils_posix.cpp b/src/stack_utils_posix.cpp index 7a0b0d7..47d0706 100644 --- a/src/stack_utils_posix.cpp +++ b/src/stack_utils_posix.cpp @@ -6,6 +6,8 @@ #define BOOST_CONTEXT_SOURCE +#include + #include extern "C" { @@ -39,38 +41,28 @@ static rlimit stacksize_limit() return limit; } +static std::size_t compute_default_stacksize_() +{ + std::size_t size = 256 * 1024; // 256 kB + if ( boost::ctx::is_stack_unbound() ) + return std::max( size, boost::ctx::minimum_stacksize() ); + + BOOST_ASSERT( boost::ctx::maximum_stacksize() >= boost::ctx::minimum_stacksize() ); + return boost::ctx::maximum_stacksize() == boost::ctx::minimum_stacksize() + ? 
boost::ctx::minimum_stacksize() + : std::min( size, boost::ctx::maximum_stacksize() ); +} + } namespace boost { namespace ctx { -BOOST_CONTEXT_DECL -std::size_t default_stacksize() -{ - static std::size_t size = 256 * 1024; - return size; -} - -BOOST_CONTEXT_DECL -std::size_t minimum_stacksize() -{ return SIGSTKSZ; } - -BOOST_CONTEXT_DECL -std::size_t maximum_stacksize() -{ - BOOST_ASSERT( ! is_stack_unbound() ); - return static_cast< std::size_t >( stacksize_limit().rlim_max); -} - -BOOST_CONTEXT_DECL -bool is_stack_unbound() -{ return RLIM_INFINITY == stacksize_limit().rlim_max; } - BOOST_CONTEXT_DECL std::size_t pagesize() { - static std::size_t pagesize( ::getpagesize() ); - return pagesize; + static std::size_t size = ::getpagesize(); + return size; } BOOST_CONTEXT_DECL @@ -81,4 +73,30 @@ std::size_t page_count( std::size_t stacksize) static_cast< float >( stacksize) / pagesize() ) ); } +BOOST_CONTEXT_DECL +bool is_stack_unbound() +{ return RLIM_INFINITY == stacksize_limit().rlim_max; } + +BOOST_CONTEXT_DECL +std::size_t maximum_stacksize() +{ + BOOST_ASSERT( ! 
is_stack_unbound() ); + return static_cast< std::size_t >( stacksize_limit().rlim_max); +} + +BOOST_CONTEXT_DECL +std::size_t minimum_stacksize() +{ + // space for guard page added + static std::size_t size = SIGSTKSZ + pagesize(); + return size; +} + +BOOST_CONTEXT_DECL +std::size_t default_stacksize() +{ + static std::size_t size = compute_default_stacksize_(); + return size; +} + }} diff --git a/src/stack_utils_windows.cpp b/src/stack_utils_windows.cpp index 373033d..7942609 100644 --- a/src/stack_utils_windows.cpp +++ b/src/stack_utils_windows.cpp @@ -5,6 +5,7 @@ // http://www.boost.org/LICENSE_1_0.txt) #define BOOST_CONTEXT_SOURCE +#define NOMINMAX #include @@ -32,45 +33,29 @@ static SYSTEM_INFO system_info() return si; } +static std::size_t compute_default_stacksize_() +{ + std::size_t size = 256 * 1024; // 256 kB + if ( boost::ctx::is_stack_unbound() ) + return std::max( size, boost::ctx::minimum_stacksize() ); + + BOOST_ASSERT( boost::ctx::maximum_stacksize() >= boost::ctx::minimum_stacksize() ); + return boost::ctx::maximum_stacksize() == boost::ctx::minimum_stacksize() + ? boost::ctx::minimum_stacksize() + : std::min( size, boost::ctx::maximum_stacksize() ); +} + } namespace boost { namespace ctx { -BOOST_CONTEXT_DECL -std::size_t default_stacksize() -{ - static std::size_t size = 256 * 1024; - return size; -} - -BOOST_CONTEXT_DECL -std::size_t minimum_stacksize() -{ - static std::size_t stacksize( - static_cast< std::size_t >( system_info().dwAllocationGranularity) ); - return stacksize; -} - -BOOST_CONTEXT_DECL -std::size_t maximum_stacksize() -{ - BOOST_ASSERT( ! 
is_stack_unbound() ); - static std::size_t stacksize = 8 * 1024 * 1024; - return stacksize; -} - -// Windows seams not to provide a limit for the stacksize -BOOST_CONTEXT_DECL -bool is_stack_unbound() -{ return true; } - BOOST_CONTEXT_DECL std::size_t pagesize() { - static std::size_t pagesize( - static_cast< std::size_t >( system_info().dwPageSize) ); - return pagesize; + static std::size_t size = + static_cast< std::size_t >( system_info().dwPageSize); + return size; } BOOST_CONTEXT_DECL @@ -81,4 +66,34 @@ std::size_t page_count( std::size_t stacksize) static_cast< float >( stacksize) / pagesize() ) ); } +// Windows seams not to provide a limit for the stacksize +BOOST_CONTEXT_DECL +bool is_stack_unbound() +{ return true; } + +BOOST_CONTEXT_DECL +std::size_t maximum_stacksize() +{ + BOOST_ASSERT( ! is_stack_unbound() ); + static std::size_t size = 1 * 1024 * 1024 * 1024; // 1GB + return size; +} + +BOOST_CONTEXT_DECL +std::size_t minimum_stacksize() +{ + // space for guard page added + static std::size_t size = + static_cast< std::size_t >( system_info().dwAllocationGranularity) + + pagesize(); + return size; +} + +BOOST_CONTEXT_DECL +std::size_t default_stacksize() +{ + static std::size_t size = compute_default_stacksize_(); + return size; +} + }} diff --git a/test/test_context.cpp b/test/test_context.cpp index cb32e09..674d185 100644 --- a/test/test_context.cpp +++ b/test/test_context.cpp @@ -74,13 +74,20 @@ void f8( intptr_t arg) ctx::jump_fcontext( & fc1, & fcm, 0); } +void test_stack_utils() +{ + if ( ! 
ctx::is_stack_unbound() ) + BOOST_CHECK( ctx::maximum_stacksize() >= ctx::default_stacksize() ); + BOOST_CHECK( ctx::default_stacksize() >= ctx::minimum_stacksize() ); +} + void test_start() { value1 = 0; ctx::stack_allocator alloc; - fc1.fc_stack.base = alloc.allocate( ctx::minimum_stacksize() ); + fc1.fc_stack.sp = alloc.allocate( ctx::minimum_stacksize() ); fc1.fc_stack.size = ctx::minimum_stacksize(); ctx::make_fcontext( & fc1, f1); @@ -95,7 +102,7 @@ void test_jump() ctx::stack_allocator alloc; - fc1.fc_stack.base = alloc.allocate( ctx::minimum_stacksize() ); + fc1.fc_stack.sp = alloc.allocate( ctx::minimum_stacksize() ); fc1.fc_stack.size = ctx::minimum_stacksize(); ctx::make_fcontext( & fc1, f3); @@ -110,7 +117,7 @@ void test_result() { ctx::stack_allocator alloc; - fc1.fc_stack.base = alloc.allocate( ctx::minimum_stacksize() ); + fc1.fc_stack.sp = alloc.allocate( ctx::minimum_stacksize() ); fc1.fc_stack.size = ctx::minimum_stacksize(); ctx::make_fcontext( & fc1, f4); @@ -122,7 +129,7 @@ void test_arg() { ctx::stack_allocator alloc; - fc1.fc_stack.base = alloc.allocate( ctx::minimum_stacksize() ); + fc1.fc_stack.sp = alloc.allocate( ctx::minimum_stacksize() ); fc1.fc_stack.size = ctx::minimum_stacksize(); int i = 7; ctx::make_fcontext( & fc1, f5); @@ -135,7 +142,7 @@ void test_transfer() { ctx::stack_allocator alloc; - fc1.fc_stack.base = alloc.allocate( ctx::minimum_stacksize() ); + fc1.fc_stack.sp = alloc.allocate( ctx::minimum_stacksize() ); fc1.fc_stack.size = ctx::minimum_stacksize(); std::pair< int, int > data = std::make_pair( 3, 7); ctx::make_fcontext( & fc1, f6); @@ -151,7 +158,7 @@ void test_exception() { ctx::stack_allocator alloc; - fc1.fc_stack.base = alloc.allocate( ctx::minimum_stacksize() ); + fc1.fc_stack.sp = alloc.allocate( ctx::minimum_stacksize() ); fc1.fc_stack.size = ctx::minimum_stacksize(); const char * what = "hello world"; ctx::make_fcontext( & fc1, f7); @@ -164,7 +171,7 @@ void test_fp() { ctx::stack_allocator alloc; - 
fc1.fc_stack.base = alloc.allocate( ctx::minimum_stacksize() ); + fc1.fc_stack.sp = alloc.allocate( ctx::minimum_stacksize() ); fc1.fc_stack.size = ctx::minimum_stacksize(); double d = 7.13; ctx::make_fcontext( & fc1, f8); @@ -178,6 +185,7 @@ boost::unit_test::test_suite * init_unit_test_suite( int, char* []) boost::unit_test::test_suite * test = BOOST_TEST_SUITE("Boost.Context: context test suite"); + test->add( BOOST_TEST_CASE( & test_stack_utils) ); test->add( BOOST_TEST_CASE( & test_start) ); test->add( BOOST_TEST_CASE( & test_jump) ); test->add( BOOST_TEST_CASE( & test_result) );