From db6c3d7a79bc3f7c50ddb49491fe3f78f0087a33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juraj=20Ivan=C4=8Di=C4=87?= Date: Tue, 14 Jan 2014 11:14:03 +0100 Subject: [PATCH 1/3] Support more than 64 parallel jobs (-j). Windows WaitForMultipleObjects() can wait on max. 64 handles. This limitation is overcome by splitting the handle set into parts which are of adequate size, and spawning a thread which does the waiting. --- src/engine/execnt.c | 120 +++++++++++++++++++++++++++++++++++++++++--- src/engine/jam.h | 2 +- 2 files changed, 114 insertions(+), 8 deletions(-) diff --git a/src/engine/execnt.c b/src/engine/execnt.c index ca89bd0f0..89eca9ce6 100644 --- a/src/engine/execnt.c +++ b/src/engine/execnt.c @@ -780,31 +780,137 @@ static void read_output() * cmdtab array, or -1. */ +typedef struct _twh_params +{ + int * active_procs; + HANDLE * active_handles; + DWORD num_active; + DWORD timeoutMillis; +} twh_params; + +static int try_wait_helper( twh_params * ); + static int try_wait( int const timeoutMillis ) { +#define MAX_THREADS MAXJOBS/(MAXIMUM_WAIT_OBJECTS - 1) + 1 int i; int num_active; int wait_api_result; - HANDLE active_handles[ MAXJOBS ]; - int active_procs[ MAXJOBS ]; + HANDLE active_handles[ MAXJOBS + MAX_THREADS ]; + int active_procs[ MAXJOBS + MAX_THREADS ]; + unsigned int num_threads; + unsigned int last_chunk_size; + HANDLE completed_event = INVALID_HANDLE_VALUE; + HANDLE thread_handles[MAX_THREADS]; + twh_params thread_params[MAX_THREADS]; + int result = -1; + BOOL success; /* Prepare a list of all active processes to wait for. */ for ( num_active = 0, i = 0; i < globs.jobs; ++i ) if ( cmdtab[ i ].pi.hProcess ) { + if ( num_active == MAXIMUM_WAIT_OBJECTS ) + { + /* + * We surpassed MAXIMUM_WAIT_OBJECTS, so we need to use threads + * to wait for this set. Create an event which will notify + * threads to stop waiting. Every wait set should have this + * event as its last element. 
+ */ + assert( completed_event == INVALID_HANDLE_VALUE ); + completed_event = CreateEvent(NULL, FALSE, FALSE, NULL); + active_handles[ num_active ] = active_handles[ num_active - 1 ]; + active_procs[ num_active ] = active_procs[ num_active - 1 ]; + active_handles[ num_active - 1 ] = completed_event; + active_procs[ num_active - 1 ] = -1; + ++num_active; + } + else if ( ( completed_event != INVALID_HANDLE_VALUE ) && + !((num_active + 1) % MAXIMUM_WAIT_OBJECTS) ) + { + active_handles[ num_active ] = completed_event; + active_procs[ num_active ] = -1; + ++num_active; + } active_handles[ num_active ] = cmdtab[ i ].pi.hProcess; active_procs[ num_active ] = i; ++num_active; } - /* Wait for a child to complete, or for our timeout window to expire. */ - wait_api_result = WaitForMultipleObjects( num_active, active_handles, - FALSE, timeoutMillis ); + assert( (num_active <= MAXIMUM_WAIT_OBJECTS) == + (completed_event == INVALID_HANDLE_VALUE) ); + if ( num_active <= MAXIMUM_WAIT_OBJECTS ) + { + twh_params twh; + twh.active_procs = active_procs; + twh.active_handles = active_handles; + twh.num_active = num_active; + twh.timeoutMillis = timeoutMillis; + return try_wait_helper( &twh ); + } + + num_threads = num_active / MAXIMUM_WAIT_OBJECTS; + last_chunk_size = num_active % MAXIMUM_WAIT_OBJECTS; + if ( last_chunk_size ) + { + /* The last chunk does not have event handle, so add it now. 
*/ + active_handles[ num_active ] = completed_event; + active_procs[ num_active ] = -1; + ++num_active; + ++num_threads; + ++last_chunk_size; + } + + assert( num_threads <= MAX_THREADS ); + + for ( i = 0; i < num_threads; ++i ) + { + thread_params[i].active_procs = active_procs + + i * MAXIMUM_WAIT_OBJECTS; + thread_params[i].active_handles = active_handles + + i * MAXIMUM_WAIT_OBJECTS; + thread_params[i].timeoutMillis = INFINITE; + thread_params[i].num_active = MAXIMUM_WAIT_OBJECTS; + if ( ( i == num_threads - 1 ) && last_chunk_size ) + thread_params[i].num_active = last_chunk_size; + thread_handles[i] = CreateThread(NULL, 4 * 1024, + (LPTHREAD_START_ROUTINE)&try_wait_helper, &thread_params[i], + 0, NULL); + } + wait_api_result = WaitForMultipleObjects(num_threads, thread_handles, + FALSE, timeoutMillis); if ( ( WAIT_OBJECT_0 <= wait_api_result ) && - ( wait_api_result < WAIT_OBJECT_0 + num_active ) ) + ( wait_api_result < WAIT_OBJECT_0 + num_threads ) ) + { + HANDLE thread_handle = thread_handles[wait_api_result - WAIT_OBJECT_0]; + success = GetExitCodeThread(thread_handle, (DWORD *)&result); + assert( success ); + } + SetEvent(completed_event); + /* Should complete instantly. */ + WaitForMultipleObjects(num_threads, thread_handles, TRUE, INFINITE); + CloseHandle(completed_event); + for ( i = 0; i < num_threads; ++i ) + CloseHandle(thread_handles[i]); + return result; +#undef MAX_THREADS +} + +static int try_wait_helper( twh_params * params ) +{ + int wait_api_result; + + assert( params->num_active <= MAXIMUM_WAIT_OBJECTS ); + + /* Wait for a child to complete, or for our timeout window to expire. */ + wait_api_result = WaitForMultipleObjects( params->num_active, + params->active_handles, FALSE, params->timeoutMillis ); + if ( ( WAIT_OBJECT_0 <= wait_api_result ) && + ( wait_api_result < WAIT_OBJECT_0 + params->num_active ) ) { /* Terminated process detected - return its index. 
*/ - return active_procs[ wait_api_result - WAIT_OBJECT_0 ]; + return params->active_procs[ wait_api_result - WAIT_OBJECT_0 ]; } /* Timeout. */ diff --git a/src/engine/jam.h b/src/engine/jam.h index 86ad0e86b..497a5bfb1 100644 --- a/src/engine/jam.h +++ b/src/engine/jam.h @@ -409,7 +409,7 @@ #define MAXSYM 1024 /* longest symbol in the environment */ #define MAXJPATH 1024 /* longest filename */ -#define MAXJOBS 64 /* internally enforced -j limit */ +#define MAXJOBS 256 /* internally enforced -j limit */ #define MAXARGC 32 /* words in $(JAMSHELL) */ /* Jam private definitions below. */ From d2cf5aed72cabe50b50b3737911fb5c980d5cff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juraj=20Ivan=C4=8Di=C4=87?= Date: Tue, 14 Jan 2014 15:20:00 +0100 Subject: [PATCH 2/3] Avoid creating a thread if remaining jobs can be placed in the same array as thread handles. If we have to wait for, e.g., 65 jobs, we first create a thread which waits for the first 63. The remaining two can be waited on by the same WFMO call which waits for thread completion. This avoids creating a second thread in this case. --- src/engine/execnt.c | 48 +++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/engine/execnt.c b/src/engine/execnt.c index 89eca9ce6..3bdee0eae 100644 --- a/src/engine/execnt.c +++ b/src/engine/execnt.c @@ -799,9 +799,11 @@ static int try_wait( int const timeoutMillis ) HANDLE active_handles[ MAXJOBS + MAX_THREADS ]; int active_procs[ MAXJOBS + MAX_THREADS ]; unsigned int num_threads; + unsigned int num_handles; unsigned int last_chunk_size; + unsigned int last_chunk_offset; HANDLE completed_event = INVALID_HANDLE_VALUE; - HANDLE thread_handles[MAX_THREADS]; + HANDLE thread_handles[MAXIMUM_WAIT_OBJECTS]; twh_params thread_params[MAX_THREADS]; int result = -1; BOOL success; @@ -814,9 +816,9 @@ static int try_wait( int const timeoutMillis ) { /* * We surpassed MAXIMUM_WAIT_OBJECTS, so we need to use threads - * to wait for this set. 
Create an event which will notify - * threads to stop waiting. Every wait set should have this - * event as its last element. + * to wait for this set. Create an event object which will + * notify threads to stop waiting. Every handle set chunk should + * have this event as its last element. */ assert( completed_event == INVALID_HANDLE_VALUE ); completed_event = CreateEvent(NULL, FALSE, FALSE, NULL); @@ -852,14 +854,29 @@ static int try_wait( int const timeoutMillis ) num_threads = num_active / MAXIMUM_WAIT_OBJECTS; last_chunk_size = num_active % MAXIMUM_WAIT_OBJECTS; + num_handles = num_threads; if ( last_chunk_size ) { - /* The last chunk does not have event handle, so add it now. */ - active_handles[ num_active ] = completed_event; - active_procs[ num_active ] = -1; - ++num_active; - ++num_threads; - ++last_chunk_size; + /* Can we fit the last chunk in the outer WFMO call? */ + if ( last_chunk_size <= MAXIMUM_WAIT_OBJECTS - num_threads ) + { + last_chunk_offset = num_threads * MAXIMUM_WAIT_OBJECTS; + for ( i = 0; i < last_chunk_size; ++i ) + thread_handles[ i + num_threads ] = + active_handles[ i + last_chunk_offset ]; + num_handles = num_threads + last_chunk_size; + } + else + { + /* We need another thread for the remainder. */ + /* Add completed_event handle to the last chunk. 
*/ + active_handles[ num_active ] = completed_event; + active_procs[ num_active ] = -1; + ++last_chunk_size; + ++num_active; + ++num_threads; + num_handles = num_threads; + } } assert( num_threads <= MAX_THREADS ); @@ -872,13 +889,14 @@ static int try_wait( int const timeoutMillis ) i * MAXIMUM_WAIT_OBJECTS; thread_params[i].timeoutMillis = INFINITE; thread_params[i].num_active = MAXIMUM_WAIT_OBJECTS; - if ( ( i == num_threads - 1 ) && last_chunk_size ) + if ( ( i == num_threads - 1 ) && last_chunk_size && + ( num_handles == num_threads ) ) thread_params[i].num_active = last_chunk_size; thread_handles[i] = CreateThread(NULL, 4 * 1024, (LPTHREAD_START_ROUTINE)&try_wait_helper, &thread_params[i], 0, NULL); } - wait_api_result = WaitForMultipleObjects(num_threads, thread_handles, + wait_api_result = WaitForMultipleObjects(num_handles, thread_handles, FALSE, timeoutMillis); if ( ( WAIT_OBJECT_0 <= wait_api_result ) && ( wait_api_result < WAIT_OBJECT_0 + num_threads ) ) @@ -887,6 +905,12 @@ static int try_wait( int const timeoutMillis ) success = GetExitCodeThread(thread_handle, (DWORD *)&result); assert( success ); } + else if ( ( WAIT_OBJECT_0 + num_threads <= wait_api_result ) && + ( wait_api_result < WAIT_OBJECT_0 + num_handles ) ) + { + unsigned int offset = wait_api_result - num_threads - WAIT_OBJECT_0; + result = active_procs[ last_chunk_offset + offset ]; + } SetEvent(completed_event); /* Should complete instantly. */ WaitForMultipleObjects(num_threads, thread_handles, TRUE, INFINITE); From 164318ffba1915aef727509d004d0c22ae01fd44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juraj=20Ivan=C4=8Di=C4=87?= Date: Tue, 17 Jun 2014 12:07:39 +0200 Subject: [PATCH 3/3] add support for MSVC setup script rewrite MSVC setup scripts are very slow, so should not be used in build actions directly. Rewrite these scripts into their fast equivalents and use those instead. Add toolset initialization option to customize/disable this new behavior. 
--- src/tools/msvc.jam | 65 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/src/tools/msvc.jam b/src/tools/msvc.jam index bab9a9fb6..2bc7be7bf 100644 --- a/src/tools/msvc.jam +++ b/src/tools/msvc.jam @@ -32,6 +32,7 @@ import path ; import pch ; import property ; import rc ; +import set ; import toolset ; import type ; @@ -130,6 +131,14 @@ rule init ( # Platform specific setup command to invoke before running any of the # msvc tools used when builing a target for a specific platform, e.g. # when building a 32 or 64 bit executable. + # + # <rewrite-setup-scripts> + # Whether to rewrite setup scripts. New scripts will be output in + # TEMP directory and will be used instead of originals in build actions. + # Possible values: + # * on - rewrite scripts, if they do not already exist (default) + # * always - always rewrite scripts, even if they already exist + # * off - use original setup scripts : options * ) { @@ -676,6 +685,58 @@ local rule auto-detect-toolset-versions ( ) } } +# Helper rule to generate a faster alternative to MSVC setup scripts. +# We used to call MSVC setup scripts directly in every action, however in +# newer MSVC versions (10.0+) they make long-lasting registry queries +# which have a significant impact on build time. +local rule maybe-rewrite-setup ( setup-script : setup-options : version : rewrite-setup ? ) +{ + local result = $(setup-script)" "$(setup-options) ; + # At the moment we only know how to rewrite scripts with cmd shell. + if ( [ os.name ] in NT ) && ( $(rewrite-setup) != off ) + { + setup-script-id = b2_msvc_$(version)_$(setup-script:B) ; + if $(setup-options)-is-not-empty + { + setup-script-id = $(setup-script-id)_$(setup-options) ; + } + + if $(.$(setup-script-id)) + { + errors.error rewriting setup script for the second time ; + } + + local tmpdir = [ os.environ TEMP ] ; + local replacement = [ path.native $(tmpdir)/$(setup-script-id).cmd ] ; + if ( $(rewrite-setup) = always ) || ( ! 
[ path.exists $(replacement) ] ) + { + local original-vars = [ SPLIT_BY_CHARACTERS [ SHELL set ] : "\n" ] ; + local new-vars = [ SPLIT_BY_CHARACTERS [ SHELL "$(setup-script) $(setup-options)>nul && set" ] : "\n" ] ; + local diff-vars = [ set.difference $(new-vars) : $(original-vars) ] ; + if $(diff-vars) + { + local target = $(replacement) ; + FILE_CONTENTS on $(target) = "SET "$(diff-vars) ; + ALWAYS $(target) ; + msvc.write-setup-script $(target) ; + UPDATE_NOW $(target) : : ignore-minus-n ; + .$(setup-script-id) = $(replacement) ; + result = $(replacement) ; + } + } + else + { + result = $(replacement) ; + } + } + return $(result) ; +} + +actions write-setup-script +{ + @($(STDOUT):E=$(FILE_CONTENTS:J=$(.nl))) > "$(<)" +} + # Worker rule for toolset version configuration. Takes an explicit version id or # nothing in case it should configure the default toolset version (the first @@ -927,7 +988,9 @@ local rule configure-really ( version ? : options * ) # Append setup options to the setup name and add the final setup # prefix & suffix. setup-options ?= "" ; - setup-$(c) = $(setup-prefix)$(setup-$(c):J=" ")" "$(setup-options:J=" ")$(setup-suffix) ; + local rewrite = [ feature.get-values <rewrite-setup-scripts> : $(options) ] ; + setup-$(c) = [ maybe-rewrite-setup $(setup-$(c):J=" ") : $(setup-options:J=" ") : $(version) : $(rewrite) ] ; + setup-$(c) = $(setup-prefix)$(setup-$(c))$(setup-suffix) ; } }