From 562f149b18ec0bf716f469dbf02847d67ea4da63 Mon Sep 17 00:00:00 2001 From: Denis Demidov Date: Thu, 26 Dec 2013 15:12:01 +0400 Subject: [PATCH] Implements offline kernel caching See kylelutz/compute#21 This adds program::build_with_source() function that both creates and builds the program for the given context with supplied source and compile options. In case BOOST_COMPUTE_USE_OFFLINE_CACHE macro is defined, it also saves the compiled program binary for reuse in the offline cache located in $HOME/.boost_compute folder on UNIX-like systems and in %APPDATA%/boost_compute folder on Windows. All internal uses of program::create_with_source() followed by program::build() are replaced with program::build_with_source(). --- CMakeLists.txt | 5 + example/CMakeLists.txt | 7 +- .../compute/algorithm/detail/fixed_sort.hpp | 10 +- .../compute/algorithm/detail/radix_sort.hpp | 4 +- .../algorithm/detail/reduce_on_gpu.hpp | 3 +- include/boost/compute/detail/meta_kernel.hpp | 5 +- include/boost/compute/kernel.hpp | 9 +- include/boost/compute/program.hpp | 164 ++++++++++++++++++ .../boost/compute/random/mersenne_twister.hpp | 3 +- include/boost/compute/source.hpp | 7 +- perf/CMakeLists.txt | 15 +- test/CMakeLists.txt | 9 +- test/test_program.cpp | 2 + 13 files changed, 210 insertions(+), 33 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5578b8ff..7657d08a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,3 +44,8 @@ option(BOOST_COMPUTE_BUILD_BENCHMARKS "Build the Boost.Compute benchmarks" OFF) if(${BOOST_COMPUTE_BUILD_BENCHMARKS}) add_subdirectory(perf) endif() + +option(BOOST_COMPUTE_USE_OFFLINE_CACHE "Use offline cache for OpenCL program binaries" OFF) +if(${BOOST_COMPUTE_USE_OFFLINE_CACHE}) + add_definitions(-DBOOST_COMPUTE_USE_OFFLINE_CACHE) +endif() diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index ff2d61b6..ac4c98e5 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -15,7 +15,12 @@ set(EXAMPLES vector_addition ) +if (${BOOST_COMPUTE_USE_OFFLINE_CACHE}) + find_package(Boost 1.46 REQUIRED COMPONENTS system filesystem) + include_directories(${Boost_INCLUDE_DIRS}) +endif() + foreach(EXAMPLE ${EXAMPLES}) add_executable(${EXAMPLE} ${EXAMPLE}.cpp) - target_link_libraries(${EXAMPLE} ${OPENCL_LIBRARIES}) + target_link_libraries(${EXAMPLE} ${OPENCL_LIBRARIES} ${Boost_LIBRARIES}) endforeach() diff --git a/include/boost/compute/algorithm/detail/fixed_sort.hpp b/include/boost/compute/algorithm/detail/fixed_sort.hpp index 37016b13..7f264719 100644 --- a/include/boost/compute/algorithm/detail/fixed_sort.hpp +++ b/include/boost/compute/algorithm/detail/fixed_sort.hpp @@ -45,8 +45,9 @@ inline void sort2(const buffer &buffer, command_queue &queue) " }\n" "}\n"; - sort2_program = program::create_with_source(source, context); - sort2_program.build(std::string("-DT=") + type_name()); + sort2_program = program::build_with_source( + source, context, std::string("-DT=") + type_name() + ); cache->insert(cache_key, sort2_program); } @@ -106,8 +107,9 @@ inline void sort3(const buffer &buffer, command_queue &queue) " }\n" "}\n"; - sort3_program = program::create_with_source(source, context); - sort3_program.build(std::string("-DT=") + type_name()); + sort3_program = program::build_with_source( + source, context, std::string("-DT=") + type_name() + ); cache->insert(cache_key, sort3_program); } diff --git a/include/boost/compute/algorithm/detail/radix_sort.hpp b/include/boost/compute/algorithm/detail/radix_sort.hpp index 3e805c75..03bbfd3f 100644 --- a/include/boost/compute/algorithm/detail/radix_sort.hpp +++ b/include/boost/compute/algorithm/detail/radix_sort.hpp @@ -226,9 +226,7 @@ inline void radix_sort(Iterator first, } radix_sort_program = - program::create_with_source(radix_sort_source, context); - - radix_sort_program.build(options.str()); + program::build_with_source(radix_sort_source, context, options.str()); cache->insert(cache_key, radix_sort_program); } diff --git a/include/boost/compute/algorithm/detail/reduce_on_gpu.hpp b/include/boost/compute/algorithm/detail/reduce_on_gpu.hpp index 0f0d0bff..88768440 100644 --- a/include/boost/compute/algorithm/detail/reduce_on_gpu.hpp +++ b/include/boost/compute/algorithm/detail/reduce_on_gpu.hpp @@ -81,8 +81,7 @@ inline void reduce_on_gpu(const buffer_iterator first, options << "-DT=" << type_name() << " -DVPT=" << vpt << " -DTPB=" << tpb; - reduce_program = program::create_with_source(source, context); - reduce_program.build(options.str()); + reduce_program = program::build_with_source(source, context, options.str()); cache->insert(cache_key, reduce_program); } diff --git a/include/boost/compute/detail/meta_kernel.hpp b/include/boost/compute/detail/meta_kernel.hpp index 29fea7d2..d33a7b43 100644 --- a/include/boost/compute/detail/meta_kernel.hpp +++ b/include/boost/compute/detail/meta_kernel.hpp @@ -342,10 +342,9 @@ public: ::boost::compute::program program = cache->get(cache_key); // build the program if it was not in the cache - if(!program.get() || program.source() != source){ + if(!program.get()/* || program.source() != source*/){ program = - ::boost::compute::program::create_with_source(source, context); - program.build(); + ::boost::compute::program::build_with_source(source, context); cache->insert(cache_key, program); } diff --git a/include/boost/compute/kernel.hpp b/include/boost/compute/kernel.hpp index dbe9215a..95183d3b 100644 --- a/include/boost/compute/kernel.hpp +++ b/include/boost/compute/kernel.hpp @@ -273,14 +273,7 @@ public: const std::string &name, const context &context) { - program program_ = program::create_with_source(source, context); - - cl_int ret = program_.build(); - if(ret != CL_SUCCESS){ - BOOST_THROW_EXCEPTION(runtime_exception(ret)); - } - - return program_.create_kernel(name); + return program::build_with_source(source, context).create_kernel(name); } private: diff --git a/include/boost/compute/program.hpp b/include/boost/compute/program.hpp index 960c42bc..2d2febb5 100644 --- a/include/boost/compute/program.hpp +++ b/include/boost/compute/program.hpp @@ -28,6 +28,16 @@ #include #include +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE +#include +#include +#include +#include +#include +#include +#include +#endif + namespace boost { namespace compute { @@ -352,10 +362,164 @@ public: return create_with_binary(&binary[0], binary.size(), context); } + /// Create a new program with \p source in \p context and builds it with \p options. + /** + * In case BOOST_COMPUTE_USE_OFFLINE_CACHE macro is defined, + * the compiled binary is stored for reuse in the offline cache located in + * $HOME/.boost_compute on UNIX-like systems and in %APPDATA%/boost_compute + * on Windows. + */ + static program build_with_source( + std::string source, + const context &context, + const std::string &options = std::string() + ) + { +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE + { + device d(context.get_device()); + platform p(d.get_info(CL_DEVICE_PLATFORM)); + + std::ostringstream src; + src << "// " << p.name() << " v" << p.version() << "\n" + << "// " << context.get_device().name() << "\n" + << "// " << options << "\n\n" + << source; + + source = src.str(); + } + + // Get hash string for the kernel. + std::string hash = sha1(source); + + // Try to get cached program binaries: + try { + boost::optional prog = load_program_binary(hash, context); + + if (prog) { + prog->build(options); + return *prog; + } + } catch (...) { + // Something bad happened. Fallback to normal compilation. + } + + // Cache is apparently not available. Just compile the sources. +#endif + const char *source_string = source.c_str(); + + cl_int error = 0; + cl_program program_ = clCreateProgramWithSource(context, + uint_(1), + &source_string, + 0, + &error); + if(!program_){ + BOOST_THROW_EXCEPTION(runtime_exception(error)); + } + + program prog(program_, false); + prog.build(options); + +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE + // Save program binaries for future reuse. + save_program_binary(hash, prog); +#endif + + return prog; + } + private: BOOST_COPYABLE_AND_MOVABLE(program) cl_program m_program; + +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE + // Path delimiter symbol for the current OS. + static const std::string& path_delim() { + static const std::string delim = + boost::filesystem::path("/").make_preferred().string(); + return delim; + } + + // Path to appdata folder. + static inline const std::string& appdata_path() { +#ifdef WIN32 + static const std::string appdata = detail::getenv("APPDATA") + + path_delim() + "boost_compute"; +#else + static const std::string appdata = detail::getenv("HOME") + + path_delim() + ".boost_compute"; +#endif + return appdata; + } + + // Path to cached binaries. + static std::string program_binary_path(const std::string &hash, bool create = false) + { + std::string dir = appdata_path() + path_delim() + + hash.substr(0, 2) + path_delim() + + hash.substr(2); + + if (create) boost::filesystem::create_directories(dir); + + return dir + path_delim(); + } + + // Returns SHA1 hash of the string parameter. + static std::string sha1(const std::string &src) { + boost::uuids::detail::sha1 sha1; + sha1.process_bytes(src.c_str(), src.size()); + + unsigned int hash[5]; + sha1.get_digest(hash); + + std::ostringstream buf; + for(int i = 0; i < 5; ++i) + buf << std::hex << std::setfill('0') << std::setw(8) << hash[i]; + + return buf.str(); + } + + // Saves program binaries for future reuse. + static void save_program_binary(const std::string &hash, const program &prog) + { + std::string fname = program_binary_path(hash, true) + "kernel"; + std::ofstream bfile(fname.c_str(), std::ios::binary); + if (!bfile) return; + + std::vector binary = prog.binary(); + + size_t binary_size = binary.size(); + bfile.write((char*)&binary_size, sizeof(size_t)); + bfile.write((char*)binary.data(), binary_size); + } + + // Tries to read program binaries from file cache. + static boost::optional load_program_binary( + const std::string &hash, const context &ctx + ) + { + std::string fname = program_binary_path(hash) + "kernel"; + std::ifstream bfile(fname.c_str(), std::ios::binary); + if (!bfile) return boost::optional(); + + size_t binary_size; + std::vector binary; + + bfile.read((char*)&binary_size, sizeof(size_t)); + + binary.resize(binary_size); + bfile.read((char*)binary.data(), binary_size); + + return boost::optional( + program::create_with_binary( + binary.data(), binary_size, ctx + ) + ); + } +#endif // BOOST_COMPUTE_USE_OFFLINE_CACHE + }; } // end compute namespace diff --git a/include/boost/compute/random/mersenne_twister.hpp b/include/boost/compute/random/mersenne_twister.hpp index 03cf2940..a008f393 100644 --- a/include/boost/compute/random/mersenne_twister.hpp +++ b/include/boost/compute/random/mersenne_twister.hpp @@ -168,8 +168,7 @@ private: " vector[offset+i] = random_number(state, i);\n" "}\n"; - m_program = program::create_with_source(source, m_context); - m_program.build(); + m_program = program::build_with_source(source, m_context); cache->insert(cache_key, m_program); } diff --git a/include/boost/compute/source.hpp b/include/boost/compute/source.hpp index fbb267d6..c088a8e0 100644 --- a/include/boost/compute/source.hpp +++ b/include/boost/compute/source.hpp @@ -24,11 +24,8 @@ /// } /// ); /// -/// // create square program -/// program square_program = program::create_with_source(source, context); -/// -/// // build square program -/// square_program.build(); +/// // create and build square program +/// program square_program = program::build_with_source(source, context); /// /// // create square kernel /// kernel square_kernel(square_program, "square"); diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt index 0491fa4e..711c767b 100644 --- a/perf/CMakeLists.txt +++ b/perf/CMakeLists.txt @@ -1,8 +1,15 @@ include_directories(../include) -# find Boost.Chrono -if(NOT ${BOOST_COMPUTE_TIMER_USE_STD_CHRONO}) - find_package(Boost 1.48 REQUIRED COMPONENTS chrono system) +if (NOT ${BOOST_COMPUTE_TIMER_USE_STD_CHRONO}) + set(BOOST_COMPONENTS ${BOOST_COMPONENTS} chrono system) +endif() + +if (${BOOST_COMPUTE_USE_OFFLINE_CACHE}) + set(BOOST_COMPONENTS ${BOOST_COMPONENTS} filesystem) +endif() + +if (DEFINED BOOST_COMPONENTS) + find_package(Boost 1.48 REQUIRED COMPONENTS ${BOOST_COMPONENTS}) include_directories(${Boost_INCLUDE_DIRS}) endif() @@ -23,7 +30,7 @@ foreach(BENCHMARK ${BENCHMARKS}) add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp) target_link_libraries(${PERF_TARGET} ${OPENCL_LIBRARIES}) - if(NOT ${BOOST_COMPUTE_TIMER_USE_STD_CHRONO}) + if(DEFINED BOOST_COMPONENTS) target_link_libraries(${PERF_TARGET} ${Boost_LIBRARIES}) endif() endforeach() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c329f40a..9716f036 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,13 @@ include_directories(../include) -find_package(Boost 1.48 REQUIRED COMPONENTS unit_test_framework) +set(BOOST_COMPONENTS unit_test_framework) + +if (${BOOST_COMPUTE_USE_OFFLINE_CACHE}) + set(BOOST_COMPONENTS ${BOOST_COMPONENTS} system filesystem) + add_definitions(-DBOOST_COMPUTE_USE_OFFLINE_CACHE) +endif() + +find_package(Boost 1.48 REQUIRED COMPONENTS ${BOOST_COMPONENTS}) add_definitions(-DBOOST_TEST_DYN_LINK) add_definitions(-DBOOST_COMPUTE_DEBUG_KERNEL_COMPILATION) diff --git a/test/test_program.cpp b/test/test_program.cpp index 3ade6555..aea5e198 100644 --- a/test/test_program.cpp +++ b/test/test_program.cpp @@ -31,7 +31,9 @@ BOOST_AUTO_TEST_CASE(get_program_info) program.build(); // check program info +#ifndef BOOST_COMPUTE_USE_OFFLINE_CACHE BOOST_CHECK(program.source().empty() == false); +#endif BOOST_CHECK(program.get_context() == context); }