From 3ff3002a22369e68939f27890f88ec5f2da33a61 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 6 Feb 2020 10:39:27 +0100 Subject: [PATCH 1/2] Improve performance test Run multiple times and take mean speed Present results in a table for easier comparison Add some tricks to avoid optimization reordering stuff that breaks the benchmark --- test/benchmark_fstream.cpp | 197 +++++++++++++++++++++++++++---------- 1 file changed, 145 insertions(+), 52 deletions(-) diff --git a/test/benchmark_fstream.cpp b/test/benchmark_fstream.cpp index 40b7e66..56279c0 100644 --- a/test/benchmark_fstream.cpp +++ b/test/benchmark_fstream.cpp @@ -13,9 +13,12 @@ #include #define BOOST_CHRONO_HEADER_ONLY #include +#include +#include #include #include #include +#include #include #ifdef BOOST_MSVC @@ -27,18 +30,22 @@ template class io_fstream { public: - void open(const char* file) + explicit io_fstream(const char* file, bool read) { - f_.open(file, std::fstream::out | std::fstream::in | std::fstream::trunc); + f_.open(file, read ? std::fstream::in : std::fstream::out | std::fstream::trunc); TEST(f_); } - void write(char* buf, int size) + ~io_fstream() { - f_.write(buf, size); + f_.close(); + } + void write(const char* buf, int size) + { + TEST(f_.write(buf, size)); } void read(char* buf, int size) { - f_.read(buf, size); + TEST(f_.read(buf, size)); } void rewind() { @@ -49,10 +56,6 @@ public: { f_ << std::flush; } - void close() - { - f_.close(); - } private: FStream f_; @@ -61,89 +64,179 @@ private: class io_stdio { public: - void open(const char* file) + io_stdio(const char* file, bool read) { - f_ = fopen(file, "w+"); + f_ = std::fopen(file, read ? "r" : "w+"); TEST(f_); } - void write(char* buf, int size) + ~io_stdio() { - fwrite(buf, 1, size, f_); + std::fclose(f_); + f_ = 0; + } + void write(const char* buf, int size) + { + TEST(std::fwrite(buf, 1, size, f_) == static_cast(size)); } void read(char* buf, int size) { - size_t res = fread(buf, 1, size, f_); - (void)res; + TEST(std::fread(buf, 1, size, f_) == static_cast(size)); } void rewind() { - ::rewind(f_); + std::rewind(f_); } void flush() { - fflush(f_); - } - void close() - { - fclose(f_); - f_ = 0; + std::fflush(f_); } private: FILE* f_; }; -template -void test_io(const char* file, const char* type) +#if defined(_MSC_VER) +extern "C" void _ReadWriteBarrier(void); +#pragma intrinsic(_ReadWriteBarrier) +#define BOOST_NOWIDE_READ_WRITE_BARRIER() _ReadWriteBarrier() +#elif defined(__GNUC__) +#if(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) > 40100 +#define BOOST_NOWIDE_READ_WRITE_BARRIER() __sync_synchronize() +#else +#define BOOST_NOWIDE_READ_WRITE_BARRIER() __asm__ __volatile__("" : : : "memory") +#endif +#else +#define BOOST_NOWIDE_READ_WRITE_BARRIER() (void) +#endif + +struct perf_data { - std::cout << "Testing I/O performance " << type << std::endl; - FStream tmp; - tmp.open(file); - int data_size = 64 * 1024 * 1024; - for(int block_size = 16; block_size <= 8192; block_size *= 2) + // Block-size to read/write performance in MB/s + std::map read, write; +}; + +char rand_char() +{ + return static_cast(std::rand()); +} + +std::vector get_rand_data(int size) +{ + std::vector data(size); + std::generate(data.begin(), data.end(), rand_char); + return data; +} + +static const int MIN_BLOCK_SIZE = 32; +static const int MAX_BLOCK_SIZE = 8192; + +template +perf_data test_io(const char* file) +{ + namespace chrono = boost::chrono; + typedef chrono::high_resolution_clock clock; + typedef chrono::duration milliseconds; + perf_data results; + // Use vector to force write to memory and avoid possible reordering + std::vector start_and_end(2); + const int data_size = 64 * 1024 * 1024; + for(int block_size = MIN_BLOCK_SIZE / 2; block_size <= MAX_BLOCK_SIZE; block_size *= 2) { - std::vector buf(block_size, ' '); - int size = 0; + std::vector buf = get_rand_data(block_size); + FStream tmp(file, false); tmp.rewind(); - boost::chrono::high_resolution_clock::time_point t1 = boost::chrono::high_resolution_clock::now(); - while(size < data_size) + start_and_end[0] = clock::now(); + BOOST_NOWIDE_READ_WRITE_BARRIER(); + for(int size = 0; size < data_size; size += block_size) { tmp.write(&buf[0], block_size); - size += block_size; + BOOST_NOWIDE_READ_WRITE_BARRIER(); } tmp.flush(); - boost::chrono::high_resolution_clock::time_point t2 = boost::chrono::high_resolution_clock::now(); - double tm = boost::chrono::duration_cast(t2 - t1).count() * 1e-3; + start_and_end[1] = clock::now(); + const milliseconds duration = chrono::duration_cast(start_and_end[1] - start_and_end[0]); // heatup - if(block_size >= 32) + if(block_size >= MIN_BLOCK_SIZE) + { + const double speed = data_size / duration.count() / 1024; // MB/s + results.write[block_size] = speed; std::cout << " write block size " << std::setw(8) << block_size << " " << std::fixed - << std::setprecision(3) << (data_size / 1024.0 / 1024 / tm) << " MB/s" << std::endl; + << std::setprecision(3) << speed << " MB/s" << std::endl; + } } - for(int block_size = 32; block_size <= 8192; block_size *= 2) + for(int block_size = MIN_BLOCK_SIZE; block_size <= MAX_BLOCK_SIZE; block_size *= 2) { - std::vector buf(block_size, ' '); - int size = 0; + std::vector buf = get_rand_data(block_size); + FStream tmp(file, true); tmp.rewind(); - boost::chrono::high_resolution_clock::time_point t1 = boost::chrono::high_resolution_clock::now(); - while(size < data_size) + start_and_end[0] = clock::now(); + BOOST_NOWIDE_READ_WRITE_BARRIER(); + for(int size = 0; size < data_size; size += block_size) { tmp.read(&buf[0], block_size); - size += block_size; + BOOST_NOWIDE_READ_WRITE_BARRIER(); } - boost::chrono::high_resolution_clock::time_point t2 = boost::chrono::high_resolution_clock::now(); - double tm = boost::chrono::duration_cast(t2 - t1).count() * 1e-3; - std::cout << " read block size " << std::setw(8) << block_size << " " << std::fixed << std::setprecision(3) - << (data_size / 1024.0 / 1024 / tm) << " MB/s" << std::endl; + start_and_end[1] = clock::now(); + const milliseconds duration = chrono::duration_cast(start_and_end[1] - start_and_end[0]); + const double speed = data_size / duration.count() / 1024; // MB/s + results.read[block_size] = speed; + std::cout << " read block size " << std::setw(8) << block_size << " " << std::fixed << std::setprecision(3) + << speed << " MB/s" << std::endl; } - tmp.close(); std::remove(file); + return results; +} + +template +perf_data test_io_driver(const char* file, const char* type) +{ + std::cout << "Testing I/O performance for " << type << std::endl; + const int repeats = 5; + std::vector results(repeats); + + for(int i = 0; i < repeats; i++) + results[i] = test_io(file); + for(int block_size = MIN_BLOCK_SIZE; block_size <= MAX_BLOCK_SIZE; block_size *= 2) + { + double read_speed = 0, write_speed = 0; + for(int i = 0; i < repeats; i++) + { + read_speed += results[i].read.at(block_size); + write_speed += results[i].write.at(block_size); + } + results[0].read[block_size] = read_speed / repeats; + results[0].write[block_size] = write_speed / repeats; + } + return results[0]; +} + +void print_perf_data(const std::map& stdio_data, + const std::map& std_data, + const std::map& nowide_data) +{ + std::cout << "block size" + << " stdio " + << " std::fstream " + << "nowide::fstream" << std::endl; + for(int block_size = MIN_BLOCK_SIZE; block_size <= MAX_BLOCK_SIZE; block_size *= 2) + { + std::cout << std::setw(8) << block_size << " "; + std::cout << std::fixed << std::setprecision(3) << std::setw(8) << stdio_data.at(block_size) << " MB/s "; + std::cout << std::fixed << std::setprecision(3) << std::setw(8) << std_data.at(block_size) << " MB/s "; + std::cout << std::fixed << std::setprecision(3) << std::setw(8) << nowide_data.at(block_size) << " MB/s "; + std::cout << std::endl; + } } void test_perf(const char* file) { - test_io(file, "stdio"); - test_io >(file, "std::fstream"); - test_io >(file, "nowide::fstream"); + perf_data stdio_data = test_io_driver(file, "stdio"); + perf_data std_data = test_io_driver >(file, "std::fstream"); + perf_data nowide_data = test_io_driver >(file, "nowide::fstream"); + std::cout << "================== Read performance ==================" << std::endl; + print_perf_data(stdio_data.read, std_data.read, nowide_data.read); + std::cout << "================== Write performance =================" << std::endl; + print_perf_data(stdio_data.write, std_data.write, nowide_data.write); } int main(int argc, char** argv) From 228699f2dd1bc1b5422eb6c877a58ff3dc20998d Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Thu, 6 Feb 2020 12:42:28 +0100 Subject: [PATCH 2/2] Use BUFSIZ for the filebuf buffer size Improves performance greatly Fixes #31 --- include/boost/nowide/filebuf.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/boost/nowide/filebuf.hpp b/include/boost/nowide/filebuf.hpp index 8ce699d..4fbe428 100644 --- a/include/boost/nowide/filebuf.hpp +++ b/include/boost/nowide/filebuf.hpp @@ -64,7 +64,8 @@ namespace nowide { /// Creates new filebuf /// basic_filebuf() : - buffer_size_(4), buffer_(0), file_(0), owns_buffer_(false), last_char_(0), mode_(std::ios_base::openmode(0)) + buffer_size_(BUFSIZ), buffer_(0), file_(0), owns_buffer_(false), last_char_(0), + mode_(std::ios_base::openmode(0)) { setg(0, 0, 0); setp(0, 0);