From ce3c5e5fbcd1bdc3f80c1bbd103ca5567ad8c147 Mon Sep 17 00:00:00 2001 From: jzmaddock Date: Sat, 9 May 2015 18:32:40 +0100 Subject: [PATCH] Improve cbrt comparison code. Tidy up docs. Clean up unnecessary #includes Improve file name handling. Re run performance tests. --- doc/math.qbk | 2 +- doc/roots/root_comparison.qbk | 42 +++---- doc/roots/root_comparison_tables_msvc.qbk | 20 +-- example/root_finding_algorithms.cpp | 141 +++++++++++++++------- 4 files changed, 124 insertions(+), 81 deletions(-) diff --git a/doc/math.qbk b/doc/math.qbk index 2813c8371..87b2d6557 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -401,7 +401,7 @@ and use the function's name as the link text.] [def __epsilon [@http://en.wikipedia.org/wiki/Machine_epsilon machine epsilon]] [def __ADL [@http://en.cppreference.com/w/cpp/language/adl Argument Dependent Lookup (ADL)]] [def __function_template_instantiation [@http://en.cppreference.com/w/cpp/language/function_template Function template instantiation]] -[def __fundamental_types [@http://en.cppreference.com/w/cpp/language/types fundamental]] +[def __fundamental_types [@http://en.cppreference.com/w/cpp/language/types fundamental types]] [def __guard_digits [@http://en.wikipedia.org/wiki/Guard_digit guard digits]] [def __SSE2 [@http://en.wikipedia.org/wiki/SSE2 SSE2 instructions]] [def __representable [@http://en.wikipedia.org/wiki/Floating_point#Representable_numbers.2C_conversion_and_rounding representable]] diff --git a/doc/roots/root_comparison.qbk b/doc/roots/root_comparison.qbk index 01f0de759..aa03a8521 100644 --- a/doc/roots/root_comparison.qbk +++ b/doc/roots/root_comparison.qbk @@ -10,7 +10,7 @@ http://www.boost.org/LICENSE_1_0.txt). [section:cbrt_comparison Comparison of Cube Root Finding Algorithms] -In the table below, the cube root of 28 was computed 10000 for three __fundamental_types floating-point types, +In the table below, the cube root of 28 was computed for three __fundamental_types floating-point types, and one __multiprecision type __cpp_bin_float using 50 decimal digit precision, using four algorithms. The 'exact' answer was computed using a 100 decimal digit type: @@ -19,7 +19,7 @@ The 'exact' answer was computed using a 100 decimal digit type: Times were measured using __boost_timer using `class cpu_timer`. -* ['Its] is the number of iterations. +* ['Its] is the number of iterations taken to find the root. * ['Times] is the CPU time-taken in arbitrary units. * ['Norm] is a normalized time, in comparison to the quickest algorithm (with value 1.00). * ['Dis] is the distance from the nearest representation of the 'exact' root in bits. @@ -36,18 +36,28 @@ The program used was [@ ../../example/root_finding_algorithms.cpp root_finding_a judged from repeat runs to have an uncertainty of 10 %. Comparing MSVC for `double` and `long double` (which are identical on this patform) may give a guide to uncertainty of timing. +The requested precision was set as follows: + +[table +[[Function][Precision Requested]] +[[TOMS748][numeric_limits::digits - 2]] +[[Newton][[lfloor]numeric_limits::digits * 0.6[rfloor]]] +[[Halley][[lfloor]numeric_limits::digits * 0.4[rfloor]]] +[[Schroeder][[lfloor]numeric_limits::digits * 0.4[rfloor]]] +] + * The C++ Standard cube root function [@http://en.cppreference.com/w/cpp/numeric/math/cbrt std::cbrt] is only defined for built-in or fundamental types, so cannot be used with any User-Defined floating-point types like __multiprecision. This, and that the cube function is so impeccably-behaved, allows the implementer to use many tricks to achieve a fast computation. On some platforms,`std::cbrt` appeared several times as quick as the more general `boost::math::cbrt`, -on other platforms `boost::math::cbrt` is noticably faster. In general, the results are highly +on other platforms / compiler options `boost::math::cbrt` is noticably faster. In general, the results are highly dependent on the code-generation / processor architecture selection compiler options used. One can assume that the standard library will have been compiled with options ['nearly] optimal for the platform it was installed on, where as the user has more choice over the options used for Boost.Math. Pick something too general/conservative and performance suffers, while selecting options that make use of the latest -instruction set opcodes speed's things up noticably.' +instruction set opcodes speed's things up noticably. * Two compilers in optimise mode were compared: GCC 4.9.1 using Netbeans IDS and Microsoft Visual Studio 2013 (Update 1) on the same hardware. @@ -67,14 +77,10 @@ Typically, it was found that computation using type `double` took a few times longer when using the various root-finding algorithms directly rather than the hand coded/optimized `cbrt` routine. -* The importance of getting a good guess can be shown by using the `std::pow(x, 1.L/3)` function call -to obtain a `double` or `long double` 15 decimal-digit precision guess at cube-root -(much closer than the simple divide-exponent-by-3 method to get a 'guess'). -Using Halley's algorithm, it only took [*a single iteration] to achieve 50 decimal-digit precision. +* The importance of getting a good guess can be seen by the iteration count for the multiprecision case: +here we "cheat" a little and use the cube-root calculated to double precision as the initial guess. The limitation of this tatic is that the range of possible (exponent) values may be less than the multiprecision type. -* Using type `float` and __TOMS748, a few less iterations were required, but the time the same as Newton-Raphson. - * For __fundamental_types, there was little to choose between the three derivative methods, but for __cpp_bin_float, __newton was twice as fast. Note that the cube-root is an extreme test case as the cost of calling the functor is so cheap that the runtimes are largely @@ -90,24 +96,16 @@ Speeds are often more than 50 times slower. * Using `cpp_bin_float_50`, __TOMS748 was much slower showing the benefit of using derivatives. __newton was found to be twice as quick as either of the second-derivative methods: -this must reflect the computational cost of calculating and using the extra derivatives -versus saving one or two iterations. +this is an extreme case though, the function and its derivatives are so cheap to compute that we're +really measuring the complexity of the boilerplate root-finding code. -* Only one or two extra ['iterations] are needed to get the remaining 35 digits, whatever the algorithm used. -(The time taken was of course much greater for __multiprecision types). +* For multiprecision types only one or two extra ['iterations] are needed to get the remaining 35 digits, whatever the algorithm used. +(The time taken was of course much greater for these types). * Using a 100 decimal-digit type only doubled the time and required only a very few more iterations, so the cost of extra precision is mainly the underlying cost of computing more digits, not in the way the algorithm works. This confirms previous observations using __NTL high-precision types. -* Experiment suggests that decreasing the number of bits of accuracy required from the maximum possible, -for example, to 3/4, does not decrease the accuracy as measured by the floating-point distances: -they remain essentially 'right'. -This suggests that requiring the maximum-possible accuracy means that an extra unproductive iteration may take place. - -[include root_comparison_tables_gcc_075.qbk] -[include root_comparison_tables_msvc_075.qbk] -[include root_comparison_tables_gcc.qbk] [include root_comparison_tables_msvc.qbk] [endsect] [/section:cbrt_comparison Comparison of Cube Root Finding Algorithms] diff --git a/doc/roots/root_comparison_tables_msvc.qbk b/doc/roots/root_comparison_tables_msvc.qbk index 8a874b08a..24b8703ad 100644 --- a/doc/roots/root_comparison_tables_msvc.qbk +++ b/doc/roots/root_comparison_tables_msvc.qbk @@ -7,21 +7,13 @@ http://www.boost.org/LICENSE_1_0.txt). ] -[h5 Program I:\modular-boost\libs\math\example\root_finding_algorithms.cpp, Microsoft Visual C++ version 12.0, Dinkumware standard library version 610, Win32, Compiled in optimise mode.[br]1000000 evaluations of each of 5 root_finding algorithms.[br]Fraction of maximum possible bits of accuracy required is 1] -[table:cbrt_5 Info for float, double, long double and cpp_bin_float_50 -[[type name] [max_digits10] [binary digits] [required digits]] -[[float][9][24][24]] -[[double][17][53][53]] -[[long double][17][53][53]] -[[cpp_bin_float_50][52][168][168]] -] [/table cbrt_5] - +[h5 Program root_finding_algorithms.cpp, Microsoft Visual C++ version 12.0, Dinkumware standard library version 610, Win32, x64[br]1000000 evaluations of each of 5 root_finding algorithms.] [table:cbrt_4 Cube root(28) for float, double, long double and cpp_bin_float_50 [[][float][][][] [][double][][][] [][long d][][][] [][cpp50][][]] [[Algorithm][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ][Its][Times][Norm][Dis][ ]] -[[cbrt ][ 0][203125][1.0][ 0][ ][ 0][203125][1.0][ 1][ ][ 0][203125][1.0][ 1][ ][ 0][9312500][1.0][ 0][ ]] -[[TOMS748 ][ 8][859375][4.2][ -1][ ][ 11][1109375][5.5][ 2][ ][ 11][1140625][5.6][ 2][ ][ 7][130703125][14.][ -2][ ]] -[[Newton ][ 6][1062500][5.2][ 0][ ][ 7][1015625][5.0][ 0][ ][ 7][1031250][5.1][ 0][ ][ 9][33640625][3.6][ -1][ ]] -[[Halley ][ 4][1046875][5.2][ 0][ ][ 5][1093750][5.4][ 0][ ][ 5][1078125][5.3][ 0][ ][ 6][60750000][6.5][ 0][ ]] -[[Schroeder][ 5][1078125][5.3][ 0][ ][ 6][1078125][5.3][ 0][ ][ 6][1062500][5.2][ 0][ ][ 7][64343750][6.9][ 0][ ]] +[[cbrt ][ 0][46875][1.0][ 0][ ][ 0][46875][1.0][ 1][ ][ 0][46875][1.0][ 1][ ][ 0][6078125][1.2][ 0][ ]] +[[TOMS748 ][ 8][234375][5.0][ -1][ ][ 11][453125][9.7][ 2][ ][ 11][437500][9.3][ 2][ ][ 7][73859375][14.][ -2][ ]] +[[Newton ][ 5][109375][2.3][ 0][ ][ 6][125000][2.7][ 0][ ][ 6][140625][3.0][ 0][ ][ 2][5125000][1.0][ 0][ ]] +[[Halley ][ 3][125000][2.7][ 0][ ][ 4][171875][3.7][ 0][ ][ 4][171875][3.7][ 0][ ][ 2][11531250][2.3][ 0][ ]] +[[Schroeder][ 4][265625][5.7][ 0][ ][ 5][218750][4.7][ 0][ ][ 5][234375][5.0][ 0][ ][ 2][12000000][2.3][ 0][ ]] ] [/end of table cbrt_4] diff --git a/example/root_finding_algorithms.cpp b/example/root_finding_algorithms.cpp index 25b415f6b..edda9c9a4 100644 --- a/example/root_finding_algorithms.cpp +++ b/example/root_finding_algorithms.cpp @@ -16,9 +16,6 @@ #include #include #include -#include -#include -#include #include "table_type.hpp" // Copy of i:\modular-boost\libs\math\test\table_type.hpp @@ -35,7 +32,7 @@ //using boost::math::tools::schroeder_iterate; #include // For float_distance. -#include // for tuple and make_tuple. +#include // for tuple and make_tuple. #include // For boost::math::cbrt. #include // is binary. @@ -67,14 +64,52 @@ using boost::multiprecision::cpp_bin_float_50; std::string sourcefilename = __FILE__; #endif -#ifdef BOOST_ROOT - std::string boost_root = BOOST_STRINGIZE(BOOST_ROOT); -#endif +std::string chop_last(std::string s) +{ + std::string::size_type pos = s.find_last_of("\\/"); + if(pos != std::string::npos) + s.erase(pos); + else if(s.empty()) + abort(); + else + s.erase(); + return s; +} + +std::string make_root() +{ + std::string result; + if(sourcefilename.find_first_of(":") != std::string::npos) + { + result = chop_last(sourcefilename); // lose filename part + result = chop_last(result); // lose /example/ + result = chop_last(result); // lose /math/ + result = chop_last(result); // lose /libs/ + } + else + { + result = chop_last(sourcefilename); // lose filename part + if(result.empty()) + result = "."; + result += "/../../.."; + } + return result; +} + +std::string short_file_name(std::string s) +{ + std::string::size_type pos = s.find_last_of("\\/"); + if(pos != std::string::npos) + s.erase(0, pos + 1); + return s; +} + +std::string boost_root = make_root(); #ifdef _MSC_VER - std::string filename = boost_root.append("libs/math/doc/roots/root_comparison_tables_msvc.qbk"); + std::string filename = boost_root.append("/libs/math/doc/roots/root_comparison_tables_msvc.qbk"); #else // assume GCC - std::string filename = boost_root.append("libs/math/doc/roots/root_comparison_tables_gcc.qbk"); + std::string filename = boost_root.append("/libs/math/doc/roots/root_comparison_tables_gcc.qbk"); #endif std::ofstream fout (filename.c_str(), std::ios_base::out); @@ -134,21 +169,17 @@ inline std::string build_test_name(const char* type_name, const char* test_name) result += type_name; result += "|"; result += test_name; -#ifdef _DEBUG -#if (_DEBUG == 0) +#if defined(_DEBUG ) || !defined(NDEBUG) result += "|"; result += " debug"; #else result += "|"; result += " release"; -#endif #endif result += "|"; return result; } -double digits_accuracy = 1. ; // 1 == maximum possible accuracy. - // No derivatives - using TOMS748 internally. template struct cbrt_functor_noderiv @@ -192,7 +223,7 @@ T cbrt_noderiv(T x) bool is_rising = true; // So if result if guess^3 is too low, then try increasing guess. int digits = std::numeric_limits::digits; // Maximum possible binary digits accuracy for type T. // Some fraction of digits is used to control how accurate to try to make the result. - int get_digits = static_cast(digits * digits_accuracy); + int get_digits = static_cast(std::numeric_limits::digits - 2); eps_tolerance tol(get_digits); // Set the tolerance. std::pair r = @@ -227,12 +258,18 @@ T cbrt_deriv(T x) { // return cube root of x using 1st derivative and Newton_Raphson. using namespace boost::math::tools; int exponent; - frexp(x, &exponent); // Get exponent of z (ignore mantissa). - T guess = ldexp(static_cast(1), exponent / 3); // Rough guess is to divide the exponent by three. - T min = ldexp(static_cast(0.5), exponent / 3); // Minimum possible value is half our guess. - T max = ldexp(static_cast(2), exponent / 3); // Maximum possible value is twice our guess. + T guess; + if(boost::is_fundamental::value) + { + frexp(x, &exponent); // Get exponent of z (ignore mantissa). + guess = ldexp(static_cast(1), exponent / 3); // Rough guess is to divide the exponent by three. + } + else + guess = boost::math::cbrt(static_cast(x)); + T min = guess / 2; // Minimum possible value is half our guess. + T max = 2 * guess; // Maximum possible value is twice our guess. const int digits = std::numeric_limits::digits; // Maximum possible binary digits accuracy for type T. - int get_digits = static_cast(digits * digits_accuracy); + int get_digits = static_cast(std::numeric_limits::digits * 0.6); const boost::uintmax_t maxit = 20; boost::uintmax_t it = maxit; T result = newton_raphson_iterate(cbrt_functor_deriv(x), guess, min, max, get_digits, it); @@ -249,13 +286,12 @@ struct cbrt_functor_2deriv { // Constructor stores value a to find root of, for example: // calling cbrt_functor_2deriv(x) to get cube root of x, } - boost::math::tuple operator()(T const& x) + std::tuple operator()(T const& x) { // Return both f(x) and f'(x) and f''(x). - using boost::math::make_tuple; T fx = x*x*x - a; // Difference (estimate x^3 - value). T dx = 3 * x*x; // 1st derivative = 3x^2. T d2x = 6 * x; // 2nd derivative = 6x. - return make_tuple(fx, dx, d2x); // 'return' fx, dx and d2x. + return std::make_tuple(fx, dx, d2x); // 'return' fx, dx and d2x. } private: T a; // to be 'cube_rooted'. @@ -267,13 +303,19 @@ T cbrt_2deriv(T x) //using namespace std; // Help ADL of std functions. using namespace boost::math::tools; int exponent; - frexp(x, &exponent); // Get exponent of z (ignore mantissa). - T guess = ldexp(static_cast(1), exponent / 3); // Rough guess is to divide the exponent by three. - T min = ldexp(static_cast(0.5), exponent / 3); // Minimum possible value is half our guess. - T max = ldexp(static_cast(2), exponent / 3);// Maximum possible value is twice our guess. + T guess; + if(boost::is_fundamental::value) + { + frexp(x, &exponent); // Get exponent of z (ignore mantissa). + guess = ldexp(static_cast(1), exponent / 3); // Rough guess is to divide the exponent by three. + } + else + guess = boost::math::cbrt(static_cast(x)); + T min = guess / 2; // Minimum possible value is half our guess. + T max = 2 * guess; // Maximum possible value is twice our guess. const int digits = std::numeric_limits::digits; // Maximum possible binary digits accuracy for type T. // digits used to control how accurate to try to make the result. - int get_digits = static_cast(digits * digits_accuracy); + int get_digits = static_cast(std::numeric_limits::digits * 0.4); boost::uintmax_t maxit = 20; boost::uintmax_t it = maxit; T result = halley_iterate(cbrt_functor_2deriv(x), guess, min, max, get_digits, it); @@ -289,13 +331,19 @@ T cbrt_2deriv_s(T x) //using namespace std; // Help ADL of std functions. using namespace boost::math::tools; int exponent; - frexp(x, &exponent); // Get exponent of z (ignore mantissa). - T guess = ldexp(static_cast(1), exponent / 3); // Rough guess is to divide the exponent by three. - T min = ldexp(static_cast(0.5), exponent / 3); // Minimum possible value is half our guess. - T max = ldexp(static_cast(2), exponent / 3);// Maximum possible value is twice our guess. + T guess; + if(boost::is_fundamental::value) + { + frexp(x, &exponent); // Get exponent of z (ignore mantissa). + guess = ldexp(static_cast(1), exponent / 3); // Rough guess is to divide the exponent by three. + } + else + guess = boost::math::cbrt(static_cast(x)); + T min = guess / 2; // Minimum possible value is half our guess. + T max = 2 * guess; // Maximum possible value is twice our guess. const int digits = std::numeric_limits::digits; // Maximum possible binary digits accuracy for type T. // digits used to control how accurate to try to make the result. - int get_digits = static_cast(digits * digits_accuracy); + int get_digits = static_cast(std::numeric_limits::digits * 0.4); const boost::uintmax_t maxit = 20; boost::uintmax_t it = maxit; T result = schroeder_iterate(cbrt_functor_2deriv(x), guess, min, max, get_digits, it); @@ -328,7 +376,7 @@ int test_root(cpp_bin_float_100 big_value, cpp_bin_float_100 answer, const char* root_infos[type_no].bin_digits = std::numeric_limits::digits; - root_infos[type_no].get_digits = static_cast(std::numeric_limits::digits * digits_accuracy); + root_infos[type_no].get_digits = std::numeric_limits::digits; T to_root = static_cast(big_value); T result; // root @@ -339,7 +387,8 @@ int test_root(cpp_bin_float_100 big_value, cpp_bin_float_100 answer, const char* using boost::timer::cpu_times; using boost::timer::cpu_timer; - cpu_times now; // Holds wall, user and system times. + cpu_times now; // Holds wall, user and system times. + T sum = 0; // std::cbrt is much the fastest, but not useful for this comparison because it only handles fundamental types. // Using enable_if allows us to avoid a compile fail with multiprecision types, but still distorts the results too much. @@ -397,7 +446,8 @@ int test_root(cpp_bin_float_100 big_value, cpp_bin_float_100 answer, const char* ti.start(); for (long i = 0; i < count; ++i) { - result += boost::math::cbrt(to_root); // + result = boost::math::cbrt(to_root); // + sum += result; } now = ti.elapsed(); boost:int_least64_t n = now.user; @@ -409,7 +459,6 @@ int test_root(cpp_bin_float_100 big_value, cpp_bin_float_100 answer, const char* { root_infos[type_no].min_time = time; } - result = boost::math::cbrt(to_root); long int distance = static_cast(boost::math::float_distance(result, ans)); root_infos[type_no].distances.push_back(distance); root_infos[type_no].iterations.push_back(0); // Iterations not knowable. @@ -422,6 +471,7 @@ int test_root(cpp_bin_float_100 big_value, cpp_bin_float_100 answer, const char* for (long i = 0; i < count; ++i) { result = cbrt_noderiv(to_root); // + sum += result; } now = ti.elapsed(); // int time = static_cast(now.user / count); @@ -444,6 +494,7 @@ int test_root(cpp_bin_float_100 big_value, cpp_bin_float_100 answer, const char* for (long i = 0; i < count; ++i) { result = cbrt_deriv(to_root); // + sum += result; } now = ti.elapsed(); // int time = static_cast(now.user / count); @@ -467,6 +518,7 @@ int test_root(cpp_bin_float_100 big_value, cpp_bin_float_100 answer, const char* for (long i = 0; i < count; ++i) { result = cbrt_2deriv(to_root); // + sum += result; } now = ti.elapsed(); // int time = static_cast(now.user / count); @@ -490,6 +542,7 @@ int test_root(cpp_bin_float_100 big_value, cpp_bin_float_100 answer, const char* for (long i = 0; i < count; ++i) { result = cbrt_2deriv_s(to_root); // + sum += result; } now = ti.elapsed(); // int time = static_cast(now.user / count); @@ -512,6 +565,7 @@ int test_root(cpp_bin_float_100 big_value, cpp_bin_float_100 answer, const char* root_infos[type_no].normed_times.push_back(normed_time); } algo++; + std::cout << "Accumulated sum was " << sum << std::endl; return algo; // Count of how many algorithms used. } // test_root @@ -542,7 +596,7 @@ void table_root_info(cpp_bin_float_100 full_value, cpp_bin_float_100 full_answer std::cout << "Floating-point type = " << root_infos[tp].full_typename << std::endl; std::cout << "Max_digits10 = " << root_infos[tp].max_digits10 << std::endl; std::cout << "Binary digits = " << root_infos[tp].bin_digits << std::endl; - std::cout << "Accuracy digits = " << root_infos[tp].get_digits << std::endl; + std::cout << "Accuracy digits = " << root_infos[tp].get_digits - 2 << ", " << static_cast(root_infos[tp].get_digits * 0.6) << ", " << static_cast(root_infos[tp].get_digits * 0.4) << std::endl; std::cout << "min_time = " << root_infos[tp].min_time << std::endl; std::cout << std::setprecision(root_infos[tp].max_digits10 ) << "Roots = "; @@ -567,7 +621,7 @@ void table_root_info(cpp_bin_float_100 full_value, cpp_bin_float_100 full_answer } // for tp // Print info as Quickbook table. - +#if 0 fout << "[table:cbrt_5 Info for float, double, long double and cpp_bin_float_50\n" << "[[type name] [max_digits10] [binary digits] [required digits]]\n";// header. @@ -580,7 +634,7 @@ void table_root_info(cpp_bin_float_100 full_value, cpp_bin_float_100 full_answer << "[" << root_infos[tp].get_digits << "]]\n"; // Accuracy digits. } // tp fout << "] [/table cbrt_5] \n" << std::endl; - +#endif // Prepare Quickbook table of floating-point types. fout << "[table:cbrt_4 Cube root(28) for float, double, long double and cpp_bin_float_50\n" << "[[][float][][][] [][double][][][] [][long d][][][] [][cpp50][][]]\n" @@ -650,13 +704,12 @@ int main() #endif // Print out the program/compiler/stdlib/platform names as a Quickbook comment: - fout << "\n[h5 Program " << sourcefilename << ", " + fout << "\n[h5 Program " << short_file_name(sourcefilename) << ", " << BOOST_COMPILER << ", " << BOOST_STDLIB << ", " - << BOOST_PLATFORM << ", " + << BOOST_PLATFORM << (sizeof(void*) == 8 ? ", x64" : ", x86") << debug_or_optimize << "[br]" - << count << " evaluations of each of " << algo_names.size() << " root_finding algorithms.[br]" - << "Fraction of maximum possible bits of accuracy required is " << digits_accuracy + << count << " evaluations of each of " << algo_names.size() << " root_finding algorithms." << "]" << std::endl;