From f5a17cdf7e6fb114b16cb4ab0e3fa051af21ef8b Mon Sep 17 00:00:00 2001 From: ruben Date: Mon, 11 May 2020 13:21:29 +0100 Subject: [PATCH] Now text protocol supports invalid dates Changed text protocol to support invalid dates and datetimes Added tests Now months greater than 12, days greater than 31, years greater than 9999 are always rejected --- TODO.txt | 1 - .../boost/mysql/detail/protocol/constants.hpp | 17 +- .../protocol/impl/binary_deserialization.ipp | 54 +-- .../protocol/impl/text_deserialization.ipp | 339 ++++++++++++------ include/boost/mysql/impl/value.hpp | 7 + .../protocol/binary_deserialization_error.cpp | 56 ++- .../protocol/binary_deserialization_value.cpp | 82 ++--- .../protocol/text_deserialization_error.cpp | 111 +++--- .../protocol/text_deserialization_value.cpp | 69 +++- 9 files changed, 449 insertions(+), 287 deletions(-) diff --git a/TODO.txt b/TODO.txt index 68a40562..57fca2a6 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,5 +1,4 @@ Sanitize - Change deserialize_text_value to allow zero and invalid dates/datetimes Integ tests for zero and invalid dates Remove deserialization test error fixture Change text row to use parameterized tests diff --git a/include/boost/mysql/detail/protocol/constants.hpp b/include/boost/mysql/detail/protocol/constants.hpp index b65c549e..889119ba 100644 --- a/include/boost/mysql/detail/protocol/constants.hpp +++ b/include/boost/mysql/detail/protocol/constants.hpp @@ -121,11 +121,9 @@ constexpr std::size_t date_sz = year_sz + month_sz + day_sz + 2; // delimiters constexpr std::size_t time_min_sz = hours_min_sz + mins_sz + secs_sz + 2; // delimiters constexpr std::size_t time_max_sz = time_min_sz + max_decimals + 3; // sign, period, hour extra character constexpr std::size_t datetime_min_sz = date_sz + time_min_sz + 1; // delimiter +constexpr std::size_t datetime_max_sz = datetime_min_sz + max_decimals + 1; // period -constexpr unsigned max_hour = 838; -constexpr unsigned max_min = 59; -constexpr unsigned max_sec = 59; 
-constexpr unsigned max_micro = 999999; +constexpr unsigned time_max_hour = 838; } // textc @@ -152,14 +150,19 @@ constexpr std::size_t datetime_dhmsu_sz = datetime_dhms_sz + micros_sz; constexpr std::size_t time_dhms_sz = time_sign_sz + time_days_sz + hours_sz + mins_sz + secs_sz; constexpr std::size_t time_dhmsu_sz = time_dhms_sz + micros_sz; -constexpr std::size_t max_days = 34; // equivalent to the 839 hours, in the broken format +constexpr std::size_t time_max_days = 34; // equivalent to the 839 hours, in the broken format + +} // binc + +// Constants common to both protocols +constexpr unsigned max_year = 9999; +constexpr unsigned max_month = 12; +constexpr unsigned max_day = 31; constexpr unsigned max_hour = 23; constexpr unsigned max_min = 59; constexpr unsigned max_sec = 59; constexpr unsigned max_micro = 999999; -} // binc - } // detail } // mysql diff --git a/include/boost/mysql/detail/protocol/impl/binary_deserialization.ipp b/include/boost/mysql/detail/protocol/impl/binary_deserialization.ipp index dcfdc001..3c5bfb4e 100644 --- a/include/boost/mysql/detail/protocol/impl/binary_deserialization.ipp +++ b/include/boost/mysql/detail/protocol/impl/binary_deserialization.ipp @@ -20,7 +20,7 @@ namespace detail { // ints and strings template -errc deserialize_binary_value_to_variant_value_holder( +errc deserialize_binary_value_value_holder( deserialization_context& ctx, value& output ) noexcept @@ -39,22 +39,22 @@ template < typename DeserializableTypeUnsigned, typename DeserializableTypeSigned > -errc deserialize_binary_value_to_variant_int( +errc deserialize_binary_value_int( const field_metadata& meta, deserialization_context& ctx, value& output ) noexcept { return meta.is_unsigned() ? 
- deserialize_binary_value_to_variant_value_holder< + deserialize_binary_value_value_holder< TargetTypeUnsigned, DeserializableTypeUnsigned>(ctx, output) : - deserialize_binary_value_to_variant_value_holder< + deserialize_binary_value_value_holder< TargetTypeSigned, DeserializableTypeSigned>(ctx, output); } // Floats template -errc deserialize_binary_value_to_variant_float( +errc deserialize_binary_value_float( deserialization_context& ctx, value& output ) noexcept @@ -94,10 +94,19 @@ inline errc deserialize_binary_ymd( int1 month; int1 day; + // Deserialize auto err = deserialize_fields(ctx, year, month, day); if (err != errc::ok) return err; + // Range check + if (year.value > max_year || + month.value > max_month || + day.value > max_day) + { + return errc::protocol_value_error; + } + output = ::date::year_month_day ( ::date::year(year.value), ::date::month(month.value), @@ -107,14 +116,7 @@ inline errc deserialize_binary_ymd( return errc::ok; } -inline bool is_out_of_range( - const date& d -) -{ - return d < min_date || d > max_date; -} - -inline errc deserialize_binary_value_to_variant_date( +inline errc deserialize_binary_value_date( deserialization_context& ctx, value& output ) noexcept @@ -155,7 +157,7 @@ inline errc deserialize_binary_value_to_variant_date( return errc::ok; } -inline errc deserialize_binary_value_to_variant_datetime( +inline errc deserialize_binary_value_datetime( deserialization_context& ctx, value& output ) noexcept @@ -237,7 +239,7 @@ inline errc deserialize_binary_value_to_variant_datetime( return errc::ok; } -inline errc deserialize_binary_value_to_variant_time( +inline errc deserialize_binary_value_time( deserialization_context& ctx, value& output ) noexcept @@ -282,7 +284,7 @@ inline errc deserialize_binary_value_to_variant_time( } // Range check - if (days.value > max_days || + if (days.value > time_max_days || hours.value > max_hour || minutes.value > max_min || seconds.value > max_sec || @@ -316,30 +318,30 @@ inline 
boost::mysql::errc boost::mysql::detail::deserialize_binary_value( switch (meta.protocol_type()) { case protocol_field_type::tiny: - return deserialize_binary_value_to_variant_int< + return deserialize_binary_value_int< std::uint32_t, std::int32_t, int1, int1_signed>(meta, ctx, output); case protocol_field_type::short_: case protocol_field_type::year: - return deserialize_binary_value_to_variant_int< + return deserialize_binary_value_int< std::uint32_t, std::int32_t, int2, int2_signed>(meta, ctx, output); case protocol_field_type::int24: case protocol_field_type::long_: - return deserialize_binary_value_to_variant_int< + return deserialize_binary_value_int< std::uint32_t, std::int32_t, int4, int4_signed>(meta, ctx, output); case protocol_field_type::longlong: - return deserialize_binary_value_to_variant_int< + return deserialize_binary_value_int< std::uint64_t, std::int64_t, int8, int8_signed>(meta, ctx, output); case protocol_field_type::float_: - return deserialize_binary_value_to_variant_float(ctx, output); + return deserialize_binary_value_float(ctx, output); case protocol_field_type::double_: - return deserialize_binary_value_to_variant_float(ctx, output); + return deserialize_binary_value_float(ctx, output); case protocol_field_type::timestamp: case protocol_field_type::datetime: - return deserialize_binary_value_to_variant_datetime(ctx, output); + return deserialize_binary_value_datetime(ctx, output); case protocol_field_type::date: - return deserialize_binary_value_to_variant_date(ctx, output); + return deserialize_binary_value_date(ctx, output); case protocol_field_type::time: - return deserialize_binary_value_to_variant_time(ctx, output); + return deserialize_binary_value_time(ctx, output); // True string types case protocol_field_type::varchar: case protocol_field_type::var_string: @@ -356,7 +358,7 @@ inline boost::mysql::errc boost::mysql::detail::deserialize_binary_value( case protocol_field_type::newdecimal: case protocol_field_type::geometry: 
default: - return deserialize_binary_value_to_variant_value_holder(ctx, output); + return deserialize_binary_value_value_holder(ctx, output); } } diff --git a/include/boost/mysql/detail/protocol/impl/text_deserialization.ipp b/include/boost/mysql/detail/protocol/impl/text_deserialization.ipp index d26fd633..85cf78d0 100644 --- a/include/boost/mysql/detail/protocol/impl/text_deserialization.ipp +++ b/include/boost/mysql/detail/protocol/impl/text_deserialization.ipp @@ -10,6 +10,7 @@ #include #include +#include #include #include "boost/mysql/detail/protocol/constants.hpp" @@ -17,60 +18,223 @@ namespace boost { namespace mysql { namespace detail { -inline unsigned sanitize_decimals(unsigned decimals) +// Integers +template +errc deserialize_text_value_int_impl( + std::string_view from, + value& to +) noexcept +{ + bool ok = boost::conversion::try_lexical_convert(from.data(), from.size(), to.emplace()); + return ok ? errc::ok : errc::protocol_value_error; +} + +template +errc deserialize_text_value_int( + std::string_view from, + value& to, + const field_metadata& meta +) noexcept +{ + using SignedType = std::make_signed_t; + return meta.is_unsigned() ? 
+ deserialize_text_value_int_impl(from, to) : + deserialize_text_value_int_impl(from, to); +} + +// Floating points +template +errc deserialize_text_value_float( + std::string_view from, + value& to +) noexcept +{ + T val; + bool ok = boost::conversion::try_lexical_convert(from.data(), from.size(), val); + if (!ok || std::isnan(val) || std::isinf(val)) // SQL std forbids these values + return errc::protocol_value_error; + to = val; + return errc::ok; +} + +// Strings +inline errc deserialize_text_value_string( + std::string_view from, + value& to +) noexcept +{ + to = from; + return errc::ok; +} + +// Date/time types +inline unsigned sanitize_decimals(unsigned decimals) noexcept { return std::min(decimals, textc::max_decimals); } // Computes the meaning of the parsed microsecond number, taking into // account decimals (85 with 2 decimals means 850000us) -inline unsigned compute_micros(unsigned parsed_micros, unsigned decimals) +inline unsigned compute_micros(unsigned parsed_micros, unsigned decimals) noexcept { return parsed_micros * static_cast(std::pow(10, textc::max_decimals - decimals)); } -inline errc deserialize_text_value_impl( +inline errc deserialize_text_ymd( std::string_view from, - date& to -) -{ - // Size check - if (from.size() != textc::date_sz) - return errc::protocol_value_error; - - // Copy to a NULL-terminated buffer - char buffer [textc::date_sz + 1] {}; - memcpy(buffer, from.data(), from.size()); - - // Parse individual components - unsigned year, month, day; - int parsed = sscanf(buffer, "%4u-%2u-%2u", &year, &month, &day); - if (parsed != 3) - return errc::protocol_value_error; - - // Verify date validity - ::date::year_month_day result (::date::year(year)/::date::month(month)/::date::day(day)); - if (!result.ok()) - return errc::protocol_value_error; - - // Range check - to = result; - if (to < min_date || to > max_date) - return errc::protocol_value_error; - - return errc::ok; -} - -inline errc deserialize_text_value_impl( - 
std::string_view from, - time& to, - unsigned decimals + ::date::year_month_day& to ) { using namespace textc; - // Adjust decimals - decimals = sanitize_decimals(decimals); + // Size check + if (from.size() != date_sz) + return errc::protocol_value_error; + + // Copy to a NULL-terminated buffer + char buffer [date_sz + 1] {}; + std::memcpy(buffer, from.data(), from.size()); + + // Parse individual components + unsigned year, month, day; + char extra_char; + int parsed = sscanf(buffer, "%4u-%2u-%2u%c", &year, &month, &day, &extra_char); + if (parsed != 3) + return errc::protocol_value_error; + + // Range check for individual components + if (year > max_year || month > max_month || day > max_day) + return errc::protocol_value_error; + + to = ::date::year_month_day( + ::date::year{static_cast(year)}, + ::date::month{month}, + ::date::day{day} + ); + return errc::ok; +} + +inline errc deserialize_text_value_date( + std::string_view from, + value& to +) noexcept +{ + // Deserialize ymd + ::date::year_month_day ymd; + auto err = deserialize_text_ymd(from, ymd); + if (err != errc::ok) + return err; + + // Verify date validity. MySQL allows zero and invalid dates, which + // we represent in C++ as NULL + if (!ymd.ok()) + { + to = nullptr; + return errc::ok; + } + + // Range check + date d (ymd); + if (is_out_of_range(d)) + return errc::protocol_value_error; + + // Done + to = d; + return errc::ok; +} + +inline errc deserialize_text_value_datetime( + std::string_view from, + value& to, + const field_metadata& meta +) noexcept +{ + using namespace textc; + + // Sanitize decimals + unsigned decimals = sanitize_decimals(meta.decimals()); + + // Length check + std::size_t expected_size = datetime_min_sz + (decimals ? 
decimals + 1 : 0); + if (from.size() != expected_size) + return errc::protocol_value_error; + + // Deserialize date part + ::date::year_month_day ymd; + auto err = deserialize_text_ymd(from.substr(0, date_sz), ymd); + if (err != errc::ok) + return err; + + // Copy to NULL-terminated buffer + constexpr std::size_t datetime_time_first = date_sz + 1; // date + space + char buffer [datetime_max_sz - datetime_time_first + 1] {}; + std::memcpy(buffer, from.data() + datetime_time_first, from.size() - datetime_time_first); + + // Parse + unsigned hours, minutes, seconds; + unsigned micros = 0; + char extra_char; + if (decimals) + { + int parsed = sscanf(buffer, "%2u:%2u:%2u.%6u%c", + &hours, &minutes, &seconds, &micros, &extra_char); + if (parsed != 4) + return errc::protocol_value_error; + micros = compute_micros(micros, decimals); + } + else + { + int parsed = sscanf(buffer, "%2u:%2u:%2u%c", + &hours, &minutes, &seconds, &extra_char); + if (parsed != 3) + return errc::protocol_value_error; + } + + // Validity check. We make this check before + // the invalid date check to make invalid dates with incorrect + // hours/mins/secs/micros fail + if (hours > max_hour || + minutes > max_min || + seconds > max_sec || + micros > max_micro) + { + return errc::protocol_value_error; + } + + // Date validity. 
MySQL allows DATETIMEs with invalid dates, which + // we represent here as NULL + if (!ymd.ok()) + { + to = nullptr; + return errc::ok; + } + + // Range check for date + date d (ymd); + if (is_out_of_range(d)) + return errc::protocol_value_error; + + // Sum it up + to = datetime( + d + + std::chrono::hours(hours) + + std::chrono::minutes(minutes) + + std::chrono::seconds(seconds) + + std::chrono::microseconds(micros) + ); + return errc::ok; +} + +inline errc deserialize_text_value_time( + std::string_view from, + value& to, + const field_metadata& meta +) noexcept +{ + using namespace textc; + + // Sanitize decimals + unsigned decimals = sanitize_decimals(meta.decimals()); // size check std::size_t actual_min_size = time_min_sz + (decimals ? decimals + 1 : 0); @@ -106,12 +270,20 @@ inline errc deserialize_text_value_impl( } // Range check - if (hours > max_hour || minutes > max_min || seconds > max_sec || micros > max_micro) + if (hours > time_max_hour || + minutes > max_min || + seconds > max_sec || + micros > max_micro) + { return errc::protocol_value_error; + } // Sum it - auto res = std::chrono::hours(hours) + std::chrono::minutes(minutes) + - std::chrono::seconds(seconds) + std::chrono::microseconds(micros); + auto res = + std::chrono::hours(hours) + + std::chrono::minutes(minutes) + + std::chrono::seconds(seconds) + + std::chrono::microseconds(micros); if (is_negative) { res = -res; @@ -122,69 +294,6 @@ inline errc deserialize_text_value_impl( return errc::ok; } -inline errc deserialize_text_value_impl( - std::string_view from, - datetime& to, - unsigned decimals -) -{ - using namespace textc; - - // Sanitize decimals - decimals = sanitize_decimals(decimals); - - // Length check - std::size_t expected_size = datetime_min_sz + (decimals ? 
decimals + 1 : 0); - if (from.size() != expected_size) - return errc::protocol_value_error; - - // Parse date - date d; - auto err = deserialize_text_value_impl(from.substr(0, date_sz), d); - if (err != errc::ok) - return err; - - // Time of day part - time time_of_day; - err = deserialize_text_value_impl(from.substr(date_sz + 1), time_of_day, decimals); - if (err != errc::ok) - return err; - - // Range check - constexpr auto max_time_of_day = std::chrono::hours(24) - std::chrono::microseconds(1); - if (time_of_day < std::chrono::seconds(0) || time_of_day > max_time_of_day) - return errc::protocol_value_error; - - // Sum it up - to = d + time_of_day; - return errc::ok; -} - - -template -std::enable_if_t, errc> -deserialize_text_value_impl(std::string_view from, T& to) -{ - bool ok = boost::conversion::try_lexical_convert(from.data(), from.size(), to); - if constexpr (std::is_floating_point_v) - { - ok &= !(std::isnan(to) || std::isinf(to)); // SQL std forbids these values - } - return ok ? errc::ok : errc::protocol_value_error; -} - -inline errc deserialize_text_value_impl(std::string_view from, std::string_view& to) -{ - to = from; - return errc::ok; -} - -template -errc deserialize_text_value_to_variant(std::string_view from, value& to, Args&&... args) -{ - return deserialize_text_value_impl(from, to.emplace(), std::forward(args)...); -} - inline bool is_next_field_null( const deserialization_context& ctx ) @@ -211,24 +320,20 @@ inline boost::mysql::errc boost::mysql::detail::deserialize_text_value( case protocol_field_type::int24: case protocol_field_type::long_: case protocol_field_type::year: - return meta.is_unsigned() ? - deserialize_text_value_to_variant(from, output) : - deserialize_text_value_to_variant(from, output); + return deserialize_text_value_int(from, output, meta); case protocol_field_type::longlong: - return meta.is_unsigned() ? 
- deserialize_text_value_to_variant(from, output) : - deserialize_text_value_to_variant(from, output); + return deserialize_text_value_int(from, output, meta); case protocol_field_type::float_: - return deserialize_text_value_to_variant(from, output); + return deserialize_text_value_float(from, output); case protocol_field_type::double_: - return deserialize_text_value_to_variant(from, output); + return deserialize_text_value_float(from, output); case protocol_field_type::timestamp: case protocol_field_type::datetime: - return deserialize_text_value_to_variant(from, output, meta.decimals()); + return deserialize_text_value_datetime(from, output, meta); case protocol_field_type::date: - return deserialize_text_value_to_variant(from, output); + return deserialize_text_value_date(from, output); case protocol_field_type::time: - return deserialize_text_value_to_variant