2
0
mirror of https://github.com/boostorg/redis.git synced 2026-01-19 04:42:09 +00:00

Makes health checks flexible so they don't tear down connections under heavy load (#328)

Adds error::write_timeout

close #104
This commit is contained in:
Anarthal (Rubén Pérez)
2025-10-20 15:29:20 +02:00
committed by GitHub
parent da09787d29
commit 2b09ecbd78
26 changed files with 863 additions and 399 deletions

View File

@@ -10,6 +10,7 @@
#include <boost/asio/consign.hpp>
#include <boost/asio/detached.hpp>
#include <boost/asio/posix/stream_descriptor.hpp>
#include <boost/asio/read_until.hpp>
#include <boost/asio/redirect_error.hpp>
#include <boost/asio/signal_set.hpp>

View File

@@ -9,6 +9,7 @@
#include <boost/asio/co_spawn.hpp>
#include <boost/asio/consign.hpp>
#include <boost/asio/detached.hpp>
#include <boost/asio/read_until.hpp>
#include <boost/asio/redirect_error.hpp>
#include <boost/asio/signal_set.hpp>

View File

@@ -83,7 +83,7 @@ struct config {
*/
std::optional<int> database_index = 0;
/// Message used by the health-checker in @ref boost::redis::basic_connection::async_run.
/// Message used by `PING` commands sent by the health checker.
std::string health_check_id = "Boost.Redis";
/**
@@ -105,7 +105,24 @@ struct config {
std::chrono::steady_clock::duration ssl_handshake_timeout = std::chrono::seconds{10};
/** @brief Time span between successive health checks.
* Set to zero to disable health-checks pass zero as duration.
* Set to zero to disable health-checks.
*
* When this value is set to a non-zero duration, @ref basic_connection::async_run
* will issue `PING` commands whenever no command is sent to the server for more
* than `health_check_interval`. You can configure the message passed to the `PING`
* command using @ref health_check_id.
*
* Enabling health checks also sets timeouts to individual network
* operations. The connection is considered dead if:
*
* @li No byte can be written to the server after `health_check_interval`.
* @li No byte is read from the server after `2 * health_check_interval`.
*
* If the health checker finds that the connection is unresponsive, it will be closed,
* and a reconnection will be triggered, as if a network error had occurred.
*
* The exact timeout values are *not* part of the interface, and might change
* in future versions.
*/
std::chrono::steady_clock::duration health_check_interval = std::chrono::seconds{2};

View File

@@ -13,11 +13,9 @@
#include <boost/redis/detail/connection_logger.hpp>
#include <boost/redis/detail/connection_state.hpp>
#include <boost/redis/detail/exec_fsm.hpp>
#include <boost/redis/detail/helper.hpp>
#include <boost/redis/detail/multiplexer.hpp>
#include <boost/redis/detail/reader_fsm.hpp>
#include <boost/redis/detail/redis_stream.hpp>
#include <boost/redis/detail/resp3_type_to_error.hpp>
#include <boost/redis/detail/run_fsm.hpp>
#include <boost/redis/detail/setup_request_utils.hpp>
#include <boost/redis/detail/writer_fsm.hpp>
@@ -36,10 +34,9 @@
#include <boost/asio/bind_cancellation_slot.hpp>
#include <boost/asio/bind_executor.hpp>
#include <boost/asio/buffer.hpp>
#include <boost/asio/cancel_after.hpp>
#include <boost/asio/cancel_at.hpp>
#include <boost/asio/cancellation_signal.hpp>
#include <boost/asio/cancellation_type.hpp>
#include <boost/asio/coroutine.hpp>
#include <boost/asio/deferred.hpp>
#include <boost/asio/error.hpp>
#include <boost/asio/experimental/channel.hpp>
@@ -47,14 +44,10 @@
#include <boost/asio/immediate.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/ip/tcp.hpp>
#include <boost/asio/prepend.hpp>
#include <boost/asio/read_until.hpp>
#include <boost/asio/ssl/stream.hpp>
#include <boost/asio/steady_timer.hpp>
#include <boost/asio/write.hpp>
#include <boost/assert.hpp>
#include <boost/config.hpp>
#include <boost/core/ignore_unused.hpp>
#include <array>
#include <chrono>
@@ -66,6 +59,14 @@
namespace boost::redis {
namespace detail {
// Given a timeout value, compute the expiry time. A zero timeout is considered to mean "no timeout"
inline std::chrono::steady_clock::time_point compute_expiry(
std::chrono::steady_clock::duration timeout)
{
return timeout.count() == 0 ? (std::chrono::steady_clock::time_point::max)()
: std::chrono::steady_clock::now() + timeout;
}
template <class Executor>
struct connection_impl {
using clock_type = std::chrono::steady_clock;
@@ -80,11 +81,9 @@ struct connection_impl {
void(system::error_code, std::size_t)>;
redis_stream<Executor> stream_;
// Notice we use a timer to simulate a condition-variable. It is
// also more suitable than a channel and the notify operation does
// not suspend.
timer_type writer_timer_;
timer_type writer_timer_; // timer used for write timeouts
timer_type writer_cv_; // condition variable, cancelled when there is new data to write
timer_type reader_timer_; // timer used for read timeouts
timer_type reconnect_timer_; // to wait the reconnection period
timer_type ping_timer_; // to wait between pings
receive_channel_type receive_channel_;
@@ -93,7 +92,7 @@ struct connection_impl {
using executor_type = Executor;
executor_type get_executor() noexcept { return writer_timer_.get_executor(); }
executor_type get_executor() noexcept { return writer_cv_.get_executor(); }
struct exec_op {
connection_impl* obj_ = nullptr;
@@ -116,7 +115,7 @@ struct connection_impl {
asio::async_immediate(self.get_io_executor(), std::move(self));
return;
case exec_action_type::notify_writer:
obj_->writer_timer_.cancel();
obj_->writer_cv_.cancel();
continue; // this action does not require yielding
case exec_action_type::wait_for_response:
notifier_->async_receive(std::move(self));
@@ -133,13 +132,15 @@ struct connection_impl {
connection_impl(Executor&& ex, asio::ssl::context&& ctx, logger&& lgr)
: stream_{ex, std::move(ctx)}
, writer_timer_{ex}
, writer_cv_{ex}
, reader_timer_{ex}
, reconnect_timer_{ex}
, ping_timer_{ex}
, receive_channel_{ex, 256}
, st_{std::move(lgr)}
{
set_receive_adapter(any_adapter{ignore});
writer_timer_.expires_at((std::chrono::steady_clock::time_point::max)());
writer_cv_.expires_at((std::chrono::steady_clock::time_point::max)());
}
void cancel(operation op)
@@ -200,7 +201,7 @@ struct connection_impl {
return asio::async_compose<CompletionToken, void(system::error_code, std::size_t)>(
exec_op{this, notifier, exec_fsm(st_.mpx, std::move(info))},
token,
writer_timer_);
writer_cv_);
}
void set_receive_adapter(any_adapter adapter)
@@ -216,23 +217,32 @@ struct writer_op {
explicit writer_op(connection_impl<Executor>& conn) noexcept
: conn_(&conn)
, fsm_(conn.st_.mpx, conn.st_.logger)
{ }
template <class Self>
void operator()(Self& self, system::error_code ec = {}, std::size_t bytes_written = 0u)
{
auto act = fsm_.resume(ec, bytes_written, self.get_cancellation_state().cancelled());
auto* conn = conn_; // Prevent potential use-after-move errors with cancel_after
auto act = fsm_.resume(
conn->st_,
ec,
bytes_written,
self.get_cancellation_state().cancelled());
switch (act.type) {
case writer_action_type::done: self.complete(act.ec); return;
case writer_action_type::write:
asio::async_write(
conn_->stream_,
asio::buffer(conn_->st_.mpx.get_write_buffer()),
std::move(self));
switch (act.type()) {
case writer_action_type::done: self.complete(act.error()); return;
case writer_action_type::write_some:
conn->stream_.async_write_some(
asio::buffer(conn->st_.mpx.get_write_buffer()),
asio::cancel_at(
conn->writer_timer_,
compute_expiry(act.timeout()),
std::move(self)));
return;
case writer_action_type::wait:
conn->writer_cv_.expires_at(compute_expiry(act.timeout()));
conn->writer_cv_.async_wait(std::move(self));
return;
case writer_action_type::wait: conn_->writer_timer_.async_wait(std::move(self)); return;
}
}
};
@@ -251,130 +261,34 @@ public:
void operator()(Self& self, system::error_code ec = {}, std::size_t n = 0)
{
for (;;) {
auto act = fsm_.resume(conn_->st_, n, ec, self.get_cancellation_state().cancelled());
auto* conn = conn_; // Prevent potential use-after-move errors with cancel_after
auto act = fsm_.resume(conn->st_, n, ec, self.get_cancellation_state().cancelled());
switch (act.type_) {
switch (act.get_type()) {
case reader_fsm::action::type::read_some:
{
auto const buf = conn_->st_.mpx.get_prepared_read_buffer();
conn_->stream_.async_read_some(asio::buffer(buf), std::move(self));
}
conn->stream_.async_read_some(
asio::buffer(conn->st_.mpx.get_prepared_read_buffer()),
asio::cancel_at(
conn->reader_timer_,
compute_expiry(act.timeout()),
std::move(self)));
return;
case reader_fsm::action::type::notify_push_receiver:
if (conn_->receive_channel_.try_send(ec, act.push_size_)) {
if (conn->receive_channel_.try_send(ec, act.push_size())) {
continue;
} else {
conn_->receive_channel_.async_send(ec, act.push_size_, std::move(self));
return;
conn->receive_channel_.async_send(ec, act.push_size(), std::move(self));
}
return;
case reader_fsm::action::type::done: self.complete(act.ec_); return;
}
}
}
};
template <class Executor>
struct health_checker_op {
connection_impl<Executor>* conn_;
asio::coroutine coro_{};
system::error_code check_errors(system::error_code io_ec, asio::cancellation_type_t cancel_state)
{
// Did we have a cancellation? We might not have an error code here
if ((cancel_state & asio::cancellation_type_t::terminal) != asio::cancellation_type_t::none) {
conn_->st_.logger.log(logger::level::info, "Health checker: cancelled");
return asio::error::operation_aborted;
}
// operation_aborted and no cancel state means that asio::cancel_after timed out
if (io_ec == asio::error::operation_aborted) {
conn_->st_.logger.log(logger::level::info, "Health checker: ping timed out");
return error::pong_timeout;
}
// Did we have other unknown error?
if (io_ec) {
conn_->st_.logger.log(logger::level::info, "Health checker: ping error", io_ec);
return io_ec;
}
// Did the server answer with an error?
if (conn_->st_.ping_resp.has_error()) {
auto error = conn_->st_.ping_resp.error();
conn_->st_.logger.log(
logger::level::info,
"Health checker: server answered ping with an error",
error.diagnostic);
return resp3_type_to_error(error.data_type);
}
// No error
return system::error_code();
}
public:
health_checker_op(connection_impl<Executor>& conn) noexcept
: conn_{&conn}
{ }
template <class Self>
void operator()(Self& self, system::error_code ec = {}, std::size_t = {})
{
BOOST_ASIO_CORO_REENTER(coro_)
{
if (conn_->st_.cfg.health_check_interval == std::chrono::seconds::zero()) {
conn_->st_.logger.trace("ping_op (1): timeout disabled.");
// Wait until we're cancelled. This simplifies parallel group handling a lot
conn_->ping_timer_.expires_at((std::chrono::steady_clock::time_point::max)());
BOOST_ASIO_CORO_YIELD conn_->ping_timer_.async_wait(std::move(self));
self.complete(asio::error::operation_aborted);
return;
}
for (;;) {
// Clean up any previous leftover
clear_response(conn_->st_.ping_resp);
// Execute the request
BOOST_ASIO_CORO_YIELD
{
auto* conn = conn_; // avoid use-after-move problems
auto timeout = conn->st_.cfg.health_check_interval;
conn->async_exec(
conn->st_.ping_req,
any_adapter{conn->st_.ping_resp},
asio::cancel_after(conn->ping_timer_, timeout, std::move(self)));
}
// Check for cancellations and errors in PING
ec = check_errors(ec, self.get_cancellation_state().cancelled());
if (ec) {
self.complete(ec);
return;
}
// Wait before pinging again.
conn_->ping_timer_.expires_after(conn_->st_.cfg.health_check_interval);
BOOST_ASIO_CORO_YIELD
conn_->ping_timer_.async_wait(std::move(self));
if (is_cancelled(self)) {
conn_->st_.logger.trace("ping_op (2): cancelled");
self.complete(asio::error::operation_aborted);
return;
}
case reader_fsm::action::type::done: self.complete(act.error()); return;
}
}
}
};
system::error_code translate_parallel_group_errors(
std::array<std::size_t, 4u> order,
std::array<std::size_t, 3u> order,
system::error_code setup_ec,
system::error_code health_check_ec,
system::error_code reader_ec,
system::error_code writer_ec);
@@ -420,7 +334,7 @@ private:
return asio::async_compose<CompletionToken, void(system::error_code)>(
reader_op<Executor>{*conn_},
std::forward<CompletionToken>(token),
conn_->writer_timer_);
conn_->writer_cv_);
}
template <class CompletionToken>
@@ -429,16 +343,7 @@ private:
return asio::async_compose<CompletionToken, void(system::error_code)>(
writer_op<Executor>{*conn_},
std::forward<CompletionToken>(token),
conn_->writer_timer_);
}
template <class CompletionToken>
auto health_checker(CompletionToken&& token)
{
return asio::async_compose<CompletionToken, void(system::error_code)>(
health_checker_op<Executor>{*conn_},
std::forward<CompletionToken>(token),
conn_->writer_timer_);
conn_->writer_cv_);
}
public:
@@ -450,15 +355,12 @@ public:
template <class Self>
void operator()(
Self& self,
std::array<std::size_t, 4> order,
std::array<std::size_t, 3> order,
system::error_code setup_ec,
system::error_code health_check_ec,
system::error_code reader_ec,
system::error_code writer_ec)
{
(*this)(
self,
translate_parallel_group_errors(order, setup_ec, health_check_ec, reader_ec, writer_ec));
(*this)(self, translate_parallel_group_errors(order, setup_ec, reader_ec, writer_ec));
}
template <class Self>
@@ -483,9 +385,6 @@ public:
[this](auto token) {
return this->send_setup(token);
},
[this](auto token) {
return this->health_checker(token);
},
[this](auto token) {
return this->reader(token);
},
@@ -626,7 +525,7 @@ public:
{ }
/// Returns the associated executor.
executor_type get_executor() noexcept { return impl_->writer_timer_.get_executor(); }
executor_type get_executor() noexcept { return impl_->writer_cv_.get_executor(); }
/** @brief Starts the underlying connection operations.
*
@@ -637,19 +536,15 @@ public:
* @ref boost::redis::config::addr.
* @li Establishes a physical connection to the server. For TCP connections,
* connects to one of the endpoints obtained during name resolution.
* For UNIX domain socket connections, it connects to @ref boost::redis::config::unix_socket.
* For UNIX domain socket connections, it connects to @ref boost::redis::config::unix_sockets.
* @li If @ref boost::redis::config::use_ssl is `true`, performs the TLS handshake.
* @li Executes the setup request, as defined by the passed @ref config object.
* By default, this is a `HELLO` command, but it can contain any other arbitrary
* commands. See the @ref config docs for more info.
* @li Starts a health-check operation where ping commands are sent
* commands. See the @ref config::setup docs for more info.
* @li Starts a health-check operation where `PING` commands are sent
* at intervals specified by
* @ref boost::redis::config::health_check_interval.
* The message passed to `PING` will be @ref boost::redis::config::health_check_id.
* Passing an interval with a zero value will disable health-checks. If the Redis
* server does not respond to a health-check within two times the value
* specified here, it will be considered unresponsive and the connection
* will be closed.
* @ref config::health_check_interval when the connection is idle.
* See the documentation of @ref config::health_check_interval for more info.
* @li Starts read and write operations. Requests issued using @ref async_exec
* before `async_run` is called will be written to the server immediately.
*
@@ -1046,7 +941,7 @@ private:
asio::async_compose<decltype(token_with_slot), void(system::error_code)>(
detail::run_op<Executor>{self},
token_with_slot,
self->writer_timer_);
self->writer_cv_);
}
};

View File

@@ -36,7 +36,7 @@ public:
void on_connect(system::error_code const& ec, asio::ip::tcp::endpoint const& ep);
void on_connect(system::error_code const& ec, std::string_view unix_socket_ep);
void on_ssl_handshake(system::error_code const& ec);
void on_write(system::error_code const& ec, std::size_t n);
void on_write(std::size_t n);
void on_read(system::error_code const& ec, std::size_t n);
void on_setup(system::error_code const& ec, generic_response const& resp);
void log(logger::level lvl, std::string_view msg);

View File

@@ -25,7 +25,6 @@ struct connection_state {
multiplexer mpx{};
generic_response setup_resp{};
request ping_req{};
generic_response ping_resp{};
};
} // namespace boost::redis::detail

View File

@@ -1,22 +0,0 @@
/* Copyright (c) 2018-2024 Marcelo Zimbres Silva (mzimbres@gmail.com)
*
* Distributed under the Boost Software License, Version 1.0. (See
* accompanying file LICENSE.txt)
*/
#ifndef BOOST_REDIS_HELPER_HPP
#define BOOST_REDIS_HELPER_HPP
#include <boost/asio/cancellation_type.hpp>
namespace boost::redis::detail {
template <class T>
auto is_cancelled(T const& self)
{
return self.get_cancellation_state().cancelled() != asio::cancellation_type_t::none;
}
} // namespace boost::redis::detail
#endif // BOOST_REDIS_HELPER_HPP

View File

@@ -18,6 +18,7 @@
#include <boost/system/error_code.hpp>
#include <cstddef>
#include <deque>
#include <functional>
#include <memory>
@@ -136,12 +137,12 @@ public:
[[nodiscard]]
auto prepare_write() -> std::size_t;
// To be called after a successful write operation.
// Returns the number of requests that have been released because
// they don't have a response e.g. SUBSCRIBE.
// To be called after a write operation.
// Returns true once all the bytes in the buffer generated by prepare_write
// have been written.
// Must be called before cancel_on_conn_lost() because it might change
// request status.
auto commit_write() -> std::size_t;
auto commit_write(std::size_t bytes_written) -> bool;
// To be called after a successful read operation.
// Must be called before cancel_on_conn_lost() because it might change
@@ -175,9 +176,9 @@ public:
void cancel_on_conn_lost();
[[nodiscard]]
auto get_write_buffer() noexcept -> std::string_view
auto get_write_buffer() const noexcept -> std::string_view
{
return std::string_view{write_buffer_};
return std::string_view{write_buffer_}.substr(write_offset_);
}
[[nodiscard]]
@@ -199,9 +200,6 @@ public:
return usage_;
}
[[nodiscard]]
auto is_writing() const noexcept -> bool;
void set_config(config const& cfg);
private:
@@ -210,15 +208,15 @@ private:
[[nodiscard]]
auto is_next_push(std::string_view data) const noexcept -> bool;
// Releases the number of requests that have been released.
[[nodiscard]]
auto release_push_requests() -> std::size_t;
// Completes requests that don't expect a response
void release_push_requests();
[[nodiscard]]
consume_result consume_impl(system::error_code& ec);
read_buffer read_buffer_;
std::string write_buffer_;
std::size_t write_offset_{}; // how many bytes of the write buffer have been written?
std::deque<std::shared_ptr<elem>> reqs_;
resp3::parser parser_{};
bool on_push_ = false;

View File

@@ -13,6 +13,7 @@
#include <boost/asio/cancellation_type.hpp>
#include <boost/system/error_code.hpp>
#include <chrono>
#include <cstddef>
namespace boost::redis::detail {
@@ -21,7 +22,8 @@ class read_buffer;
class reader_fsm {
public:
struct action {
class action {
public:
enum class type
{
read_some,
@@ -29,24 +31,52 @@ public:
done,
};
action(type t, std::size_t push_size = 0u) noexcept
: type_(t)
, push_size_(push_size)
{ }
action(system::error_code ec) noexcept
: type_(type::done)
, ec_(ec)
{ }
static action notify_push_receiver(std::size_t bytes)
static action read_some(std::chrono::steady_clock::duration timeout) { return {timeout}; }
static action notify_push_receiver(std::size_t bytes) { return {bytes}; }
type get_type() const { return type_; }
system::error_code error() const
{
return {type::notify_push_receiver, bytes};
BOOST_ASSERT(type_ == type::done);
return ec_;
}
std::chrono::steady_clock::duration timeout() const
{
BOOST_ASSERT(type_ == type::read_some);
return timeout_;
}
std::size_t push_size() const
{
BOOST_ASSERT(type_ == type::notify_push_receiver);
return push_size_;
}
private:
action(std::size_t push_size) noexcept
: type_(type::notify_push_receiver)
, push_size_(push_size)
{ }
action(std::chrono::steady_clock::duration t) noexcept
: type_(type::read_some)
, timeout_(t)
{ }
type type_;
std::size_t push_size_{};
system::error_code ec_;
union {
system::error_code ec_;
std::chrono::steady_clock::duration timeout_;
std::size_t push_size_{};
};
};
action resume(

View File

@@ -9,9 +9,12 @@
#ifndef BOOST_REDIS_WRITER_FSM_HPP
#define BOOST_REDIS_WRITER_FSM_HPP
#include <boost/redis/detail/connection_state.hpp>
#include <boost/asio/cancellation_type.hpp>
#include <boost/system/error_code.hpp>
#include <chrono>
#include <cstddef>
// Sans-io algorithm for the writer task, as a finite state machine
@@ -25,37 +28,62 @@ class multiplexer;
// What should we do next?
enum class writer_action_type
{
done, // Call the final handler
write, // Issue a write on the stream
wait, // Wait until there is data to be written
done, // Call the final handler
write_some, // Issue a write on the stream
wait, // Wait until there is data to be written
};
struct writer_action {
writer_action_type type;
system::error_code ec;
class writer_action {
writer_action_type type_;
union {
system::error_code ec_;
std::chrono::steady_clock::duration timeout_;
};
writer_action(writer_action_type type) noexcept
: type{type}
writer_action(writer_action_type type, std::chrono::steady_clock::duration t) noexcept
: type_{type}
, timeout_{t}
{ }
public:
writer_action_type type() const { return type_; }
writer_action(system::error_code ec) noexcept
: type{writer_action_type::done}
, ec{ec}
: type_{writer_action_type::done}
, ec_{ec}
{ }
static writer_action write_some(std::chrono::steady_clock::duration timeout)
{
return {writer_action_type::write_some, timeout};
}
static writer_action wait(std::chrono::steady_clock::duration timeout)
{
return {writer_action_type::wait, timeout};
}
system::error_code error() const
{
BOOST_ASSERT(type_ == writer_action_type::done);
return ec_;
}
std::chrono::steady_clock::duration timeout() const
{
BOOST_ASSERT(type_ == writer_action_type::write_some || type_ == writer_action_type::wait);
return timeout_;
}
};
class writer_fsm {
int resume_point_{0};
multiplexer* mpx_;
connection_logger* logger_;
public:
writer_fsm(multiplexer& mpx, connection_logger& logger) noexcept
: mpx_(&mpx)
, logger_(&logger)
{ }
writer_fsm() = default;
writer_action resume(
connection_state& st,
system::error_code ec,
std::size_t bytes_written,
asio::cancellation_type_t cancel_state);

View File

@@ -68,7 +68,7 @@ enum class error
/// Connect timeout
connect_timeout,
/// Connect timeout
/// The server didn't answer the health checks on time and didn't send any data during the health check period.
pong_timeout,
/// SSL handshake timeout
@@ -91,6 +91,9 @@ enum class error
/// Reading data from the socket would exceed the maximum size allowed of the read buffer.
exceeds_maximum_read_buffer_size,
/// Timeout while writing data to the server.
write_timeout,
};
/**

View File

@@ -22,9 +22,8 @@ logger detail::make_stderr_logger(logger::level lvl, std::string prefix)
}
system::error_code detail::translate_parallel_group_errors(
std::array<std::size_t, 4u> order,
std::array<std::size_t, 3u> order,
system::error_code setup_ec,
system::error_code health_check_ec,
system::error_code reader_ec,
system::error_code writer_ec)
{
@@ -41,9 +40,8 @@ system::error_code detail::translate_parallel_group_errors(
// excluding the setup task
std::size_t task_number = order[0] == 0u ? order[1] : order[0];
switch (task_number) {
case 1u: return health_check_ec;
case 2u: return reader_ec;
case 3u: return writer_ec;
case 1u: return reader_ec;
case 2u: return writer_ec;
default: BOOST_ASSERT(false); return system::error_code();
}
}

View File

@@ -114,21 +114,16 @@ void connection_logger::on_ssl_handshake(system::error_code const& ec)
logger_.fn(logger::level::info, msg_);
}
void connection_logger::on_write(system::error_code const& ec, std::size_t n)
void connection_logger::on_write(std::size_t n)
{
if (logger_.lvl < logger::level::info)
if (logger_.lvl < logger::level::debug)
return;
if (ec) {
msg_ = "Writer task error: ";
format_error_code(ec, msg_);
} else {
msg_ = "Writer task: ";
msg_ += std::to_string(n);
msg_ += " bytes written.";
}
msg_ = "Writer task: ";
msg_ += std::to_string(n);
msg_ += " bytes written.";
logger_.fn(logger::level::info, msg_);
logger_.fn(logger::level::debug, msg_);
}
void connection_logger::on_read(system::error_code const& ec, std::size_t bytes_read)

View File

@@ -54,6 +54,8 @@ struct error_category_impl : system::error_category {
case error::exceeds_maximum_read_buffer_size:
return "Reading data from the socket would exceed the maximum size allowed of the read "
"buffer.";
case error::write_timeout:
return "Timeout while writing data to the server.";
default: BOOST_ASSERT(false); return "Boost.Redis error.";
}
}

View File

@@ -11,6 +11,7 @@
#include <boost/asio/error.hpp>
#include <boost/assert.hpp>
#include <cstddef>
#include <memory>
namespace boost::redis::detail {
@@ -65,13 +66,19 @@ void multiplexer::cancel(std::shared_ptr<elem> const& ptr)
}
}
std::size_t multiplexer::commit_write()
bool multiplexer::commit_write(std::size_t bytes_written)
{
BOOST_ASSERT(!cancel_run_called_);
usage_.bytes_sent += std::size(write_buffer_);
BOOST_ASSERT(bytes_written + write_offset_ <= write_buffer_.size());
// We have to clear the payload right after writing it to use it
// as a flag that informs there is no ongoing write.
usage_.bytes_sent += bytes_written;
write_offset_ += bytes_written;
// Are there still more bytes to write?
if (write_offset_ < write_buffer_.size())
return false;
// We've written all the bytes in the write buffer.
write_buffer_.clear();
// There is small optimization possible here: traverse only the
@@ -83,7 +90,9 @@ std::size_t multiplexer::commit_write()
}
});
return release_push_requests();
release_push_requests();
return true;
}
void multiplexer::add(std::shared_ptr<elem> const& info)
@@ -151,8 +160,7 @@ consume_result multiplexer::consume_impl(system::error_code& ec)
return consume_result::got_response;
}
std::pair<consume_result, std::size_t>
multiplexer::consume(system::error_code& ec)
std::pair<consume_result, std::size_t> multiplexer::consume(system::error_code& ec)
{
BOOST_ASSERT(!cancel_run_called_);
@@ -172,20 +180,14 @@ multiplexer::consume(system::error_code& ec)
return std::make_pair(consume_result::needs_more, consumed);
}
auto multiplexer::prepare_read() noexcept -> system::error_code
{
return read_buffer_.prepare();
}
auto multiplexer::prepare_read() noexcept -> system::error_code { return read_buffer_.prepare(); }
auto multiplexer::get_prepared_read_buffer() noexcept -> read_buffer::span_type
{
return read_buffer_.get_prepared();
}
void multiplexer::commit_read(std::size_t bytes_read)
{
read_buffer_.commit(bytes_read);
}
void multiplexer::commit_read(std::size_t bytes_read) { read_buffer_.commit(bytes_read); }
auto multiplexer::get_read_buffer_size() const noexcept -> std::size_t
{
@@ -196,6 +198,7 @@ void multiplexer::reset()
{
read_buffer_.clear();
write_buffer_.clear();
write_offset_ = 0u;
parser_.reset();
on_push_ = false;
cancel_run_called_ = false;
@@ -222,6 +225,8 @@ std::size_t multiplexer::prepare_write()
usage_.commands_sent += ri->get_request().get_commands();
});
write_offset_ = 0u;
auto const d = std::distance(point, std::cend(reqs_));
return static_cast<std::size_t>(d);
}
@@ -331,7 +336,7 @@ bool multiplexer::is_next_push(std::string_view data) const noexcept
return reqs_.front()->is_waiting();
}
std::size_t multiplexer::release_push_requests()
void multiplexer::release_push_requests()
{
auto point = std::stable_partition(
std::begin(reqs_),
@@ -344,13 +349,9 @@ std::size_t multiplexer::release_push_requests()
ptr->notify_done();
});
auto const d = std::distance(point, std::end(reqs_));
reqs_.erase(point, std::end(reqs_));
return static_cast<std::size_t>(d);
}
bool multiplexer::is_writing() const noexcept { return !write_buffer_.empty(); }
void multiplexer::set_receive_adapter(any_adapter adapter)
{
receive_adapter_ = std::move(adapter);

View File

@@ -32,14 +32,23 @@ reader_fsm::action reader_fsm::resume(
return {ec};
}
// Read
// Read. The connection might spend health_check_interval without writing data.
// Give it another health_check_interval for the response to arrive.
// If we don't get anything in this time, consider the connection as dead
st.logger.trace("Reader task: issuing read");
BOOST_REDIS_YIELD(resume_point_, 1, action::type::read_some)
BOOST_REDIS_YIELD(resume_point_, 1, action::read_some(2 * st.cfg.health_check_interval))
// Check for cancellations
if (is_terminal_cancel(cancel_state)) {
st.logger.trace("Reader task: cancelled (1)");
return {asio::error::operation_aborted};
return system::error_code(asio::error::operation_aborted);
}
// Translate timeout errors caused by operation_aborted to more legible ones.
// A timeout here means that we didn't receive data in time.
// Note that cancellation is already handled by the above statement.
if (ec == asio::error::operation_aborted) {
ec = error::pong_timeout;
}
// Log what we read
@@ -53,7 +62,7 @@ reader_fsm::action reader_fsm::resume(
// TODO: If an error occurred but data was read (i.e.
// bytes_read != 0) we should try to process that data and
// deliver it to the user before calling cancel_run.
return {ec};
return ec;
}
// Process the data that we've read
@@ -64,7 +73,7 @@ reader_fsm::action reader_fsm::resume(
// TODO: Perhaps log what has not been consumed to aid
// debugging.
st.logger.trace("Reader task: error processing message", ec);
return {ec};
return ec;
}
if (res_.first == consume_result::needs_more) {
@@ -77,13 +86,13 @@ reader_fsm::action reader_fsm::resume(
// Check for cancellations
if (is_terminal_cancel(cancel_state)) {
st.logger.trace("Reader task: cancelled (2)");
return {asio::error::operation_aborted};
return system::error_code(asio::error::operation_aborted);
}
// Check for other errors
if (ec) {
st.logger.trace("Reader task: error notifying push receiver", ec);
return {ec};
return ec;
}
} else {
// TODO: Here we should notify the exec operation that
@@ -98,7 +107,7 @@ reader_fsm::action reader_fsm::resume(
}
BOOST_ASSERT(false);
return {system::error_code()};
return system::error_code();
}
} // namespace boost::redis::detail

View File

@@ -9,7 +9,9 @@
#ifndef BOOST_REDIS_WRITER_FSM_IPP
#define BOOST_REDIS_WRITER_FSM_IPP
#include <boost/redis/adapter/any_adapter.hpp>
#include <boost/redis/detail/connection_logger.hpp>
#include <boost/redis/detail/connection_state.hpp>
#include <boost/redis/detail/coroutine.hpp>
#include <boost/redis/detail/multiplexer.hpp>
#include <boost/redis/detail/writer_fsm.hpp>
@@ -24,7 +26,33 @@
namespace boost::redis::detail {
inline void process_ping_node(
connection_logger& lgr,
resp3::basic_node<std::string_view> const& nd,
system::error_code& ec)
{
switch (nd.data_type) {
case resp3::type::simple_error: ec = redis::error::resp3_simple_error; break;
case resp3::type::blob_error: ec = redis::error::resp3_blob_error; break;
default: ;
}
if (ec) {
lgr.log(logger::level::info, "Health checker: server answered ping with an error", nd.value);
}
}
inline any_adapter make_ping_adapter(connection_logger& lgr)
{
return any_adapter{
[&lgr](any_adapter::parse_event evt, resp3::node_view const& nd, system::error_code& ec) {
if (evt == any_adapter::parse_event::node)
process_ping_node(lgr, nd, ec);
}};
}
writer_action writer_fsm::resume(
connection_state& st,
system::error_code ec,
std::size_t bytes_written,
asio::cancellation_type_t cancel_state)
@@ -34,37 +62,58 @@ writer_action writer_fsm::resume(
for (;;) {
// Attempt to write while we have requests ready to send
while (mpx_->prepare_write() != 0u) {
// Write
BOOST_REDIS_YIELD(resume_point_, 1, writer_action_type::write)
while (st.mpx.prepare_write() != 0u) {
// Write an entire message. We can't use asio::async_write because we want
// to apply timeouts to individual write operations
for (;;) {
// Write what we can. If nothing has been written for the health check
// interval, we consider the connection as failed
BOOST_REDIS_YIELD(
resume_point_,
1,
writer_action::write_some(st.cfg.health_check_interval))
// Mark requests as written
if (!ec)
mpx_->commit_write();
// Commit the received bytes. This accounts for partial success
bool finished = st.mpx.commit_write(bytes_written);
st.logger.on_write(bytes_written);
// Check for cancellations
if (is_terminal_cancel(cancel_state)) {
logger_->trace("Writer task: cancelled (1).");
return system::error_code(asio::error::operation_aborted);
}
// Check for cancellations and translate error codes
if (is_terminal_cancel(cancel_state))
ec = asio::error::operation_aborted;
else if (ec == asio::error::operation_aborted)
ec = error::write_timeout;
// Log what we wrote
logger_->on_write(ec, bytes_written);
// Check for errors
if (ec) {
if (ec == asio::error::operation_aborted) {
st.logger.trace("Writer task: cancelled (1).");
} else {
st.logger.trace("Writer task error", ec);
}
return ec;
}
// Check for errors
if (ec) {
return ec;
// Are we done yet?
if (finished)
break;
}
}
// No more requests ready to be written. Wait for more
BOOST_REDIS_YIELD(resume_point_, 2, writer_action_type::wait)
// No more requests ready to be written. Wait for more, or until we need to send a PING
BOOST_REDIS_YIELD(resume_point_, 2, writer_action::wait(st.cfg.health_check_interval))
// Check for cancellations
if (is_terminal_cancel(cancel_state)) {
logger_->trace("Writer task: cancelled (2).");
st.logger.trace("Writer task: cancelled (2).");
return system::error_code(asio::error::operation_aborted);
}
// If we weren't notified, it's because there is no data and we should send a health check
if (!ec) {
auto elem = make_elem(st.ping_req, make_ping_adapter(st.logger));
elem->set_done_callback([] { });
st.mpx.add(elem);
}
}
}

View File

@@ -11,6 +11,7 @@
#include <boost/assert/source_location.hpp>
#include <chrono>
#include <initializer_list>
#include <string>
#include <string_view>
@@ -44,6 +45,11 @@ struct log_fixture {
logger make_logger();
};
constexpr auto to_milliseconds(std::chrono::steady_clock::duration d)
{
return std::chrono::duration_cast<std::chrono::milliseconds>(d).count();
}
} // namespace boost::redis::detail
#endif // BOOST_REDIS_TEST_SANSIO_UTILS_HPP

View File

@@ -6,14 +6,19 @@
#include <boost/redis/connection.hpp>
#include <boost/redis/ignore.hpp>
#include <boost/redis/request.hpp>
#include <boost/redis/response.hpp>
#include <boost/asio/error.hpp>
#include <boost/asio/io_context.hpp>
#include <boost/asio/steady_timer.hpp>
#include <boost/core/lightweight_test.hpp>
#include "common.hpp"
#include <chrono>
#include <cstddef>
#include <string>
namespace net = boost::asio;
namespace redis = boost::redis;
@@ -21,6 +26,7 @@ using error_code = boost::system::error_code;
using connection = boost::redis::connection;
using boost::redis::request;
using boost::redis::ignore;
using boost::redis::generic_response;
using namespace std::chrono_literals;
namespace {
@@ -154,6 +160,98 @@ void test_disabled()
BOOST_TEST(exec2_finished);
}
// Receiving data is sufficient to consider our connection healthy.
// Sends a blocking request that causes PINGs to not be answered,
// and subscribes to a channel to receive pushes periodically.
// This simulates situations of heavy load, where PINGs may not be answered on time.
class test_flexible {
net::io_context ioc;
connection conn1{ioc}; // The one that simulates a heavy load condition
connection conn2{ioc}; // Publishes messages
net::steady_timer timer{ioc};
request publish_req;
bool run1_finished = false, run2_finished = false, exec_finished{false},
publisher_finished{false};
// Starts publishing messages to the channel
void start_publish()
{
conn2.async_exec(publish_req, ignore, [this](error_code ec, std::size_t) {
BOOST_TEST_EQ(ec, error_code());
if (exec_finished) {
// The blocking request finished, we're done
conn2.cancel();
publisher_finished = true;
} else {
// Wait for some time and publish again
timer.expires_after(100ms);
timer.async_wait([this](error_code ec) {
BOOST_TEST_EQ(ec, error_code());
start_publish();
});
}
});
}
// Generates a sufficiently unique name for channels so
// tests may be run in parallel for different configurations
static std::string make_unique_id()
{
auto t = std::chrono::high_resolution_clock::now();
return "test-flexible-health-checks-" + std::to_string(t.time_since_epoch().count());
}
public:
test_flexible() = default;
void run()
{
// Setup
auto cfg = make_test_config();
cfg.health_check_interval = 500ms;
generic_response resp;
std::string channel_name = make_unique_id();
publish_req.push("PUBLISH", channel_name, "test_health_check_flexible");
// This request will block for much longer than the health check
// interval. If we weren't receiving pushes, the connection would be considered dead.
// If this request finishes successfully, the health checker is doing good
request blocking_req;
blocking_req.push("SUBSCRIBE", channel_name);
blocking_req.push("BLPOP", "any", 2);
blocking_req.get_config().cancel_if_unresponded = true;
blocking_req.get_config().cancel_on_connection_lost = true;
conn1.async_run(cfg, [&](error_code ec) {
run1_finished = true;
BOOST_TEST_EQ(ec, net::error::operation_aborted);
});
conn2.async_run(cfg, [&](error_code ec) {
run2_finished = true;
BOOST_TEST_EQ(ec, net::error::operation_aborted);
});
// BLPOP will return NIL, so we can't use ignore
conn1.async_exec(blocking_req, resp, [&](error_code ec, std::size_t) {
exec_finished = true;
BOOST_TEST_EQ(ec, error_code());
conn1.cancel();
});
start_publish();
ioc.run_for(test_timeout);
BOOST_TEST(run1_finished);
BOOST_TEST(run2_finished);
BOOST_TEST(exec_finished);
BOOST_TEST(publisher_finished);
}
};
} // namespace
int main()
@@ -161,6 +259,7 @@ int main()
test_reconnection();
test_error_code();
test_disabled();
test_flexible().run();
return boost::report_errors();
}

View File

@@ -95,7 +95,6 @@ BOOST_AUTO_TEST_CASE(echo_stress)
net::io_context ctx;
connection conn{ctx};
auto cfg = make_test_config();
cfg.health_check_interval = std::chrono::seconds::zero();
// Number of coroutines that will send pings sharing the same
// connection to redis.

View File

@@ -134,8 +134,7 @@ BOOST_AUTO_TEST_CASE(large_number_of_concurrent_requests_issue_170)
auto conn = std::make_shared<connection>(ioc);
auto cfg = make_test_config();
cfg.health_check_interval = std::chrono::seconds(0);
conn->async_run(cfg, {}, net::detached);
conn->async_run(cfg, net::detached);
constexpr int repeat = 8000;
int remaining = repeat;

View File

@@ -138,7 +138,7 @@ void test_success()
// Simulate a successful write
BOOST_TEST_EQ(mpx.prepare_write(), 1u); // one request was placed in the packet to write
BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
// Simulate a successful read
read(mpx, "$5\r\nhello\r\n");
@@ -177,7 +177,7 @@ void test_parse_error()
// Simulate a successful write
BOOST_TEST_EQ(mpx.prepare_write(), 1u); // one request was placed in the packet to write
BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
// Simulate a read that will trigger an error.
// The second field should be a number (rather than the empty string).
@@ -239,7 +239,7 @@ void test_not_connected()
// Simulate a successful write
BOOST_TEST_EQ(mpx.prepare_write(), 1u); // one request was placed in the packet to write
BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
// Simulate a successful read
read(mpx, "$5\r\nhello\r\n");
@@ -329,7 +329,7 @@ void test_cancel_notwaiting_terminal_partial()
// The multiplexer starts writing the request
BOOST_TEST_EQ_MSG(mpx.prepare_write(), 1u, tc.name);
BOOST_TEST_EQ_MSG(mpx.commit_write(), 0u, tc.name);
BOOST_TEST_EQ_MSG(mpx.commit_write(mpx.get_write_buffer().size()), true, tc.name);
// A cancellation arrives
act = fsm.resume(true, tc.type);
@@ -368,7 +368,7 @@ void test_cancel_notwaiting_total()
// Simulate a successful write
BOOST_TEST_EQ(mpx.prepare_write(), 1u);
BOOST_TEST_EQ(mpx.commit_write(), 0u); // all requests expect a response
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
// We got requested a cancellation here, but we can't honor it
act = fsm.resume(true, asio::cancellation_type_t::total);

View File

@@ -529,6 +529,7 @@ BOOST_AUTO_TEST_CASE(cover_error)
check_error("boost.redis", boost::redis::error::incompatible_node_depth);
check_error("boost.redis", boost::redis::error::resp3_hello);
check_error("boost.redis", boost::redis::error::exceeds_maximum_read_buffer_size);
check_error("boost.redis", boost::redis::error::write_timeout);
}
std::string get_type_as_str(boost::redis::resp3::type t)

View File

@@ -15,14 +15,13 @@
#include <boost/assert/source_location.hpp>
#include <boost/core/lightweight_test.hpp>
#include "sansio_utils.hpp"
#include <iostream>
#include <memory>
#include <ostream>
#include <string>
#include <string_view>
#include "sansio_utils.hpp"
using boost::redis::request;
using boost::redis::detail::multiplexer;
using boost::redis::detail::consume_result;
@@ -118,7 +117,7 @@ void test_request_needs_more()
// Add the element to the multiplexer and simulate a successful write
mpx.add(item1.elem_ptr);
BOOST_TEST_EQ(mpx.prepare_write(), 1u);
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(item1.req.payload().size()));
BOOST_TEST(item1.elem_ptr->is_written());
BOOST_TEST(!item1.done);
@@ -161,6 +160,13 @@ void test_several_requests()
BOOST_TEST_EQ(mpx.prepare_write(), 3u);
BOOST_TEST_EQ(mpx.prepare_write(), 0u);
// The write buffer holds the 3 requests, coalesced
constexpr std::string_view expected_buffer =
"*2\r\n$4\r\nPING\r\n$7\r\ncmd-arg\r\n"
"*2\r\n$4\r\nPING\r\n$7\r\ncmd-arg\r\n"
"*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n";
BOOST_TEST_EQ(mpx.get_write_buffer(), expected_buffer);
// After coalescing the requests for writing their statuses should
// be changed to "staged".
BOOST_TEST(item1.elem_ptr->is_staged());
@@ -180,7 +186,7 @@ void test_several_requests()
// The commit_write call informs the multiplexer the payload was
// sent (e.g. written to the socket). This step releases requests
// that has no response.
BOOST_TEST_EQ(mpx.commit_write(), 1u);
BOOST_TEST(mpx.commit_write(expected_buffer.size()));
// The staged status should now have changed to written.
BOOST_TEST(item1.elem_ptr->is_written());
@@ -222,6 +228,61 @@ void test_several_requests()
BOOST_TEST(item3.done);
}
void test_short_writes()
{
// Setup
multiplexer mpx;
test_item item1{};
test_item item2{false};
// Add some requests to the multiplexer.
mpx.add(item1.elem_ptr);
mpx.add(item2.elem_ptr);
BOOST_TEST(item1.elem_ptr->is_waiting());
BOOST_TEST(item2.elem_ptr->is_waiting());
// Start writing them
BOOST_TEST_EQ(mpx.prepare_write(), 2u);
BOOST_TEST_EQ(
mpx.get_write_buffer(),
"*2\r\n$4\r\nPING\r\n$7\r\ncmd-arg\r\n"
"*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n");
BOOST_TEST(item1.elem_ptr->is_staged());
BOOST_TEST(item2.elem_ptr->is_staged());
// Write a small part. The write buffer gets updated, but request status is not changed
BOOST_TEST_NOT(mpx.commit_write(8u));
BOOST_TEST_EQ(
mpx.get_write_buffer(),
"PING\r\n$7\r\ncmd-arg\r\n"
"*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n");
BOOST_TEST(item1.elem_ptr->is_staged());
BOOST_TEST(item2.elem_ptr->is_staged());
// Write another part
BOOST_TEST_NOT(mpx.commit_write(19u));
BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n");
BOOST_TEST(item1.elem_ptr->is_staged());
BOOST_TEST(item2.elem_ptr->is_staged());
// A zero-size write doesn't cause trouble
BOOST_TEST_NOT(mpx.commit_write(0u));
BOOST_TEST_EQ(mpx.get_write_buffer(), "*2\r\n$9\r\nSUBSCRIBE\r\n$7\r\ncmd-arg\r\n");
BOOST_TEST(item1.elem_ptr->is_staged());
BOOST_TEST(item2.elem_ptr->is_staged());
// Write everything except the last byte
BOOST_TEST_NOT(mpx.commit_write(31u));
BOOST_TEST_EQ(mpx.get_write_buffer(), "\n");
BOOST_TEST(item1.elem_ptr->is_staged());
BOOST_TEST(item2.elem_ptr->is_staged());
// Write the last byte
BOOST_TEST(mpx.commit_write(1u));
BOOST_TEST(item1.elem_ptr->is_written());
BOOST_TEST(item2.elem_ptr->is_done());
}
// The response to a request is received before its write
// confirmation. This might happen on heavy load
void test_request_response_before_write()
@@ -251,7 +312,7 @@ void test_request_response_before_write()
item.reset();
// The write gets confirmed and causes no problem
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
}
void test_push()
@@ -395,7 +456,7 @@ void test_mix_responses_pushes()
mpx.add(item1.elem_ptr);
mpx.add(item2.elem_ptr);
BOOST_TEST_EQ(mpx.prepare_write(), 2u);
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
BOOST_TEST(item1.elem_ptr->is_written());
BOOST_TEST(!item1.done);
BOOST_TEST(item2.elem_ptr->is_written());
@@ -488,7 +549,7 @@ void test_cancel_waiting()
// We can progress the other request normally
BOOST_TEST_EQ(mpx.prepare_write(), 1u);
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
error_code ec;
read(mpx, "$11\r\nHello world\r\n");
auto res = mpx.consume(ec);
@@ -518,7 +579,7 @@ void test_cancel_staged()
item1.reset(); // Verify we don't reference this item anyhow
// The write gets confirmed
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
// The cancelled request's response arrives. It gets discarded
error_code ec;
@@ -556,7 +617,7 @@ void test_cancel_staged_command_without_response()
item1.reset(); // Verify we don't reference this item anyhow
// The write gets confirmed
BOOST_TEST_EQ(mpx.commit_write(), 1u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
// The 2nd request's response arrives. It gets parsed successfully
error_code ec;
@@ -582,7 +643,7 @@ void test_cancel_written()
// A write succeeds
BOOST_TEST_EQ(mpx.prepare_write(), 2u);
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
// Cancel the first request
mpx.cancel(item1->elem_ptr);
@@ -623,7 +684,7 @@ void test_cancel_written_half_parsed_response()
// A write succeeds
BOOST_TEST_EQ(mpx.prepare_write(), 2u);
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
// Get the response for the 1st command in req1
error_code ec;
@@ -686,7 +747,7 @@ void test_cancel_written_null_error()
// A write succeeds
BOOST_TEST_EQ(mpx.prepare_write(), 2u);
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
// Cancel the first request
mpx.cancel(item1->elem_ptr);
@@ -741,7 +802,7 @@ void test_cancel_on_connection_lost()
mpx.add(item_written1.elem_ptr);
mpx.add(item_written2.elem_ptr);
BOOST_TEST_EQ(mpx.prepare_write(), 2u);
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
mpx.add(item_staged1.elem_ptr);
mpx.add(item_staged2.elem_ptr);
@@ -794,7 +855,7 @@ void test_cancel_on_connection_lost_abandoned()
mpx.add(item_written1->elem_ptr);
mpx.add(item_written2->elem_ptr);
BOOST_TEST_EQ(mpx.prepare_write(), 2u);
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
mpx.add(item_staged1->elem_ptr);
mpx.add(item_staged2->elem_ptr);
@@ -898,7 +959,7 @@ void test_reset()
// We're able to add write requests and read responses - all state was reset
mpx.add(item2.elem_ptr);
BOOST_TEST_EQ(mpx.prepare_write(), 1u);
BOOST_TEST_EQ(mpx.commit_write(), 0u);
BOOST_TEST(mpx.commit_write(mpx.get_write_buffer().size()));
std::string_view response_buffer = "$11\r\nHello world\r\n";
read(mpx, response_buffer);
@@ -919,6 +980,7 @@ int main()
test_request_needs_more();
test_several_requests();
test_request_response_before_write();
test_short_writes();
test_push();
test_push_needs_more();
test_push_heuristics_no_request();

View File

@@ -18,6 +18,7 @@
#include "sansio_utils.hpp"
#include <chrono>
#include <string_view>
namespace net = boost::asio;
@@ -32,6 +33,7 @@ using redis::config;
using redis::detail::connection_state;
using action = redis::detail::reader_fsm::action;
using redis::logger;
using namespace std::chrono_literals;
// Operators
static const char* to_string(action::type type)
@@ -50,16 +52,29 @@ std::ostream& operator<<(std::ostream& os, action::type type) { return os << to_
bool operator==(const action& lhs, const action& rhs) noexcept
{
return lhs.type_ == rhs.type_ && lhs.push_size_ == rhs.push_size_ && lhs.ec_ == rhs.ec_;
if (lhs.get_type() != rhs.get_type())
return false;
switch (lhs.get_type()) {
case action::type::done: return lhs.error() == rhs.error();
case action::type::read_some: return lhs.timeout() == rhs.timeout();
case action::type::notify_push_receiver: return lhs.push_size() == rhs.push_size();
default: BOOST_ASSERT(false); return false;
}
}
std::ostream& operator<<(std::ostream& os, const action& act)
{
os << "action{ .type=" << act.type_;
if (act.type_ == action::type::done)
os << ", .error=" << act.ec_;
else if (act.type_ == action::type::notify_push_receiver)
os << ", .push_size=" << act.push_size_;
auto t = act.get_type();
os << "action{ .type=" << t;
switch (t) {
case action::type::done: os << ", .error=" << act.error(); break;
case action::type::read_some:
os << ", .timeout=" << to_milliseconds(act.timeout()) << "ms";
break;
case action::type::notify_push_receiver: os << ", .push_size=" << act.push_size(); break;
default: BOOST_ASSERT(false);
}
return os << " }";
}
@@ -84,7 +99,11 @@ struct fixture : redis::detail::log_fixture {
connection_state st{make_logger()};
generic_response resp;
fixture() { st.mpx.set_receive_adapter(any_adapter{resp}); }
fixture()
{
st.mpx.set_receive_adapter(any_adapter{resp});
st.cfg.health_check_interval = 3s;
}
};
void test_push()
@@ -94,7 +113,7 @@ void test_push()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// The fsm is asking for data.
std::string const payload =
@@ -118,7 +137,7 @@ void test_push()
// All pushes were delivered so the fsm should demand more data
act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// Check logging
fix.check_log({
@@ -135,7 +154,7 @@ void test_read_needs_more()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// Split the incoming message in three random parts and deliver
// them to the reader individually.
@@ -144,12 +163,12 @@ void test_read_needs_more()
// Passes the first part to the fsm.
copy_to(fix.st.mpx, msg[0]);
act = fsm.resume(fix.st, msg[0].size(), error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// Passes the second part to the fsm.
copy_to(fix.st.mpx, msg[1]);
act = fsm.resume(fix.st, msg[1].size(), error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// Passes the third and last part to the fsm, next it should ask us
// to deliver the message.
@@ -159,7 +178,7 @@ void test_read_needs_more()
// All pushes were delivered so the fsm should demand more data
act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// Check logging
fix.check_log({
@@ -175,6 +194,44 @@ void test_read_needs_more()
});
}
void test_health_checks_disabled()
{
fixture fix;
reader_fsm fsm;
fix.st.cfg.health_check_interval = 0s;
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::read_some(0s));
// Split the message into two so we cover both the regular read and the needs more branch
constexpr std::string_view msg[] = {">3\r\n+msg1\r\n+ms", "g2\r\n+msg3\r\n"};
// Passes the first part to the fsm.
copy_to(fix.st.mpx, msg[0]);
act = fsm.resume(fix.st, msg[0].size(), error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::read_some(0s));
// Push delivery complete
copy_to(fix.st.mpx, msg[1]);
act = fsm.resume(fix.st, msg[1].size(), error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::notify_push_receiver(25u));
// All pushes were delivered so the fsm should demand more data
act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::read_some(0s));
// Check logging
fix.check_log({
{logger::level::debug, "Reader task: issuing read" },
{logger::level::debug, "Reader task: 14 bytes read" },
{logger::level::debug, "Reader task: incomplete message received"},
{logger::level::debug, "Reader task: issuing read" },
{logger::level::debug, "Reader task: 11 bytes read" },
{logger::level::debug, "Reader task: issuing read" },
});
}
void test_read_error()
{
fixture fix;
@@ -182,7 +239,7 @@ void test_read_error()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// The fsm is asking for data.
std::string const payload = ">1\r\n+msg1\r\n";
@@ -201,6 +258,29 @@ void test_read_error()
});
}
// A timeout in a read means that the connection is unhealthy (i.e. a PING timed out)
void test_read_timeout()
{
fixture fix;
reader_fsm fsm;
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::read_some(6s));
// Timeout
act = fsm.resume(fix.st, 0, {net::error::operation_aborted}, cancellation_type_t::none);
BOOST_TEST_EQ(act, error_code{redis::error::pong_timeout});
// Check logging
fix.check_log({
// clang-format off
{logger::level::debug, "Reader task: issuing read" },
{logger::level::debug, "Reader task: 0 bytes read, error: Pong timeout. [boost.redis:19]"},
// clang-format on
});
}
void test_parse_error()
{
fixture fix;
@@ -208,7 +288,7 @@ void test_parse_error()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// The fsm is asking for data.
std::string const payload = ">a\r\n";
@@ -235,7 +315,7 @@ void test_push_deliver_error()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// The fsm is asking for data.
std::string const payload = ">1\r\n+msg1\r\n";
@@ -269,7 +349,7 @@ void test_max_read_buffer_size()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// Passes the first part to the fsm.
std::string const part1 = ">3\r\n";
@@ -296,7 +376,7 @@ void test_cancel_read()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// The read was cancelled (maybe after delivering some bytes)
constexpr std::string_view payload = ">1\r\n";
@@ -322,7 +402,7 @@ void test_cancel_read_edge()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// Deliver a push, and notify a cancellation.
// This can happen if the cancellation signal arrives before the read handler runs
@@ -345,7 +425,7 @@ void test_cancel_push_delivery()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// The fsm is asking for data.
constexpr std::string_view payload =
@@ -377,7 +457,7 @@ void test_cancel_push_delivery_edge()
// Initiate
auto act = fsm.resume(fix.st, 0, error_code(), cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
BOOST_TEST_EQ(act, action::read_some(6s));
// The fsm is asking for data.
constexpr std::string_view payload =
@@ -409,8 +489,10 @@ int main()
{
test_push();
test_read_needs_more();
test_health_checks_disabled();
test_read_error();
test_read_timeout();
test_parse_error();
test_push_deliver_error();
test_max_read_buffer_size();

View File

@@ -7,6 +7,7 @@
//
#include <boost/redis/detail/connection_logger.hpp>
#include <boost/redis/detail/connection_state.hpp>
#include <boost/redis/detail/multiplexer.hpp>
#include <boost/redis/detail/writer_fsm.hpp>
#include <boost/redis/logger.hpp>
@@ -20,8 +21,10 @@
#include "sansio_utils.hpp"
#include <chrono>
#include <memory>
#include <ostream>
#include <string_view>
using namespace boost::redis;
namespace asio = boost::asio;
@@ -30,18 +33,20 @@ using detail::multiplexer;
using detail::writer_action_type;
using detail::consume_result;
using detail::writer_action;
using detail::connection_state;
using boost::system::error_code;
using boost::asio::cancellation_type_t;
using detail::connection_logger;
using namespace std::chrono_literals;
// Operators
static const char* to_string(writer_action_type value)
{
switch (value) {
case writer_action_type::done: return "writer_action_type::done";
case writer_action_type::write: return "writer_action_type::write";
case writer_action_type::wait: return "writer_action_type::wait";
default: return "<unknown writer_action_type>";
case writer_action_type::done: return "writer_action_type::done";
case writer_action_type::write_some: return "writer_action_type::write";
case writer_action_type::wait: return "writer_action_type::wait";
default: return "<unknown writer_action_type>";
}
}
@@ -55,14 +60,30 @@ std::ostream& operator<<(std::ostream& os, writer_action_type type)
bool operator==(const writer_action& lhs, const writer_action& rhs) noexcept
{
return lhs.type == rhs.type && lhs.ec == rhs.ec;
if (lhs.type() != rhs.type())
return false;
switch (lhs.type()) {
case writer_action_type::done: return lhs.error() == rhs.error();
case writer_action_type::write_some:
case writer_action_type::wait: return lhs.timeout() == rhs.timeout();
default: BOOST_ASSERT(false);
}
return false;
}
std::ostream& operator<<(std::ostream& os, const writer_action& act)
{
os << "writer_action{ .type=" << act.type;
if (act.type == writer_action_type::done)
os << ", .error=" << act.ec;
auto t = act.type();
os << "writer_action{ .type=" << t;
switch (t) {
case writer_action_type::done: os << ", .error=" << act.error(); break;
case writer_action_type::write_some:
case writer_action_type::wait:
os << ", .timeout=" << to_milliseconds(act.timeout()) << "ms";
break;
default: BOOST_ASSERT(false);
}
return os << " }";
}
@@ -89,9 +110,14 @@ struct test_elem {
};
struct fixture : detail::log_fixture {
multiplexer mpx;
connection_logger lgr{make_logger()};
writer_fsm fsm{mpx, lgr};
connection_state st{make_logger()};
writer_fsm fsm;
fixture()
{
st.ping_req.push("PING", "ping_msg"); // would be set up by the runner
st.cfg.health_check_interval = 4s;
}
};
// A single request is written, then we wait and repeat
@@ -102,35 +128,37 @@ void test_single_request()
test_elem item1, item2;
// A request arrives before the writer starts
fix.mpx.add(item1.elm);
fix.st.mpx.add(item1.elm);
// Start. A write is triggered, and the request is marked as staged
auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::write);
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item1.elm->is_staged());
// The write completes successfully. The request is written, and we go back to sleep.
act = fix.fsm.resume(error_code(), item1.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::wait);
act = fix.fsm
.resume(fix.st, error_code(), item1.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
BOOST_TEST(item1.elm->is_written());
// Another request arrives
fix.mpx.add(item2.elm);
fix.st.mpx.add(item2.elm);
// The wait is cancelled to signal we've got a new request
act = fix.fsm.resume(asio::error::operation_aborted, 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::write);
act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item2.elm->is_staged());
// Write successful
act = fix.fsm.resume(error_code(), item2.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::wait);
act = fix.fsm
.resume(fix.st, error_code(), item2.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
BOOST_TEST(item2.elm->is_written());
// Logs
fix.check_log({
{logger::level::info, "Writer task: 24 bytes written."},
{logger::level::info, "Writer task: 24 bytes written."},
{logger::level::debug, "Writer task: 24 bytes written."},
{logger::level::debug, "Writer task: 24 bytes written."},
});
}
@@ -142,32 +170,34 @@ void test_request_arrives_while_writing()
test_elem item1, item2;
// A request arrives before the writer starts
fix.mpx.add(item1.elm);
fix.st.mpx.add(item1.elm);
// Start. A write is triggered, and the request is marked as staged
auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::write);
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item1.elm->is_staged());
// While the write is outstanding, a new request arrives
fix.mpx.add(item2.elm);
fix.st.mpx.add(item2.elm);
// The write completes successfully. The request is written,
// and we start writing the new one
act = fix.fsm.resume(error_code(), item1.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::write);
act = fix.fsm
.resume(fix.st, error_code(), item1.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item1.elm->is_written());
BOOST_TEST(item2.elm->is_staged());
// Write successful
act = fix.fsm.resume(error_code(), item2.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::wait);
act = fix.fsm
.resume(fix.st, error_code(), item2.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
BOOST_TEST(item2.elm->is_written());
// Logs
fix.check_log({
{logger::level::info, "Writer task: 24 bytes written."},
{logger::level::info, "Writer task: 24 bytes written."},
{logger::level::debug, "Writer task: 24 bytes written."},
{logger::level::debug, "Writer task: 24 bytes written."},
});
}
@@ -179,25 +209,166 @@ void test_no_request_at_startup()
test_elem item;
// Start. There is no request, so we wait
auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::wait);
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
// A request arrives
fix.mpx.add(item.elm);
fix.st.mpx.add(item.elm);
// The wait is cancelled to signal we've got a new request
act = fix.fsm.resume(asio::error::operation_aborted, 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::write);
act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item.elm->is_staged());
// Write successful
act = fix.fsm.resume(error_code(), item.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::wait);
act = fix.fsm.resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
BOOST_TEST(item.elm->is_written());
// Logs
fix.check_log({
{logger::level::info, "Writer task: 24 bytes written."},
{logger::level::debug, "Writer task: 24 bytes written."},
});
}
// We correctly handle short writes
void test_short_writes()
{
// Setup
fixture fix;
test_elem item1;
// A request arrives before the writer starts
fix.st.mpx.add(item1.elm);
// Start. A write is triggered, and the request is marked as staged
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item1.elm->is_staged());
// We write a few bytes. It's not the entire message, so we write again
act = fix.fsm.resume(fix.st, error_code(), 2u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item1.elm->is_staged());
// We write some more bytes, but still not the entire message.
act = fix.fsm.resume(fix.st, error_code(), 5u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item1.elm->is_staged());
// A zero size write doesn't cause trouble
act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item1.elm->is_staged());
// Complete writing the message (the entire payload is 24 bytes long)
act = fix.fsm.resume(fix.st, error_code(), 17u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
BOOST_TEST(item1.elm->is_written());
// Logs
fix.check_log({
{logger::level::debug, "Writer task: 2 bytes written." },
{logger::level::debug, "Writer task: 5 bytes written." },
{logger::level::debug, "Writer task: 0 bytes written." },
{logger::level::debug, "Writer task: 17 bytes written."},
});
}
// If no data arrives during the health check interval, a ping is written
void test_ping()
{
// Setup
fixture fix;
error_code ec;
constexpr std::string_view ping_payload = "*2\r\n$4\r\nPING\r\n$8\r\nping_msg\r\n";
// Start. There is no request, so we wait
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
// No request arrives during the wait interval so a ping is added
act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST_EQ(fix.st.mpx.get_write_buffer(), ping_payload);
// Write successful
act = fix.fsm.resume(fix.st, error_code(), ping_payload.size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
// Simulate a successful response to the PING
constexpr std::string_view ping_response = "$8\r\nping_msg\r\n";
read(fix.st.mpx, ping_response);
auto res = fix.st.mpx.consume(ec);
BOOST_TEST_EQ(ec, error_code());
BOOST_TEST(res.first == consume_result::got_response);
BOOST_TEST_EQ(res.second, ping_response.size());
// Logs
fix.check_log({
{logger::level::debug, "Writer task: 28 bytes written."},
});
}
// Disabled health checks don't cause trouble
void test_health_checks_disabled()
{
// Setup
fixture fix;
test_elem item;
fix.st.cfg.health_check_interval = 0s;
// A request arrives before the writer starts
fix.st.mpx.add(item.elm);
// Start. A write is triggered, and the request is marked as staged
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(0s));
BOOST_TEST(item.elm->is_staged());
// The write completes successfully. The request is written, and we go back to sleep.
act = fix.fsm.resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(0s));
BOOST_TEST(item.elm->is_written());
// Logs
fix.check_log({
{logger::level::debug, "Writer task: 24 bytes written."},
});
}
// If the server answers with an error in PING, we log it and produce an error
void test_ping_error()
{
// Setup
fixture fix;
error_code ec;
// Start. There is no request, so we wait
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
// No request arrives during the wait interval so a ping is added
act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
// Write successful
const auto ping_size = fix.st.mpx.get_write_buffer().size();
act = fix.fsm.resume(fix.st, error_code(), ping_size, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
// Simulate an error response to the PING
constexpr std::string_view ping_response = "-ERR: bad command\r\n";
read(fix.st.mpx, ping_response);
auto res = fix.st.mpx.consume(ec);
BOOST_TEST_EQ(ec, error::resp3_simple_error);
BOOST_TEST(res.first == consume_result::got_response);
BOOST_TEST_EQ(res.second, ping_response.size());
// Logs
fix.check_log({
{logger::level::debug, "Writer task: 28 bytes written." },
{logger::level::info, "Health checker: server answered ping with an error: ERR: bad command"},
});
}
@@ -209,23 +380,51 @@ void test_write_error()
test_elem item;
// A request arrives before the writer starts
fix.mpx.add(item.elm);
fix.st.mpx.add(item.elm);
// Start. A write is triggered, and the request is marked as staged
auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::write);
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item.elm->is_staged());
// The write completes with an error (possibly with partial success).
// The request is still staged, and the writer exits.
// Use an error we control so we can check logs
act = fix.fsm.resume(error::empty_field, 2u, cancellation_type_t::none);
act = fix.fsm.resume(fix.st, error::empty_field, 2u, cancellation_type_t::none);
BOOST_TEST_EQ(act, error_code(error::empty_field));
BOOST_TEST(item.elm->is_staged());
// Logs
fix.check_log({
{logger::level::info, "Writer task error: Expected field value is empty. [boost.redis:5]"},
{logger::level::debug, "Writer task: 2 bytes written." },
{logger::level::debug, "Writer task error: Expected field value is empty. [boost.redis:5]"},
});
}
void test_write_timeout()
{
// Setup
fixture fix;
test_elem item;
// A request arrives before the writer starts
fix.st.mpx.add(item.elm);
// Start. A write is triggered, and the request is marked as staged
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item.elm->is_staged());
// The write times out, so it completes with operation_aborted
act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, error_code(error::write_timeout));
BOOST_TEST(item.elm->is_staged());
// Logs
fix.check_log({
{logger::level::debug, "Writer task: 0 bytes written." },
{logger::level::debug,
"Writer task error: Timeout while writing data to the server. [boost.redis:27]"},
});
}
@@ -237,21 +436,22 @@ void test_cancel_write()
test_elem item;
// A request arrives before the writer starts
fix.mpx.add(item.elm);
fix.st.mpx.add(item.elm);
// Start. A write is triggered
auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::write);
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item.elm->is_staged());
// Write cancelled and failed with operation_aborted
act = fix.fsm.resume(asio::error::operation_aborted, 2u, cancellation_type_t::terminal);
act = fix.fsm.resume(fix.st, asio::error::operation_aborted, 2u, cancellation_type_t::terminal);
BOOST_TEST_EQ(act, error_code(asio::error::operation_aborted));
BOOST_TEST(item.elm->is_staged());
// Logs
fix.check_log({
{logger::level::debug, "Writer task: cancelled (1)."},
{logger::level::debug, "Writer task: 2 bytes written."},
{logger::level::debug, "Writer task: cancelled (1)." },
});
}
@@ -263,21 +463,23 @@ void test_cancel_write_edge()
test_elem item;
// A request arrives before the writer starts
fix.mpx.add(item.elm);
fix.st.mpx.add(item.elm);
// Start. A write is triggered
auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::write);
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::write_some(4s));
BOOST_TEST(item.elm->is_staged());
// Write cancelled but without error
act = fix.fsm.resume(error_code(), item.req.payload().size(), cancellation_type_t::terminal);
act = fix.fsm
.resume(fix.st, error_code(), item.req.payload().size(), cancellation_type_t::terminal);
BOOST_TEST_EQ(act, error_code(asio::error::operation_aborted));
BOOST_TEST(item.elm->is_written());
// Logs
fix.check_log({
{logger::level::debug, "Writer task: cancelled (1)."},
{logger::level::debug, "Writer task: 24 bytes written."},
{logger::level::debug, "Writer task: cancelled (1)." },
});
}
@@ -289,14 +491,18 @@ void test_cancel_wait()
test_elem item;
// Start. There is no request, so we wait
auto act = fix.fsm.resume(error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action_type::wait);
auto act = fix.fsm.resume(fix.st, error_code(), 0u, cancellation_type_t::none);
BOOST_TEST_EQ(act, writer_action::wait(4s));
// Sanity check: the writer doesn't touch the multiplexer after a cancellation
fix.mpx.add(item.elm);
fix.st.mpx.add(item.elm);
// Cancel the wait, setting the cancellation state
act = fix.fsm.resume(asio::error::operation_aborted, 0u, asio::cancellation_type_t::terminal);
act = fix.fsm.resume(
fix.st,
asio::error::operation_aborted,
0u,
asio::cancellation_type_t::terminal);
BOOST_TEST_EQ(act, error_code(asio::error::operation_aborted));
BOOST_TEST(item.elm->is_waiting());
@@ -313,8 +519,14 @@ int main()
test_single_request();
test_request_arrives_while_writing();
test_no_request_at_startup();
test_short_writes();
test_health_checks_disabled();
test_ping();
test_ping_error();
test_write_error();
test_write_timeout();
test_cancel_write();
test_cancel_write_edge();