2
0
mirror of https://github.com/boostorg/redis.git synced 2026-01-19 04:42:09 +00:00

Adds per-operation cancellation support to async_run and cleans up cancellation (#321)

* Adds support for terminal and partial cancellation to async_run.
* Makes basic_connection::cancel use per-operation cancellation under the hood.
* Fixes a number of race conditions during cancellation which could cause the cancellation to be ignored. This could happen if the cancellation is delivered while an async handler is pending execution.
* Deprecates operation::{resolve, connect, ssl_handshake, reconnection, health_check}, in favor of operation::run. Calling basic_connection::cancel with these values (excepting reconnection) is now equivalent to using operation::run.
* Fixes a problem in the health checker that caused ping timeouts to be reported as cancellations.
* Sanitizes how the parallel group computes its final error code.
* Simplifies the reader, writer and health checker to not care about connection cancellation. This is now the responsibility of the parallel group.
* Removes an unnecessary setup_cancellation action in the reader FSM.
* Adds documentation regarding per-operation cancellation to async_receive.
* Adds additional health checker tests.
* Adds async_run per-operation cancellation tests.
* Adds reader FSM cancellation tests.
* Makes test_conn_exec_retry tests more resilient.
* Removes leftovers in the UNIX and TLS reconnection tests. These were required due to race conditions that have already been fixed.

close #318 
close #319
This commit is contained in:
Anarthal (Rubén Pérez)
2025-10-03 18:48:30 +02:00
committed by GitHub
parent 2babb79425
commit 5771128f2d
16 changed files with 528 additions and 230 deletions

View File

@@ -49,6 +49,7 @@ make_test(test_conn_quit)
make_test(test_conn_exec_retry)
make_test(test_conn_exec_error)
make_test(test_run)
make_test(test_conn_run_cancel)
make_test(test_conn_check_health)
make_test(test_conn_exec)
make_test(test_conn_push)

View File

@@ -25,7 +25,8 @@ using namespace std::chrono_literals;
namespace {
void test_check_health()
// The health checker detects dead connections and triggers reconnection
void test_reconnection()
{
// Setup
net::io_context ioc;
@@ -75,11 +76,91 @@ void test_check_health()
BOOST_TEST(exec2_finished);
}
// We use the correct error code when a ping times out
void test_error_code()
{
// Setup
net::io_context ioc;
connection conn{ioc};
// This request will block forever, causing the connection to become unresponsive
request req;
req.push("BLPOP", "any", 0);
// Make the test run faster
auto cfg = make_test_config();
cfg.health_check_interval = 200ms;
cfg.reconnect_wait_interval = 0s;
bool run_finished = false, exec_finished = false;
conn.async_run(cfg, [&](error_code ec) {
run_finished = true;
BOOST_TEST_EQ(ec, boost::redis::error::pong_timeout);
});
// This request will complete after the health checker deems the connection
// as unresponsive and triggers a reconnection (it's configured to be cancelled
// on connection lost).
conn.async_exec(req, ignore, [&](error_code ec, std::size_t) {
exec_finished = true;
BOOST_TEST_EQ(ec, net::error::operation_aborted);
});
ioc.run_for(test_timeout);
BOOST_TEST(run_finished);
BOOST_TEST(exec_finished);
}
// A ping interval of zero disables timeouts (and doesn't cause trouble)
void test_disabled()
{
// Setup
net::io_context ioc;
connection conn{ioc};
// Run a couple of requests to verify that the connection works fine
request req1;
req1.push("PING", "health_check_disabled_1");
request req2;
req1.push("PING", "health_check_disabled_2");
auto cfg = make_test_config();
cfg.health_check_interval = 0s;
bool run_finished = false, exec1_finished = false, exec2_finished = false;
conn.async_run(cfg, [&](error_code ec) {
run_finished = true;
BOOST_TEST_EQ(ec, net::error::operation_aborted);
});
conn.async_exec(req1, ignore, [&](error_code ec, std::size_t) {
exec1_finished = true;
BOOST_TEST_EQ(ec, error_code());
conn.async_exec(req2, ignore, [&](error_code ec2, std::size_t) {
exec2_finished = true;
BOOST_TEST_EQ(ec2, error_code());
conn.cancel();
});
});
ioc.run_for(test_timeout);
BOOST_TEST(run_finished);
BOOST_TEST(exec1_finished);
BOOST_TEST(exec2_finished);
}
} // namespace
int main()
{
test_check_health();
test_reconnection();
test_error_code();
test_disabled();
return boost::report_errors();
}

View File

@@ -30,7 +30,7 @@ using namespace std::chrono_literals;
namespace {
BOOST_AUTO_TEST_CASE(request_retry_false)
BOOST_AUTO_TEST_CASE(request_cancel_if_unresponded_true)
{
request req0;
req0.get_config().cancel_on_connection_lost = true;
@@ -105,8 +105,12 @@ BOOST_AUTO_TEST_CASE(request_retry_false)
BOOST_TEST(run_finished);
}
BOOST_AUTO_TEST_CASE(request_retry_true)
BOOST_AUTO_TEST_CASE(request_cancel_if_unresponded_false)
{
// The BLPOP request will block forever, causing the health checker
// to trigger a reconnection. Although req2 has been written,
// it has cancel_if_unresponded=false, so it will be retried
// after reconnection
request req0;
req0.get_config().cancel_on_connection_lost = true;
req0.push("HELLO", 3);
@@ -126,23 +130,10 @@ BOOST_AUTO_TEST_CASE(request_retry_true)
req3.push("QUIT");
net::io_context ioc;
auto conn = std::make_shared<connection>(ioc);
auto conn = std::make_shared<connection>(ioc, logger::level::debug);
net::steady_timer st{ioc};
bool timer_finished = false, c0_called = false, c1_called = false, c2_called = false,
c3_called = false, run_finished = false;
st.expires_after(std::chrono::seconds{1});
st.async_wait([&](error_code ec) {
// Cancels the request before receiving the response. This
// should cause the third request to not complete with error
// since it has cancel_if_unresponded = true and cancellation
// comes after it was written.
timer_finished = true;
BOOST_TEST(ec == error_code());
conn->cancel(operation::run);
});
bool c0_called = false, c1_called = false, c2_called = false, c3_called = false,
run_finished = false;
auto c3 = [&](error_code ec, std::size_t) {
c3_called = true;
@@ -172,8 +163,8 @@ BOOST_AUTO_TEST_CASE(request_retry_true)
conn->async_exec(req0, ignore, c0);
auto cfg = make_test_config();
cfg.health_check_interval = 5s;
conn->async_run(cfg, {}, [&](error_code ec) {
cfg.health_check_interval = 200ms;
conn->async_run(cfg, [&](error_code ec) {
run_finished = true;
std::cout << ec.message() << std::endl;
BOOST_TEST(ec != error_code());
@@ -181,7 +172,6 @@ BOOST_AUTO_TEST_CASE(request_retry_true)
ioc.run_for(test_timeout);
BOOST_TEST(timer_finished);
BOOST_TEST(c0_called);
BOOST_TEST(c1_called);
BOOST_TEST(c2_called);

View File

@@ -210,7 +210,6 @@ BOOST_AUTO_TEST_CASE(test_push_adapter)
conn->async_receive([&, conn](error_code ec, std::size_t) {
BOOST_CHECK_EQUAL(ec, boost::asio::experimental::error::channel_cancelled);
conn->cancel(operation::reconnection);
push_received = true;
});
@@ -220,7 +219,8 @@ BOOST_AUTO_TEST_CASE(test_push_adapter)
});
auto cfg = make_test_config();
conn->async_run(cfg, {}, [&run_finished](error_code ec) {
cfg.reconnect_wait_interval = 0s;
conn->async_run(cfg, [&run_finished](error_code ec) {
BOOST_CHECK_EQUAL(ec, redis::error::incompatible_size);
run_finished = true;
});

View File

@@ -0,0 +1,73 @@
//
// Copyright (c) 2025 Marcelo Zimbres Silva (mzimbres@gmail.com),
// Ruben Perez Hidalgo (rubenperez038 at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#include <boost/redis/connection.hpp>
#include <boost/redis/ignore.hpp>
#include <boost/asio/bind_cancellation_slot.hpp>
#include <boost/asio/cancellation_signal.hpp>
#include <boost/asio/cancellation_type.hpp>
#include <boost/core/lightweight_test.hpp>
#include "common.hpp"
#include <cstddef>
#include <iostream>
#include <string_view>
using boost::system::error_code;
namespace net = boost::asio;
using namespace boost::redis;
namespace {
// Terminal and partial cancellation work for async_run
void test_per_operation_cancellation(std::string_view name, net::cancellation_type_t cancel_type)
{
std::cerr << "Running test case: " << name << std::endl;
// Setup
net::io_context ioc;
connection conn{ioc};
net::cancellation_signal sig;
request req;
req.push("PING", "something");
bool run_finished = false, exec_finished = false;
// Run the connection
auto run_cb = [&](error_code ec) {
run_finished = true;
BOOST_TEST_EQ(ec, net::error::operation_aborted);
};
conn.async_run(make_test_config(), net::bind_cancellation_slot(sig.slot(), run_cb));
// Launch a PING
conn.async_exec(req, ignore, [&](error_code ec, std::size_t) {
exec_finished = true;
BOOST_TEST_EQ(ec, error_code());
sig.emit(cancel_type);
});
ioc.run_for(test_timeout);
// Check
BOOST_TEST(run_finished);
BOOST_TEST(exec_finished);
}
} // namespace
int main()
{
test_per_operation_cancellation("terminal", net::cancellation_type_t::terminal);
test_per_operation_cancellation("partial", net::cancellation_type_t::partial);
return boost::report_errors();
}

View File

@@ -152,6 +152,8 @@ BOOST_AUTO_TEST_CASE(reconnection)
request ping_request;
ping_request.push("PING", "some_value");
ping_request.get_config().cancel_if_unresponded = false;
ping_request.get_config().cancel_on_connection_lost = false;
request quit_request;
quit_request.push("QUIT");
@@ -173,12 +175,6 @@ BOOST_AUTO_TEST_CASE(reconnection)
auto quit_callback = [&](error_code ec, std::size_t) {
BOOST_TEST(ec == error_code());
// If a request is issued immediately after QUIT, the request sometimes
// fails, probably due to a race condition. This dispatches any pending
// handlers, triggering the reconnection process.
// TODO: this should not be required.
ioc.poll();
conn.async_exec(ping_request, ignore, ping_callback);
};

View File

@@ -12,7 +12,7 @@
#include <boost/core/lightweight_test.hpp>
#include <boost/system/error_code.hpp>
#include "common.hpp"
#include <string_view>
namespace net = boost::asio;
namespace redis = boost::redis;
@@ -25,6 +25,7 @@ using redis::any_adapter;
using redis::config;
using action = redis::detail::reader_fsm::action;
// Operators
namespace boost::redis::detail {
extern auto to_string(reader_fsm::action::type t) noexcept -> char const*;
@@ -35,6 +36,10 @@ std::ostream& operator<<(std::ostream& os, reader_fsm::action::type t)
return os;
}
} // namespace boost::redis::detail
namespace {
// Copy data into the multiplexer with the following steps
//
// 1. get_read_buffer
@@ -48,11 +53,6 @@ void copy_to(multiplexer& mpx, std::string_view data)
std::copy(data.cbegin(), data.cend(), buffer.begin());
}
} // namespace boost::redis::detail
// Operators
namespace {
void test_push()
{
multiplexer mpx;
@@ -64,8 +64,6 @@ void test_push()
// Initiate
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::setup_cancellation);
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
// The fsm is asking for data.
@@ -111,8 +109,6 @@ void test_read_needs_more()
// Initiate
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::setup_cancellation);
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
// Split the incoming message in three random parts and deliver
@@ -156,8 +152,6 @@ void test_read_error()
// Initiate
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::setup_cancellation);
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
// The fsm is asking for data.
@@ -166,11 +160,6 @@ void test_read_error()
// Deliver the data
act = fsm.resume(payload.size(), {net::error::operation_aborted}, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::cancel_run);
BOOST_TEST_EQ(act.ec_, error_code());
// Finish
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::done);
BOOST_TEST_EQ(act.ec_, error_code{net::error::operation_aborted});
}
@@ -186,8 +175,6 @@ void test_parse_error()
// Initiate
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::setup_cancellation);
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
// The fsm is asking for data.
@@ -196,11 +183,6 @@ void test_parse_error()
// Deliver the data
act = fsm.resume(payload.size(), {}, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::cancel_run);
BOOST_TEST_EQ(act.ec_, error_code());
// Finish
act = fsm.resume(0, {}, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::done);
BOOST_TEST_EQ(act.ec_, error_code{redis::error::not_a_number});
}
@@ -216,8 +198,6 @@ void test_push_deliver_error()
// Initiate
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::setup_cancellation);
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
// The fsm is asking for data.
@@ -231,10 +211,6 @@ void test_push_deliver_error()
// Resumes from notifying a push with an error.
act = fsm.resume(0, net::error::operation_aborted, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::cancel_run);
// Finish
act = fsm.resume(0, {}, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::done);
BOOST_TEST_EQ(act.ec_, error_code{net::error::operation_aborted});
}
@@ -255,32 +231,88 @@ void test_max_read_buffer_size()
// Initiate
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::setup_cancellation);
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
// Passes the first part to the fsm.
std::string const part1 = ">3\r\n";
copy_to(mpx, part1);
act = fsm.resume(part1.size(), {}, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::cancel_run);
BOOST_TEST_EQ(act.ec_, error_code());
act = fsm.resume({}, {}, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::done);
BOOST_TEST_EQ(act.ec_, redis::error::exceeds_maximum_read_buffer_size);
}
// Cancellations
void test_cancel_after_read()
{
multiplexer mpx;
generic_response resp;
mpx.set_receive_adapter(any_adapter{resp});
reader_fsm fsm{mpx};
error_code ec;
action act;
// Initiate
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
// Deliver a push, and notify a cancellation.
// This can happen if the cancellation signal arrives before the read handler runs
constexpr std::string_view payload = ">1\r\n+msg1\r\n";
copy_to(mpx, payload);
act = fsm.resume(payload.size(), ec, cancellation_type_t::terminal);
BOOST_TEST_EQ(act.type_, action::type::done);
BOOST_TEST_EQ(act.push_size_, 0u);
BOOST_TEST_EQ(act.ec_, net::error::operation_aborted);
}
void test_cancel_after_push_delivery()
{
multiplexer mpx;
generic_response resp;
mpx.set_receive_adapter(any_adapter{resp});
reader_fsm fsm{mpx};
error_code ec;
action act;
// Initiate
act = fsm.resume(0, ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::read_some);
// The fsm is asking for data.
constexpr std::string_view payload =
">1\r\n+msg1\r\n"
">1\r\n+msg2 \r\n";
copy_to(mpx, payload);
// Deliver the 1st push
act = fsm.resume(payload.size(), ec, cancellation_type_t::none);
BOOST_TEST_EQ(act.type_, action::type::notify_push_receiver);
BOOST_TEST_EQ(act.push_size_, 11u);
BOOST_TEST_EQ(act.ec_, error_code());
// We got a cancellation after delivering it.
// This can happen if the cancellation signal arrives before the channel send handler runs
act = fsm.resume(0, ec, cancellation_type_t::terminal);
BOOST_TEST_EQ(act.type_, action::type::done);
BOOST_TEST_EQ(act.push_size_, 0u);
BOOST_TEST_EQ(act.ec_, net::error::operation_aborted);
}
} // namespace
int main()
{
test_max_read_buffer_size();
test_push_deliver_error();
test_read_needs_more();
test_push();
test_read_needs_more();
test_read_error();
test_parse_error();
test_push_deliver_error();
test_max_read_buffer_size();
test_cancel_after_read();
test_cancel_after_push_delivery();
return boost::report_errors();
}

View File

@@ -81,6 +81,9 @@ void test_reconnection()
cfg.reconnect_wait_interval = 10ms; // make the test run faster
request ping_request;
ping_request.get_config().cancel_if_not_connected = false;
ping_request.get_config().cancel_if_unresponded = false;
ping_request.get_config().cancel_on_connection_lost = false;
ping_request.push("PING", "some_value");
request quit_request;
@@ -103,12 +106,6 @@ void test_reconnection()
auto quit_callback = [&](error_code ec, std::size_t) {
BOOST_TEST(ec == error_code());
// If a request is issued immediately after QUIT, the request sometimes
// fails, probably due to a race condition. This dispatches any pending
// handlers, triggering the reconnection process.
// TODO: this should not be required.
ioc.poll();
conn.async_exec(ping_request, ignore, ping_callback);
};