diff --git a/include/seastar/net/api.hh b/include/seastar/net/api.hh index db9c430897..96c1260c35 100644 --- a/include/seastar/net/api.hh +++ b/include/seastar/net/api.hh @@ -389,6 +389,11 @@ public: /// @} +/// Options for creating a listening socket. +/// +/// WARNING: these options currently only have an effect when using +/// the POSIX stack: all options are ignored on the native stack as they +/// are not implemented there. struct listen_options { bool reuse_address = false; server_socket::load_balancing_algorithm lba = server_socket::load_balancing_algorithm::default_; @@ -396,6 +401,19 @@ struct listen_options { int listen_backlog = 100; unsigned fixed_cpu = 0u; std::optional unix_domain_socket_permissions; + + /// If set, the SO_SNDBUF size will be set to the given value on the listening socket + /// via setsockopt. This buffer size is inherited by the sockets returned by + /// accept and is the preferred way to set the buffer size for these sockets since + /// setting it directly on the already-accepted socket is ineffective (see TCP(7)). + std::optional so_sndbuf; + + /// If set, the SO_RCVBUF size will be set to the given value on the listening socket + /// via setsockopt. This buffer size is inherited by the sockets returned by + /// accept and is the preferred way to set the buffer size for these sockets since + /// setting it directly on the already-accepted socket is ineffective (see TCP(7)). + std::optional so_rcvbuf; + void set_fixed_cpu(unsigned cpu) { lba = server_socket::load_balancing_algorithm::fixed; fixed_cpu = cpu; @@ -450,8 +468,8 @@ public: return false; } - /** - * Returns available network interfaces. This represents a + /** + * Returns available network interfaces. This represents a * snapshot of interfaces available at call time, hence the * return by value. 
*/ diff --git a/src/core/reactor.cc b/src/core/reactor.cc index 54e6b8b4b8..169d1eb44b 100644 --- a/src/core/reactor.cc +++ b/src/core/reactor.cc @@ -27,7 +27,7 @@ module; #include #include #include -#include +#include #include #include #include @@ -1566,6 +1566,15 @@ reactor::posix_listen(socket_address sa, listen_options opts) { if (opts.reuse_address) { fd.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1); } + + if (opts.so_sndbuf) { + fd.setsockopt(SOL_SOCKET, SO_SNDBUF, *opts.so_sndbuf); + } + + if (opts.so_rcvbuf) { + fd.setsockopt(SOL_SOCKET, SO_RCVBUF, *opts.so_rcvbuf); + } + if (_reuseport && !sa.is_af_unix()) fd.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1); diff --git a/tests/unit/socket_test.cc b/tests/unit/socket_test.cc index 5ebbac1852..fa1955bdc3 100644 --- a/tests/unit/socket_test.cc +++ b/tests/unit/socket_test.cc @@ -27,13 +27,17 @@ #include #include #include +#include #include #include #include #include - +#include #include +#include +#include + using namespace seastar; future<> handle_connection(connected_socket s) { @@ -258,3 +262,77 @@ SEASTAR_TEST_CASE(socket_connect_abort_test) { when_all(std::move(cf), std::move(check), std::move(abort)).get(); }); } + +SEASTAR_THREAD_TEST_CASE(socket_bufsize) { + + // Test that setting the send and recv buffer sizes on the listening + // socket is propagated to the socket returned by accept(). 
+ + auto buf_size = [](std::optional snd_size, std::optional rcv_size) { + listen_options lo{ + .reuse_address = true, + .lba = server_socket::load_balancing_algorithm::fixed, + .so_sndbuf = snd_size, + .so_rcvbuf = rcv_size + }; + + ipv4_addr addr("127.0.0.1", 1234); + server_socket ss = seastar::listen(addr, lo); + connected_socket client = connect(addr).get(); + connected_socket server = ss.accept().get().connection; + + auto sockopt = [&](int option) { + int val{}; + int ret = server.get_sockopt(SOL_SOCKET, option, &val, sizeof(val)); + BOOST_REQUIRE_EQUAL(ret, 0); + return val; + }; + + int send = sockopt(SO_SNDBUF); + int recv = sockopt(SO_RCVBUF); + + ss.abort_accept(); + client.shutdown_output(); + server.shutdown_output(); + + + return std::make_tuple(send, recv); + }; + + constexpr int small_size = 8192, big_size = 128 * 1024; + + // we pass different sizes for send and recv to catch any copy/paste + // style bugs + auto [send_small, recv_small] = buf_size(small_size, small_size * 2); + auto [send_big, recv_big] = buf_size(big_size, big_size * 2); + + // Setting socket buffer sizes isn't an exact science: the kernel does + // some rounding, and also (currently) doubles the requested size and + // also applies some limits. So as a basic check, assert simply that the + // explicit small buffer ends up smaller than the explicit big buffer, + // and that both results are at least as large as the requested amount. + // The latter condition could plausibly fail if the OS clamped the size + // at a small amount, but this is unlikely for the chosen buffer sizes. 
+ + BOOST_CHECK_LT(send_small, send_big); + BOOST_CHECK_LT(recv_small, recv_big); + + BOOST_CHECK_GE(send_small, small_size); + BOOST_CHECK_GE(send_big, big_size); + + BOOST_CHECK_GE(recv_small, small_size * 2); + BOOST_CHECK_GE(recv_big, big_size * 2); + + // not much to check here with "default" sizes, but let's at least call it + // and check that we get a reasonable answer + auto [send_default, recv_default] = buf_size({}, {}); + + BOOST_CHECK_GE(send_default, 4096); + BOOST_CHECK_GE(recv_default, 4096); + + // we don't really know the default socket size and it can vary by kernel + // config, but 20 MB should be enough for everyone. + BOOST_CHECK_LT(send_default, 20'000'000); + BOOST_CHECK_LT(recv_default, 20'000'000); +} +