From 45984d205539b965b23a084b2f51efdce7d8d08f Mon Sep 17 00:00:00 2001 From: Travis Downs Date: Tue, 18 Feb 2025 16:47:34 -0300 Subject: [PATCH] cpu_profiler: test for scheduling groups Test that scheduling groups are recorded in the sample. Additionally for all the tests that don't set up any explicit scheduling groups, assert that all samples are in the main group. --- tests/unit/cpu_profiler_test.cc | 128 ++++++++++++++++---- tests/unit/stall_detector_test_utilities.hh | 12 ++ 2 files changed, 116 insertions(+), 24 deletions(-) diff --git a/tests/unit/cpu_profiler_test.cc b/tests/unit/cpu_profiler_test.cc index 9c1caad94c..0a29eb7d04 100644 --- a/tests/unit/cpu_profiler_test.cc +++ b/tests/unit/cpu_profiler_test.cc @@ -19,22 +19,30 @@ * Copyright (C) 2023 ScyllaDB Ltd. */ -#include +#include +#include #include + #include #include +#include #include +#include #include -#include -#include +#include #include #include -#include -#include -#include +#include +#include #include "stall_detector_test_utilities.hh" +#include + +#include + +namespace { + struct temporary_profiler_settings { std::chrono::nanoseconds prev_ns; bool prev_enabled; @@ -64,9 +72,37 @@ bool close_to_expected(size_t actual_size, size_t expected_size, double allowed_ auto lower_bound = (1 - allowed_dev) * expected_size; auto upper_bound = (1 + allowed_dev) * expected_size; + BOOST_TEST_INFO("actual_size: " << actual_size << ", lower_bound " << lower_bound << ", upper_bound " << upper_bound); + return actual_size <= upper_bound && actual_size >= lower_bound; } +/* + * Get the current profile results and dropped count. If sg_is_main is true, also validates that + * the sg associated with the profile is always main, as we expect unless some SG have been + * created explicitly. 
+ */ +std::pair, size_t> get_profile_and_dropped(bool sg_is_main = true) { + std::vector results; + auto dropped = engine().profiler_results(results); + + for (const auto& result : results) { + BOOST_CHECK(!sg_is_main || result.sg == default_scheduling_group()); + } + + return {std::move(results), dropped}; +} + + +// Get the profile, checking that every sample is in the main group +std::vector get_profile() { + return get_profile_and_dropped().first; +} + +} + + + SEASTAR_THREAD_TEST_CASE(config_case) { // Ensure that repeatedly configuring the profiler results // in expected behavior. @@ -79,14 +115,12 @@ SEASTAR_THREAD_TEST_CASE(config_case) { spin_some_cooperatively(120*10ms); - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); BOOST_REQUIRE(close_to_expected(results.size(), 12)); } spin_some_cooperatively(128*10ms); - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); BOOST_REQUIRE_EQUAL(results.size(), 0); } @@ -95,8 +129,7 @@ SEASTAR_THREAD_TEST_CASE(simple_case) { spin_some_cooperatively(120*10ms); - std::vector results; - auto dropped_samples = engine().profiler_results(results); + auto [results, dropped_samples] = get_profile_and_dropped(); BOOST_REQUIRE(close_to_expected(results.size(), 12)); BOOST_REQUIRE_EQUAL(dropped_samples, 0); } @@ -108,8 +141,7 @@ SEASTAR_THREAD_TEST_CASE(overwrite_case) { spin_some_cooperatively(256*10ms); - std::vector results; - auto dropped_samples = engine().profiler_results(results); + auto [results, dropped_samples] = get_profile_and_dropped(); // 128 is the maximum number of samples the profiler can // retain. 
BOOST_REQUIRE_EQUAL(results.size(), 128); @@ -130,8 +162,7 @@ SEASTAR_THREAD_TEST_CASE(mixed_case) { } BOOST_REQUIRE_EQUAL(reports, 5); - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); BOOST_REQUIRE(close_to_expected(results.size(), 12)); } @@ -141,8 +172,7 @@ SEASTAR_THREAD_TEST_CASE(spin_in_kernel) { spin_some_cooperatively(100ms, [] { mmap_populate(128 * 1024); }); - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); int count = 0; for(auto& result : results) { if(result.kernel_backtrace.size() > 0){ @@ -225,8 +255,7 @@ SEASTAR_THREAD_TEST_CASE(exception_handler_case) { random_exception_catcher(100, d(gen)); } - std::vector results; - auto dropped_samples = engine().profiler_results(results); + auto [results, dropped_samples] = get_profile_and_dropped(); BOOST_REQUIRE_EQUAL(results.size(), 128); BOOST_REQUIRE(dropped_samples > 0); } @@ -238,8 +267,7 @@ SEASTAR_THREAD_TEST_CASE(manually_disable) { spin_some_cooperatively(100ms); - std::vector results; - auto dropped_samples = engine().profiler_results(results); + auto [_, dropped_samples] = get_profile_and_dropped(); BOOST_REQUIRE(dropped_samples > 0); } @@ -261,7 +289,59 @@ SEASTAR_THREAD_TEST_CASE(config_thrashing) { spin_some_cooperatively(1us); } - std::vector results; - engine().profiler_results(results); + auto results = get_profile(); BOOST_REQUIRE(results.size() > 0); } + +SEASTAR_THREAD_TEST_CASE(scheduling_group_test) { + + auto sg_a = create_scheduling_group("sg_a", 200).get(); + auto sg_b = create_scheduling_group("sg_b", 200).get(); + + auto destroy_groups = defer([&]() noexcept { + destroy_scheduling_group(sg_b).get(); + destroy_scheduling_group(sg_a).get(); + }); + + temporary_profiler_settings cp{true, 100ms}; + + auto fut_a = with_scheduling_group(sg_a, [] { + return spin_some_cooperatively_coro(2100ms); + }); + + with_scheduling_group(sg_b, [] { + return 
spin_some_cooperatively_coro(2100ms); + }).get(); + + std::move(fut_a).get(); + + std::vector results; + auto dropped_samples = engine().profiler_results(results); + + size_t count_a = 0, count_b = 0, count_main = 0; + for (auto& r : results) { + if (r.sg == sg_a) { + ++count_a; + } else if (r.sg == sg_b) { + ++count_b; + } else if (r.sg == default_scheduling_group()) { + // this happens when the profiler triggers during non-task + // work, such as in the reactor pollers + ++count_main; + } else { + BOOST_TEST_FAIL("unexpected SG: " << r.sg.name()); + } + } + + // We expect a and b to be a 1:1 ratio, though we accept large + // variance since this is random sampling of two "randomly" scheduled + // groups so we don't really have the same guarantees we do in the + // single group case where we expect sort of +/- 1 due to the way we + // calculate the sampling intervals. + // Nominally the split is 10/10/0 for a/b/main, but we accept the + // below to keep flakiness to a minimum. + BOOST_CHECK_GT(count_a, 5); + BOOST_CHECK_GT(count_b, 5); + BOOST_CHECK_LT(count_main, 3); + BOOST_CHECK_EQUAL(dropped_samples, 0); +} diff --git a/tests/unit/stall_detector_test_utilities.hh b/tests/unit/stall_detector_test_utilities.hh index d42eff1480..d7e0cbf03e 100644 --- a/tests/unit/stall_detector_test_utilities.hh +++ b/tests/unit/stall_detector_test_utilities.hh @@ -24,6 +24,9 @@ #include #include #include +#include "seastar/core/scheduling.hh" +#include "seastar/core/thread.hh" +#include "seastar/coroutine/maybe_yield.hh" #include #include #include @@ -88,6 +91,15 @@ void spin_some_cooperatively(std::chrono::duration how_much, void_fn bod } } +future<> spin_some_cooperatively_coro(std::chrono::duration how_much, void_fn body = []{}) { + auto end = std::chrono::steady_clock::now() + how_much; + while (std::chrono::steady_clock::now() < end) { + // Spin in 200us bursts running `body`, awaiting maybe_yield() after each burst. + spin(200us, body); + co_await coroutine::maybe_yield(); + } +} + // 
Triggers stalls by spinning with a specify "body" function // which takes most of the spin time.