From 462a6cd30b5d295d03b248a2aba331dcf67c8048 Mon Sep 17 00:00:00 2001 From: Ivan Maidanski Date: Fri, 1 Nov 2024 21:06:53 +0300 Subject: [PATCH] New GC_get_avg_stopped_mark_time_ns API function Add a public function to get the average time spent in all mark phases with the world stopped. Also, print the value at the end of gctest. * alloc.c [!NO_CLOCK] (world_stopped_total_time, world_stopped_total_divisor, MAX_TOTAL_TIME_DIVISOR): Move definition upper (to be near `GC_get_stopped_mark_total_time`). * alloc.c [!NO_CLOCK] (world_stopped_total_time): Refine comment; add TODO item. * alloc.c [!NO_CLOCK] (GC_get_avg_stopped_mark_time_ns): Implement. * alloc.c [!NO_CLOCK] (GC_stopped_mark): Update value of `world_stopped_total_time` and `world_stopped_total_divisor` also if `measure_performance`. * include/gc/gc.h (GC_start_performance_measurement): Update comment. * include/gc/gc.h (GC_get_avg_stopped_mark_time_ns): New API function declaration. * tests/gctest.c [!NO_CLOCK] (check_heap_stats): Print value of `GC_get_stopped_mark_total_time()` on a separate line; print value of `GC_get_avg_stopped_mark_time_ns()` (divided by 1000). --- alloc.c | 63 ++++++++++++++++++++++++++++++++++++------------- include/gc/gc.h | 14 +++++++++-- tests/gctest.c | 6 +++-- 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/alloc.c b/alloc.c index d2ef72642..47009bec4 100644 --- a/alloc.c +++ b/alloc.c @@ -56,6 +56,7 @@ word GC_non_gc_bytes = 0; word GC_gc_no = 0; #ifndef NO_CLOCK + static unsigned long full_gc_total_time = 0; /* in ms, may wrap */ static unsigned long stopped_mark_total_time = 0; static unsigned32 full_gc_total_ns_frac = 0; /* fraction of 1 ms */ @@ -82,6 +83,50 @@ GC_get_stopped_mark_total_time(void) { return stopped_mark_total_time; } + +/* Variables for world-stop average delay time statistic computation. */ +/* "divisor" is incremented every world stop and halved when reached */ +/* its maximum (or upon "total_time" overflow). In milliseconds. */ +/* TODO: Store the nanosecond part. */ +static unsigned world_stopped_total_time = 0; +static unsigned world_stopped_total_divisor = 0; + +# ifndef MAX_TOTAL_TIME_DIVISOR +/* We shall not use big values here (so "outdated" delay time */ +/* values would have less impact on "average" delay time value than */ +/* newer ones). */ +# define MAX_TOTAL_TIME_DIVISOR 1000 +# endif + +GC_API unsigned long GC_CALL +GC_get_avg_stopped_mark_time_ns(void) +{ + unsigned long total_time; + unsigned divisor; + + READER_LOCK(); + total_time = (unsigned long)world_stopped_total_time; + divisor = world_stopped_total_divisor; + READER_UNLOCK(); + if (0 == divisor) { + GC_ASSERT(0 == total_time); + /* No world-stopped collection has occurred since the start of */ + /* performance measurements. */ + return 0; + } + + /* Halve values to prevent overflow during the multiplication. */ + for (; total_time > ~0UL / (1000UL * 1000); total_time >>= 1) { + divisor >>= 1; + if (EXPECT(0 == divisor, FALSE)) { + /* The actual result is larger than representable value. */ + return ~0UL; + } + } + + return total_time * (1000UL * 1000) / divisor; +} + #endif /* !NO_CLOCK */ #ifndef GC_DISABLE_INCREMENTAL @@ -824,20 +869,6 @@ GC_start_world_external(void) } #endif /* THREADS */ -#ifndef NO_CLOCK -/* Variables for world-stop average delay time statistic computation. */ -/* "divisor" is incremented every world-stop and halved when reached */ -/* its maximum (or upon "total_time" overflow). */ -static unsigned world_stopped_total_time = 0; -static unsigned world_stopped_total_divisor = 0; -# ifndef MAX_TOTAL_TIME_DIVISOR -/* We shall not use big values here (so "outdated" delay time */ -/* values would have less impact on "average" delay time value than */ -/* newer ones). */ -# define MAX_TOTAL_TIME_DIVISOR 1000 -# endif -#endif /* !NO_CLOCK */ - #ifdef USE_MUNMAP # ifndef MUNMAP_THRESHOLD # define MUNMAP_THRESHOLD 7 @@ -982,7 +1013,7 @@ GC_stopped_mark(GC_stop_func stop_func) } } - if (GC_PRINT_STATS_FLAG) { + if (GC_PRINT_STATS_FLAG || measure_performance) { unsigned total_time = world_stopped_total_time; unsigned divisor = world_stopped_total_divisor; @@ -998,7 +1029,7 @@ GC_stopped_mark(GC_stop_func stop_func) /* Update old world_stopped_total_time and its divisor. */ world_stopped_total_time = total_time; world_stopped_total_divisor = ++divisor; - if (0 == abandoned_at) { + if (GC_PRINT_STATS_FLAG && 0 == abandoned_at) { GC_ASSERT(divisor != 0); GC_log_printf("World-stopped marking took %lu ms %lu ns" " (%u ms in average)\n", diff --git a/include/gc/gc.h b/include/gc/gc.h index 72b7e0f44..c80aee716 100644 --- a/include/gc/gc.h +++ b/include/gc/gc.h @@ -413,8 +413,9 @@ GC_API void GC_CALL GC_set_allocd_bytes_per_finalizer(GC_word); GC_API GC_word GC_CALL GC_get_allocd_bytes_per_finalizer(void); /* Tell the collector to start various performance measurements. */ -/* Only the total time taken by full collections is calculated, as */ -/* of now. And, currently, there is no way to stop the measurements. */ +/* Only the total time taken by full collections and the average time */ +/* spent in the world-stopped collections are calculated, as of now. */ +/* And, currently, there is no way to stop the measurements. */ /* The function does not use any synchronization. Defined only if the */ /* library has been compiled without NO_CLOCK. */ GC_API void GC_CALL GC_start_performance_measurement(void); @@ -432,6 +433,15 @@ GC_API unsigned long GC_CALL GC_get_full_gc_total_time(void); /* phases with the world stopped and nothing else. */ GC_API unsigned long GC_CALL GC_get_stopped_mark_total_time(void); +/* Get the average time spent in all mark phases with the world */ +/* stopped. The average value is computed since the start of the */ +/* performance measurements (or right since the collector */ +/* initialization if the GC logging is enabled). The result is in */ +/* nanoseconds. The function acquires the allocator lock (in the */ +/* reader mode) to avoid data race. Defined only if the library has */ +/* been compiled without NO_CLOCK. */ +GC_API unsigned long GC_CALL GC_get_avg_stopped_mark_time_ns(void); + /* Set whether the garbage collector will allocate executable memory */ /* pages or not. A non-zero argument instructs the collector to */ /* allocate memory with the executable flag on. Must be called before */ diff --git a/tests/gctest.c b/tests/gctest.c index 5b171dd91..853804729 100644 --- a/tests/gctest.c +++ b/tests/gctest.c @@ -2226,8 +2226,10 @@ check_heap_stats(void) test_long_mult(); #ifndef NO_CLOCK - GC_printf("Full/world-stopped collections took %lu/%lu ms\n", - GC_get_full_gc_total_time(), GC_get_stopped_mark_total_time()); + GC_printf("Full collections took %lu ms\n", GC_get_full_gc_total_time()); + GC_printf("World-stopped pauses took %lu ms (%lu us each in avg.)\n", + GC_get_stopped_mark_total_time(), + GC_get_avg_stopped_mark_time_ns() / 1000); #endif #ifdef PARALLEL_MARK GC_printf("Completed %u collections (using %d marker threads)\n",