Skip to content

Commit

Permalink
Merge pull request #597 from Keno/kf/noplrdtsc
Browse files Browse the repository at this point in the history
Use patchable rdtsc sequence to avoid slowdowns under rr
  • Loading branch information
wolfpld authored Sep 24, 2023
2 parents 855fd29 + 5417227 commit da1bc2b
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 0 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ set_option(TRACY_NO_VERIFY "Disable zone validation for C API" OFF)
set_option(TRACY_NO_VSYNC_CAPTURE "Disable capture of hardware Vsync events" OFF)
set_option(TRACY_NO_FRAME_IMAGE "Disable the frame image support and its thread" OFF)
set_option(TRACY_NO_SYSTEM_TRACING "Disable systrace sampling" OFF)
# TRACY_PATCHABLE_NOPSLEDS selects the `#ifdef TRACY_PATCHABLE_NOPSLEDS` path in
# public/client/TracyProfiler.hpp, which prefixes rdtsc with a fixed 5-byte nop so that
# tools such as rr can patch the sequence. Off by default.
# NOTE(review): presumably set_option() turns this into a compile definition of the same
# name -- confirm against the set_option macro, which is not visible here.
set_option(TRACY_PATCHABLE_NOPSLEDS "Enable nopsleds for efficient patching by system-level tools (e.g. rr)" OFF)
set_option(TRACY_DELAYED_INIT "Enable delayed initialization of the library (init on first call)" OFF)
set_option(TRACY_MANUAL_LIFETIME "Enable the manual lifetime management of the profile" OFF)
set_option(TRACY_FIBERS "Enable fibers support" OFF)
Expand Down
4 changes: 4 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ if get_option('tracy_no_system_tracing')
add_project_arguments('-DTRACY_NO_SYSTEM_TRACING', language : 'cpp')
endif

# Opt-in: pass -DTRACY_PATCHABLE_NOPSLEDS to the C++ compiler when the option is set.
# NOTE(review): the option name 'tracy_no_extra_nopsleds' reads as the opposite of what
# it enables and does not match the define or the CMake option (TRACY_PATCHABLE_NOPSLEDS).
# Confirm that meson_options.txt declares exactly this name; get_option() on an
# undeclared option aborts configuration.
if get_option('tracy_no_extra_nopsleds')
add_project_arguments('-DTRACY_PATCHABLE_NOPSLEDS', language : 'cpp')
endif

if get_option('tracy_delayed_init')
add_project_arguments('-DTRACY_DELAYED_INIT', language : 'cpp')
endif
Expand Down
15 changes: 15 additions & 0 deletions public/client/TracyProfiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,22 @@ class Profiler
if( HardwareSupportsInvariantTSC() )
{
uint64_t rax, rdx;
#ifdef TRACY_PATCHABLE_NOPSLEDS
// Some external tooling (such as rr) wants to patch our rdtsc and replace it by a
// branch to control the external input seen by a program. Whether such patching is
// possible depends on the surrounding code; when the compiler happens to generate an
// unpatchable sequence, using rr and tracy together can cause significant slowdowns.
// To avoid this, use the rr-safe `nopl 0(%rax, %rax, 1); rdtsc` instruction sequence,
// which rr promises will be patchable independent of the surrounding code.
asm volatile (
// This is nopl 0(%rax, %rax, 1), but assemblers are inconsistent about whether
// they emit that as a 4 or 5 byte sequence and we need to be guaranteed to use
// the 5 byte one.
".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n\t"
"rdtsc" : "=a" (rax), "=d" (rdx) );
#else
asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) );
#endif
return (int64_t)(( rdx << 32 ) + rax);
}
# else
Expand Down

0 comments on commit da1bc2b

Please sign in to comment.