Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(profiling): avoid stack size limits on musl for stack v2 threads #12174

Open
wants to merge 22 commits into
base: 3.x-staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
aaedab9
chore: update changelog for version 2.19.2 (#12088)
Yun-Kim Jan 27, 2025
e3045a1
fix(profiling): fix SystemError when collecting memory profiler event…
nsrip-dd Jan 27, 2025
55767a7
chore(tracing): refactor web server integrations to use the core modu…
wconti27 Jan 28, 2025
16d5280
ci(tracer): make serverless test unrequired (#12121)
christophe-papazian Jan 28, 2025
4f0bcb5
chore(asm): clean libddwaf loading (#12102)
christophe-papazian Jan 28, 2025
c4448ea
fix(llmobs): propagate distributed headers via signal dispatching, no…
Yun-Kim Jan 28, 2025
cb41f8e
feat(provider): expose context provider in ddtrace.trace (#12135)
mabdinur Jan 29, 2025
b4add53
Make stackv2 sampler use pthreads
sanchda Jan 29, 2025
706c1e4
Update
sanchda Jan 30, 2025
acbcbdb
Make stackv2 sampler use pthreads
sanchda Jan 29, 2025
c18b1f6
Update
sanchda Jan 30, 2025
c412655
test on musl
taegyunkim Jan 30, 2025
6182e20
Merge branch 'sanchda/stackv2_thread_limits' of github.com:DataDog/dd…
sanchda Jan 30, 2025
bd4d0be
Fix
sanchda Jan 30, 2025
6c4e928
Merge branch '3.x-staging' into sanchda/stackv2_thread_limits
sanchda Jan 31, 2025
e006f64
Merge branch '3.x-staging' into sanchda/stackv2_thread_limits
sanchda Jan 31, 2025
7008e0e
Add release note
sanchda Jan 31, 2025
1ad58c2
Merge branch 'sanchda/stackv2_thread_limits' of github.com:DataDog/dd…
sanchda Jan 31, 2025
4cb09da
Merge branch '3.x-staging' into sanchda/stackv2_thread_limits
sanchda Feb 4, 2025
11134cf
fmt
sanchda Feb 4, 2025
cdf4475
Merge branch '3.x-staging' into sanchda/stackv2_thread_limits
sanchda Feb 4, 2025
d25c5ce
Merge branch '3.x-staging' into sanchda/stackv2_thread_limits
sanchda Feb 4, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@ class Sampler
// Parameters
uint64_t echion_frame_cache_size = g_default_echion_frame_cache_size;

// Helper function; implementation of the echion sampling thread
void sampling_thread(const uint64_t seq_num);

// This is a singleton, so no public constructor
Sampler();

Expand All @@ -37,7 +34,7 @@ class Sampler
public:
// Singleton instance
static Sampler& get();
void start();
bool start();
void stop();
void register_thread(uint64_t id, uint64_t native_id, const char* name);
void unregister_thread(uint64_t id);
Expand All @@ -46,6 +43,7 @@ class Sampler
PyObject* _asyncio_scheduled_tasks,
PyObject* _asyncio_eager_tasks);
void link_tasks(PyObject* parent, PyObject* child);
void sampling_thread(const uint64_t seq_num);

// The Python side dynamically adjusts the sampling rate based on overhead, so we need to be able to update our
// own intervals accordingly. Rather than a preemptive measure, we assume the rate is ~fairly stable and just
Expand Down
66 changes: 63 additions & 3 deletions ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,52 @@

using namespace Datadog;

// Helper class for spawning a std::thread with control over its default stack size
#ifdef __linux__
#include <sys/resource.h>
#include <unistd.h>

struct ThreadArgs
{
Sampler* sampler;
uint64_t seq_num;
};

void*
call_sampling_thread(void* args)
{
ThreadArgs thread_args = *static_cast<ThreadArgs*>(args);
delete static_cast<ThreadArgs*>(args); // no longer needed, dynamic alloc
Sampler* sampler = thread_args.sampler;
sampler->sampling_thread(thread_args.seq_num);
return nullptr;
}

pthread_t
create_thread_with_stack(size_t stack_size, Sampler* sampler, uint64_t seq_num)
{
pthread_attr_t attr;
if (pthread_attr_init(&attr) != 0) {
return 0;
}
if (stack_size > 0) {
pthread_attr_setstacksize(&attr, stack_size);
}

pthread_t thread_id;
ThreadArgs* thread_args = new ThreadArgs{ sampler, seq_num };
int ret = pthread_create(&thread_id, &attr, call_sampling_thread, thread_args);

pthread_attr_destroy(&attr);

if (ret != 0) {
delete thread_args; // usually deleted in the thread, but need to clean it up here
return 0;
}
return thread_id;
}
#endif

void
Sampler::sampling_thread(const uint64_t seq_num)
{
Expand Down Expand Up @@ -134,7 +180,7 @@ Sampler::unregister_thread(uint64_t id)
thread_info_map.erase(id);
}

void
bool
Sampler::start()
{
static std::once_flag once;
Expand All @@ -143,8 +189,22 @@ Sampler::start()
// Launch the sampling thread.
// Thread lifetime is bounded by the value of the sequence number. When it is changed from the value the thread was
// launched with, the thread will exit.
std::thread t(&Sampler::sampling_thread, this, ++thread_seq_num);
t.detach();
#ifdef __linux__
// We might as well get the default stack size and use that
rlimit stack_sz = {};
getrlimit(RLIMIT_STACK, &stack_sz);
if (create_thread_with_stack(stack_sz.rlim_cur, this, ++thread_seq_num) == 0) {
return false;
}
#else
try {
std::thread t(&Sampler::sampling_thread, this, ++thread_seq_num);
t.detach();
} catch (const std::exception& e) {
return false;
}
#endif
return true;
}

void
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ _stack_v2_start(PyObject* self, PyObject* args, PyObject* kwargs)
}

Sampler::get().set_interval(min_interval_s);
Sampler::get().start();
Py_RETURN_NONE;
if (Sampler::get().start()) {
Py_RETURN_TRUE;
}
Py_RETURN_FALSE;
}

// Bypasses the old-style cast warning with an unchecked helper function
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
fixes:
- |
Fixes an issue where Profiling native threads would respect the musl libc
default stack size, which could cause stack overflows in certain
configurations.
4 changes: 2 additions & 2 deletions src/native/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions tests/smoke_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,8 @@ def emit(self, record):
platform.system() == "Windows"
# libdatadog x86_64-apple-darwin has not yet been integrated to dd-trace-py
or (platform.system() == "Darwin" and platform.machine() == "x86_64")
# echion crashes on musl linux with Python 3.12 for both x86_64 and
# aarch64
or (platform.system() == "Linux" and sys.version_info[:2] == (3, 12) and platform.libc_ver()[0] != "glibc")
# echion only works with 3.8+
or sys.version_info < (3, 8, 0)
):
orig_env = os.environ.copy()
copied_env = copy.deepcopy(orig_env)
Expand Down
Loading