From 2e8426faa3b2c530323ba9eeb837126f893e0990 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Tue, 13 Aug 2024 17:14:00 -0400 Subject: [PATCH 01/15] Encapsulate accessing structure fields Add a new `Structure` class that encapsulates reading and interpreting the memory of the various Python data structures we need to work with. This is both easier to work with, and lays the groundwork for us to be able to handle reading structures without knowing their maximum size at compile time. This also fixes several places where we read more bytes than necessary, which could have resulted in spurious memory read failures due to reading past the end of a mapping. Signed-off-by: Matt Wozniski --- src/pystack/_pystack/process.cpp | 66 ++++++++++------------ src/pystack/_pystack/process.h | 15 ----- src/pystack/_pystack/pycode.cpp | 24 ++++---- src/pystack/_pystack/pyframe.cpp | 25 ++++----- src/pystack/_pystack/pyframe.h | 14 +++-- src/pystack/_pystack/pythread.cpp | 86 +++++++++++++---------------- src/pystack/_pystack/pythread.h | 11 ++-- src/pystack/_pystack/pytypes.cpp | 73 +++++++++++-------------- src/pystack/_pystack/pytypes.h | 5 +- src/pystack/_pystack/structure.h | 91 +++++++++++++++++++++++++++++++ src/pystack/_pystack/version.cpp | 1 + src/pystack/_pystack/version.h | 8 ++- 12 files changed, 236 insertions(+), 183 deletions(-) create mode 100644 src/pystack/_pystack/structure.h diff --git a/src/pystack/_pystack/process.cpp b/src/pystack/_pystack/process.cpp index 4b7dd3fd..c414e0c9 100644 --- a/src/pystack/_pystack/process.cpp +++ b/src/pystack/_pystack/process.cpp @@ -234,30 +234,30 @@ AbstractProcessManager::isValidInterpreterState(remote_addr_t addr) const return false; } - PyInterpreterState is; + Structure is(shared_from_this(), addr); // The check for valid addresses may fail if the address falls in the stack // space (there are "holes" in the address map space so just checking for // min_addr < addr < max_addr does not guarantee a valid address) so we need 
// to catch InvalidRemoteAddress exceptions. try { - copyObjectFromProcess(addr, &is); + is.copyFromRemote(); } catch (RemoteMemCopyError& ex) { return false; } - PyThreadState current_thread; - auto current_thread_addr = getField(is, &py_is_v::o_tstate_head); + auto current_thread_addr = is.getField(&py_is_v::o_tstate_head); if (!isAddressValid(current_thread_addr)) { return false; } + Structure current_thread(shared_from_this(), current_thread_addr); try { - copyObjectFromProcess(current_thread_addr, &current_thread); + current_thread.copyFromRemote(); } catch (RemoteMemCopyError& ex) { return false; } - if (getField(current_thread, &py_thread_v::o_interp) != addr) { + if (current_thread.getField(&py_thread_v::o_interp) != addr) { return false; } @@ -266,9 +266,9 @@ AbstractProcessManager::isValidInterpreterState(remote_addr_t addr) const // Validate dictionaries in the interpreter state std::unordered_map dictionaries( - {{"modules", getField(is, &py_is_v::o_modules)}, - {"sysdict", getField(is, &py_is_v::o_sysdict)}, - {"builtins", getField(is, &py_is_v::o_builtins)}}); + {{"modules", is.getField(&py_is_v::o_modules)}, + {"sysdict", is.getField(&py_is_v::o_sysdict)}, + {"builtins", is.getField(&py_is_v::o_builtins)}}); for (const auto& [dictname, addr] : dictionaries) { if (!isValidDictionaryObject(addr)) { LOG(DEBUG) << "The '" << dictname << "' dictionary object is not valid"; @@ -304,9 +304,8 @@ AbstractProcessManager::findInterpreterStateFromPyRuntime(remote_addr_t runtime_ LOG(INFO) << "Searching for PyInterpreterState based on PyRuntime address " << std::hex << std::showbase << runtime_addr; - PyRuntimeState py_runtime; - copyObjectFromProcess(runtime_addr, &py_runtime); - remote_addr_t interp_state = getField(py_runtime, &py_runtime_v::o_interp_head); + Structure py_runtime(shared_from_this(), runtime_addr); + remote_addr_t interp_state = py_runtime.getField(&py_runtime_v::o_interp_head); if (!isValidInterpreterState(interp_state)) { LOG(INFO) << "Failing to 
resolve PyInterpreterState based on PyRuntime address " << std::hex @@ -411,35 +410,34 @@ std::string AbstractProcessManager::getStringFromAddress(remote_addr_t addr) const { Python2::_PyStringObject string; - PyUnicodeObject unicode; std::vector buffer; ssize_t len; remote_addr_t data_addr; if (d_major == 2) { - LOG(DEBUG) << std::hex << std::showbase << "Handling unicode object of version 2 from address " + LOG(DEBUG) << std::hex << std::showbase << "Handling string object of version 2 from address " << addr; copyObjectFromProcess(addr, &string); len = string.ob_base.ob_size; buffer.resize(len); data_addr = (remote_addr_t)((char*)addr + offsetof(Python2::_PyStringObject, ob_sval)); - LOG(DEBUG) << std::hex << std::showbase << "Copying ASCII data for unicode object from address " + LOG(DEBUG) << std::hex << std::showbase << "Copying ASCII data for string object from address " << data_addr; copyMemoryFromProcess(data_addr, len, buffer.data()); } else { LOG(DEBUG) << std::hex << std::showbase << "Handling unicode object of version 3 from address " << addr; - copyMemoryFromProcess(addr, offsets().py_unicode.size, &unicode); + Structure unicode(shared_from_this(), addr); - Python3::_PyUnicode_State state = getField(unicode, &py_unicode_v::o_state); + Python3::_PyUnicode_State state = unicode.getField(&py_unicode_v::o_state); if (state.kind != 1 || state.compact != 1) { throw InvalidRemoteObject(); } - len = getField(unicode, &py_unicode_v::o_length); + len = unicode.getField(&py_unicode_v::o_length); buffer.resize(len); - data_addr = addr + getFieldOffset(&py_unicode_v::o_ascii); + data_addr = unicode.getFieldRemoteAddress(&py_unicode_v::o_ascii); LOG(DEBUG) << std::hex << std::showbase << "Copying ASCII data for unicode object from address " << data_addr; copyMemoryFromProcess(data_addr, len, buffer.data()); @@ -469,15 +467,13 @@ AbstractProcessManager::getBytesFromAddress(remote_addr_t addr) const } else { LOG(DEBUG) << std::hex << std::showbase << "Handling bytes 
object of version 3 from address " << addr; - PyBytesObject bytes; - - copyMemoryFromProcess(addr, offsets().py_bytes.size, &bytes); - len = getField(bytes, &py_bytes_v::o_ob_size) + 1; + Structure bytes(shared_from_this(), addr); + len = bytes.getField(&py_bytes_v::o_ob_size) + 1; if (len < 1) { throw std::runtime_error("Incorrect size of the fetched bytes object"); } buffer.resize(len); - data_addr = addr + getFieldOffset(&py_bytes_v::o_ob_sval); + data_addr = bytes.getFieldRemoteAddress(&py_bytes_v::o_ob_sval); LOG(DEBUG) << std::hex << std::showbase << "Copying data for bytes object from address " << data_addr; @@ -559,9 +555,8 @@ AbstractProcessManager::isInterpreterActive() const { remote_addr_t runtime_addr = findSymbol("_PyRuntime"); if (runtime_addr) { - PyRuntimeState py_runtime; - copyObjectFromProcess(runtime_addr, &py_runtime); - remote_addr_t p = getField(py_runtime, &py_runtime_v::o_finalizing); + Structure py_runtime(shared_from_this(), runtime_addr); + remote_addr_t p = py_runtime.getField(&py_runtime_v::o_finalizing); return p == 0 ? InterpreterStatus::RUNNING : InterpreterStatus::FINALIZED; } @@ -612,33 +607,32 @@ AbstractProcessManager::warnIfOffsetsAreMismatched() const return; // We need to start from the _PyRuntime structure } - PyRuntimeState py_runtime; - copyObjectFromProcess(runtime_addr, &py_runtime); + Structure py_runtime(shared_from_this(), runtime_addr); - if (0 != memcmp(&py_runtime, "xdebugpy", 8)) { + if (0 != memcmp(py_runtime.getField(&py_runtime_v::o_dbg_off_cookie), "xdebugpy", 8)) { LOG(WARNING) << "Debug offsets cookie doesn't match!"; return; } // Note: It's OK for pystack's size to be smaller, but not larger. 
#define compare_size(size_offset, pystack_struct) \ - if (getFieldOffset(size_offset) \ - && ((uint64_t)offsets().pystack_struct.size > getField(py_runtime, size_offset))) \ + if ((d_py_v->py_runtime.*size_offset).offset \ + && ((uint64_t)offsets().pystack_struct.size > py_runtime.getField(size_offset))) \ { \ LOG(WARNING) << "Debug offsets mismatch: " #pystack_struct ".size " \ - << offsets().pystack_struct.size << " > " << getField(py_runtime, size_offset) \ + << offsets().pystack_struct.size << " > " << py_runtime.getField(size_offset) \ << " reported by CPython"; \ } else \ do { \ } while (0) #define compare_offset(field_offset_offset, pystack_field) \ - if (getFieldOffset(field_offset_offset) \ - && (uint64_t)offsets().pystack_field.offset != getField(py_runtime, field_offset_offset)) \ + if ((d_py_v->py_runtime.*field_offset_offset).offset \ + && (uint64_t)offsets().pystack_field.offset != py_runtime.getField(field_offset_offset)) \ { \ LOG(WARNING) << "Debug offsets mismatch: " #pystack_field << " " \ << offsets().pystack_field.offset \ - << " != " << getField(py_runtime, field_offset_offset) << " reported by CPython"; \ + << " != " << py_runtime.getField(field_offset_offset) << " reported by CPython"; \ } else \ do { \ } while (0) diff --git a/src/pystack/_pystack/process.h b/src/pystack/_pystack/process.h index fad6d303..ff3d21b3 100644 --- a/src/pystack/_pystack/process.h +++ b/src/pystack/_pystack/process.h @@ -92,21 +92,6 @@ class AbstractProcessManager : public std::enable_shared_from_this - inline offset_t getFieldOffset(FieldPointer OffsetsStruct::*field) const - { - return (d_py_v->get().*field).offset; - } - - template - inline const typename FieldPointer::Type& - getField(const typename OffsetsStruct::Structure& obj, FieldPointer OffsetsStruct::*field) const - { - offset_t offset = getFieldOffset(field); - auto address = reinterpret_cast(&obj) + offset; - return *reinterpret_cast(address); - } - protected: // Data members pid_t d_pid; diff --git 
a/src/pystack/_pystack/pycode.cpp b/src/pystack/_pystack/pycode.cpp index d85727d8..2420937f 100644 --- a/src/pystack/_pystack/pycode.cpp +++ b/src/pystack/_pystack/pycode.cpp @@ -105,11 +105,11 @@ static LocationInfo getLocationInfo( const std::shared_ptr& manager, remote_addr_t code_addr, - PyCodeObject& code, + Structure& code, uintptr_t last_instruction_index) { - int code_lineno = manager->getField(code, &py_code_v::o_firstlineno); - remote_addr_t lnotab_addr = manager->getField(code, &py_code_v::o_lnotab); + int code_lineno = code.getField(&py_code_v::o_firstlineno); + remote_addr_t lnotab_addr = code.getField(&py_code_v::o_lnotab); LOG(DEBUG) << std::hex << std::showbase << "Copying lnotab data from address " << lnotab_addr; std::string lnotab = manager->getBytesFromAddress(lnotab_addr); @@ -121,7 +121,7 @@ getLocationInfo( // Check out https://github.com/python/cpython/blob/main/Objects/lnotab_notes.txt for the format of // the lnotab table in different versions of the interpreter. 
if (manager->versionIsAtLeast(3, 11)) { - uintptr_t code_adaptive = code_addr + manager->getFieldOffset(&py_code_v::o_code_adaptive); + uintptr_t code_adaptive = code.getFieldRemoteAddress(&py_code_v::o_code_adaptive); ptrdiff_t addrq = (reinterpret_cast(last_instruction_index) - reinterpret_cast(code_adaptive)); @@ -178,9 +178,8 @@ isValid(const std::shared_ptr& manager, remote_add } return false; } else { - PyObject obj; - manager->copyObjectFromProcess(addr, &obj); - return reinterpret_cast(obj.ob_type) == pycodeobject_addr; + Structure obj(manager, addr); + return obj.getField(&py_object_v::o_ob_type) == pycodeobject_addr; } } return true; @@ -191,7 +190,6 @@ CodeObject::CodeObject( remote_addr_t addr, uintptr_t lasti) { - PyCodeObject code; if (!isValid(manager, addr)) { d_filename = "???"; d_scope = "???"; @@ -200,15 +198,15 @@ CodeObject::CodeObject( return; } LOG(DEBUG) << std::hex << std::showbase << "Copying code struct from address " << addr; - manager->copyMemoryFromProcess(addr, manager->offsets().py_code.size, &code); + Structure code(manager, addr); - remote_addr_t filename_addr = manager->getField(code, &py_code_v::o_filename); + remote_addr_t filename_addr = code.getField(&py_code_v::o_filename); LOG(DEBUG) << std::hex << std::showbase << "Copying filename Python string from address " << filename_addr; d_filename = manager->getStringFromAddress(filename_addr); LOG(DEBUG) << "Code object filename: " << d_filename; - remote_addr_t name_addr = manager->getField(code, &py_code_v::o_name); + remote_addr_t name_addr = code.getField(&py_code_v::o_name); LOG(DEBUG) << std::hex << std::showbase << "Copying code name Python string from address " << name_addr; d_scope = manager->getStringFromAddress(name_addr); @@ -220,11 +218,11 @@ CodeObject::CodeObject( << d_location_info.end_lineno << ") column_range=(" << d_location_info.column << ", " << d_location_info.end_column << ")"; - d_narguments = manager->getField(code, &py_code_v::o_argcount); + d_narguments = 
code.getField(&py_code_v::o_argcount); LOG(DEBUG) << "Code object n arguments: " << d_narguments; LOG(DEBUG) << "Copying variable names"; - remote_addr_t varnames_addr = manager->getField(code, &py_code_v::o_varnames); + remote_addr_t varnames_addr = code.getField(&py_code_v::o_varnames); TupleObject varnames(manager, varnames_addr); std::transform( varnames.Items().cbegin(), diff --git a/src/pystack/_pystack/pyframe.cpp b/src/pystack/_pystack/pyframe.cpp index 72169969..c752ddd9 100644 --- a/src/pystack/_pystack/pyframe.cpp +++ b/src/pystack/_pystack/pyframe.cpp @@ -19,11 +19,9 @@ FrameObject::FrameObject( ssize_t frame_no) : d_manager(manager) { - PyFrameObject frame; LOG(DEBUG) << "Copying frame number " << frame_no; LOG(DEBUG) << std::hex << std::showbase << "Copying frame struct from address " << addr; - - manager->copyMemoryFromProcess(addr, manager->offsets().py_frame.size, &frame); + Structure frame(manager, addr); d_addr = addr; d_frame_no = frame_no; @@ -37,7 +35,7 @@ FrameObject::FrameObject( d_code = getCode(manager, frame); - auto prev_addr = manager->getField(frame, &py_frame_v::o_back); + auto prev_addr = frame.getField(&py_frame_v::o_back); LOG(DEBUG) << std::hex << std::showbase << "Previous frame address: " << prev_addr; if (prev_addr) { d_prev = std::make_shared(manager, prev_addr, next_frame_no); @@ -48,11 +46,11 @@ FrameObject::FrameObject( bool FrameObject::getIsShim( const std::shared_ptr& manager, - const PyFrameObject& frame) + Structure& frame) { if (manager->versionIsAtLeast(3, 12)) { constexpr int FRAME_OWNED_BY_CSTACK = 3; - return manager->getField(frame, &py_frame_v::o_owner) == FRAME_OWNED_BY_CSTACK; + return frame.getField(&py_frame_v::o_owner) == FRAME_OWNED_BY_CSTACK; } return false; // Versions before 3.12 don't have shim frames. 
} @@ -60,18 +58,18 @@ FrameObject::getIsShim( std::unique_ptr FrameObject::getCode( const std::shared_ptr& manager, - const PyFrameObject& frame) + Structure& frame) { - remote_addr_t py_code_addr = manager->getField(frame, &py_frame_v::o_code); + remote_addr_t py_code_addr = frame.getField(&py_frame_v::o_code); LOG(DEBUG) << std::hex << std::showbase << "Attempting to construct code object from address " << py_code_addr; uintptr_t last_instruction; if (manager->versionIsAtLeast(3, 11)) { - last_instruction = manager->getField(frame, &py_frame_v::o_prev_instr); + last_instruction = frame.getField(&py_frame_v::o_prev_instr); } else { - last_instruction = manager->getField(frame, &py_frame_v::o_lasti); + last_instruction = frame.getField(&py_frame_v::o_lasti); } return std::make_unique(manager, py_code_addr, last_instruction); } @@ -79,7 +77,7 @@ FrameObject::getCode( bool FrameObject::isEntry( const std::shared_ptr& manager, - const PyFrameObject& frame) + Structure& frame) { if (manager->versionIsAtLeast(3, 12)) { // This is an entry frame if the previous frame was a shim, or if @@ -89,7 +87,7 @@ FrameObject::isEntry( return (d_prev && d_prev->d_is_shim) || (d_frame_no == 0 && d_is_shim); } else if (manager->versionIsAtLeast(3, 11)) { // This is an entry frame if it has an entry flag set. 
- return manager->getField(frame, &py_frame_v::o_is_entry); + return frame.getField(&py_frame_v::o_is_entry); } return true; } @@ -105,7 +103,8 @@ FrameObject::resolveLocalVariables() const size_t n_arguments = d_code->NArguments(); const size_t n_locals = d_code->Varnames().size(); - const remote_addr_t locals_addr = d_addr + d_manager->getFieldOffset(&py_frame_v::o_localsplus); + Structure frame(d_manager, d_addr); + const remote_addr_t locals_addr = frame.getFieldRemoteAddress(&py_frame_v::o_localsplus); if (n_locals < n_arguments) { throw std::runtime_error("Found more arguments than local variables"); diff --git a/src/pystack/_pystack/pyframe.h b/src/pystack/_pystack/pyframe.h index 3c5418c9..69d9127b 100644 --- a/src/pystack/_pystack/pyframe.h +++ b/src/pystack/_pystack/pyframe.h @@ -1,11 +1,12 @@ #pragma once -#include "memory" -#include "unordered_map" +#include +#include #include "mem.h" #include "process.h" #include "pycode.h" +#include "structure.h" namespace pystack { @@ -32,14 +33,15 @@ class FrameObject private: // Methods - static bool - getIsShim(const std::shared_ptr& manager, const PyFrameObject& frame); + static bool getIsShim( + const std::shared_ptr& manager, + Structure& frame); static std::unique_ptr - getCode(const std::shared_ptr& manager, const PyFrameObject& frame); + getCode(const std::shared_ptr& manager, Structure& frame); bool - isEntry(const std::shared_ptr& manager, const PyFrameObject& frame); + isEntry(const std::shared_ptr& manager, Structure& frame); // Data members const std::shared_ptr d_manager{}; diff --git a/src/pystack/_pystack/pythread.cpp b/src/pystack/_pystack/pythread.cpp index 5ea4837b..23c5dce9 100644 --- a/src/pystack/_pystack/pythread.cpp +++ b/src/pystack/_pystack/pythread.cpp @@ -6,9 +6,9 @@ #include "mem.h" #include "native_frame.h" #include "process.h" -#include "pycompat.h" #include "pyframe.h" #include "pythread.h" +#include "structure.h" #include "version.h" #include "cpython/pthread.h" @@ -47,10 +47,9 @@ 
findPthreadTidOffset( remote_addr_t interp_state_addr) { LOG(DEBUG) << "Attempting to locate tid offset in pthread structure"; - PyInterpreterState is; - manager->copyObjectFromProcess(interp_state_addr, &is); + Structure is(manager, interp_state_addr); - auto current_thread_addr = manager->getField(is, &py_is_v::o_tstate_head); + auto current_thread_addr = is.getField(&py_is_v::o_tstate_head); auto thread_head = current_thread_addr; @@ -64,9 +63,8 @@ findPthreadTidOffset( // pthread' that we know about to avoid having to do guess-work by doing a // linear scan over the struct. while (current_thread_addr != (remote_addr_t) nullptr) { - PyThreadState current_thread; - manager->copyObjectFromProcess(current_thread_addr, &current_thread); - auto pthread_id_addr = manager->getField(current_thread, &py_thread_v::o_thread_id); + Structure current_thread(manager, current_thread_addr); + auto pthread_id_addr = current_thread.getField(&py_thread_v::o_thread_id); pid_t the_tid; std::vector glibc_pthread_offset_candidates = { @@ -80,7 +78,7 @@ findPthreadTidOffset( return candidate; } } - remote_addr_t next_thread_addr = manager->getField(current_thread, &py_thread_v::o_next); + remote_addr_t next_thread_addr = current_thread.getField(&py_thread_v::o_next); if (next_thread_addr == current_thread_addr) { break; } @@ -91,9 +89,8 @@ findPthreadTidOffset( current_thread_addr = thread_head; while (current_thread_addr != (remote_addr_t) nullptr) { - PyThreadState current_thread; - manager->copyObjectFromProcess(current_thread_addr, &current_thread); - auto pthread_id_addr = manager->getField(current_thread, &py_thread_v::o_thread_id); + Structure current_thread(manager, current_thread_addr); + auto pthread_id_addr = current_thread.getField(&py_thread_v::o_thread_id); // Attempt to locate a field in the pthread struct that's equal to the pid. 
uintptr_t buffer[100]; @@ -118,7 +115,7 @@ findPthreadTidOffset( } } - remote_addr_t next_thread_addr = manager->getField(current_thread, &py_thread_v::o_next); + remote_addr_t next_thread_addr = current_thread.getField(&py_thread_v::o_next); if (next_thread_addr == current_thread_addr) { break; } @@ -133,9 +130,8 @@ PyThread::PyThread(const std::shared_ptr& manager, { d_pid = manager->Pid(); - PyThreadState ts; LOG(DEBUG) << std::hex << std::showbase << "Copying main thread struct from address " << addr; - manager->copyObjectFromProcess(addr, &ts); + Structure ts(manager, addr); remote_addr_t frame_addr = getFrameAddr(manager, ts); if (frame_addr != (remote_addr_t) nullptr) { @@ -145,11 +141,11 @@ PyThread::PyThread(const std::shared_ptr& manager, } d_addr = addr; - remote_addr_t candidate_next_addr = manager->getField(ts, &py_thread_v::o_next); + remote_addr_t candidate_next_addr = ts.getField(&py_thread_v::o_next); d_next_addr = candidate_next_addr == addr ? (remote_addr_t) nullptr : candidate_next_addr; - d_pthread_id = manager->getField(ts, &py_thread_v::o_thread_id); - d_tid = getThreadTid(manager, addr, d_pthread_id); + d_pthread_id = ts.getField(&py_thread_v::o_thread_id); + d_tid = getThreadTid(manager, ts, d_pthread_id); d_next = nullptr; if (d_next_addr != (remote_addr_t)NULL) { @@ -165,14 +161,12 @@ PyThread::PyThread(const std::shared_ptr& manager, int PyThread::getThreadTid( const std::shared_ptr& manager, - remote_addr_t thread_addr, + Structure& ts, unsigned long pthread_id) { int the_tid = -1; if (manager->versionIsAtLeast(3, 11)) { - manager->copyObjectFromProcess( - (remote_addr_t)(thread_addr + manager->getFieldOffset(&py_thread_v::o_native_thread_id)), - &the_tid); + the_tid = ts.getField(&py_thread_v::o_native_thread_id); } else { the_tid = inferTidFromPThreadStructure(manager, pthread_id); } @@ -219,19 +213,18 @@ PyThread::inferTidFromPThreadStructure( remote_addr_t PyThread::getFrameAddr( const std::shared_ptr& manager, - const 
PyThreadState& ts) + Structure& ts) { if (manager->versionIsAtLeast(3, 11) && !manager->versionIsAtLeast(3, 13)) { - remote_addr_t cframe_addr = manager->getField(ts, &py_thread_v::o_frame); + remote_addr_t cframe_addr = ts.getField(&py_thread_v::o_frame); if (!manager->isAddressValid(cframe_addr)) { return reinterpret_cast(nullptr); } - CFrame cframe; - manager->copyObjectFromProcess(cframe_addr, &cframe); - return manager->getField(cframe, &py_cframe_v::current_frame); + Structure cframe(manager, cframe_addr); + return cframe.getField(&py_cframe_v::current_frame); } else { - return manager->getField(ts, &py_thread_v::o_frame); + return ts.getField(&py_thread_v::o_frame); } } @@ -261,7 +254,7 @@ PyThread::isGCCollecting() const PyThread::GilStatus PyThread::calculateGilStatus( - PyThreadState& ts, + Structure& ts, const std::shared_ptr& manager) const { LOG(DEBUG) << "Attempting to determine GIL Status"; @@ -277,11 +270,10 @@ PyThread::calculateGilStatus( // a ceval state, which points to a GIL runtime state. // If that GIL state has `locked` set and `last_holder` is d_addr, // then the thread represented by this PyThread holds the GIL. 
- PyInterpreterState interp; - auto is_addr = manager->getField(ts, &py_thread_v::o_interp); - manager->copyObjectFromProcess(is_addr, &interp); + auto is_addr = ts.getField(&py_thread_v::o_interp); + Structure interp(manager, is_addr); - auto gil_addr = manager->getField(interp, &py_is_v::o_gil_runtime_state); + auto gil_addr = interp.getField(&py_is_v::o_gil_runtime_state); Python3_9::_gil_runtime_state gil; manager->copyObjectFromProcess(gil_addr, &gil); @@ -293,9 +285,8 @@ PyThread::calculateGilStatus( } else if (manager->versionIsAtLeast(3, 8)) { // Fast, exact method by checking the gilstate structure in _PyRuntime LOG(DEBUG) << "Searching for the GIL by checking the value of 'tstate_current'"; - PyRuntimeState runtime; - manager->copyObjectFromProcess(pyruntime, &runtime); - uintptr_t tstate_current = manager->getField(runtime, &py_runtime_v::o_tstate_current); + Structure runtime(manager, pyruntime); + uintptr_t tstate_current = runtime.getField(&py_runtime_v::o_tstate_current); return (tstate_current == d_addr ? 
GilStatus::HELD : GilStatus::NOT_HELD); } else { LOG(DEBUG) << "Searching for the GIL by scanning the _PyRuntime structure"; @@ -338,32 +329,31 @@ PyThread::calculateGilStatus( PyThread::GCStatus PyThread::calculateGCStatus( - PyThreadState& ts, + Structure& ts, const std::shared_ptr& manager) const { LOG(DEBUG) << "Attempting to determine GC Status"; - GCRuntimeState gcstate; + remote_addr_t gcstate_addr; if (manager->versionIsAtLeast(3, 9)) { - PyInterpreterState interp; - auto is_addr = manager->getField(ts, &py_thread_v::o_interp); - manager->copyObjectFromProcess(is_addr, &interp); - gcstate = manager->getField(interp, &py_is_v ::o_gc); + auto is_addr = ts.getField(&py_thread_v::o_interp); + Structure interp(manager, is_addr); + gcstate_addr = interp.getFieldRemoteAddress(&py_is_v::o_gc); } else if (manager->versionIsAtLeast(3, 7)) { remote_addr_t pyruntime = manager->findSymbol("_PyRuntime"); if (!pyruntime) { LOG(DEBUG) << "Failed to get GC status because the _PyRuntime symbol is unavailable"; return GCStatus::COLLECTING_UNKNOWN; } - PyRuntimeState runtime; - manager->copyObjectFromProcess(pyruntime, &runtime); - gcstate = manager->getField(runtime, &py_runtime_v::o_gc); + Structure runtime(manager, pyruntime); + gcstate_addr = runtime.getFieldRemoteAddress(&py_runtime_v::o_gc); } else { LOG(DEBUG) << "GC Status retrieval not supported by this Python version"; return GCStatus::COLLECTING_UNKNOWN; } - auto collecting = manager->getField(gcstate, &py_gc_v::o_collecting); + Structure gcstate(manager, gcstate_addr); + auto collecting = gcstate.getField(&py_gc_v::o_collecting); LOG(DEBUG) << "GC status correctly retrieved: " << collecting; return collecting ? 
GCStatus::COLLECTING : GCStatus::NOT_COLLECTING; } @@ -380,10 +370,8 @@ getThreadFromInterpreterState( } LOG(DEBUG) << std::hex << std::showbase << "Copying PyInterpreterState struct from address " << addr; - PyInterpreterState is; - manager->copyObjectFromProcess(addr, &is); - - auto thread_addr = manager->getField(is, &py_is_v::o_tstate_head); + Structure is(manager, addr); + auto thread_addr = is.getField(&py_is_v::o_tstate_head); return std::make_shared(manager, thread_addr); } diff --git a/src/pystack/_pystack/pythread.h b/src/pystack/_pystack/pythread.h index 93b9626d..ab02c672 100644 --- a/src/pystack/_pystack/pythread.h +++ b/src/pystack/_pystack/pythread.h @@ -47,8 +47,9 @@ class PyThread : public Thread GCStatus isGCCollecting() const; // Static Methods - static remote_addr_t - getFrameAddr(const std::shared_ptr& manager, const PyThreadState& ts); + static remote_addr_t getFrameAddr( + const std::shared_ptr& manager, + Structure& ts); private: // Data members @@ -62,10 +63,10 @@ class PyThread : public Thread // Methods GilStatus calculateGilStatus( - PyThreadState& ts, + Structure& ts, const std::shared_ptr& manager) const; GCStatus calculateGCStatus( - PyThreadState& ts, + Structure& ts, const std::shared_ptr& manager) const; // Static Methods @@ -74,7 +75,7 @@ class PyThread : public Thread unsigned long pthread_id); static int getThreadTid( const std::shared_ptr& manager, - remote_addr_t thread_addr, + Structure& ts, unsigned long pthread_id); }; diff --git a/src/pystack/_pystack/pytypes.cpp b/src/pystack/_pystack/pytypes.cpp index 892c9520..8e9fc790 100644 --- a/src/pystack/_pystack/pytypes.cpp +++ b/src/pystack/_pystack/pytypes.cpp @@ -7,6 +7,7 @@ #include "logging.h" #include "pytypes.h" +#include "structure.h" #include "version.h" namespace pystack { @@ -88,17 +89,15 @@ TupleObject::TupleObject( { d_manager = manager; - PyTupleObject tuple; - manager->copyMemoryFromProcess(addr, manager->offsets().py_tuple.size, &tuple); - - ssize_t num_items = 
manager->getField(tuple, &py_tuple_v::o_ob_size); + Structure tuple(manager, addr); + ssize_t num_items = tuple.getField(&py_tuple_v::o_ob_size); if (num_items == 0) { LOG(DEBUG) << std::hex << std::showbase << "There are no elements in this tuple"; return; } d_items.resize(num_items); manager->copyMemoryFromProcess( - addr + manager->getFieldOffset(&py_tuple_v::o_ob_item), + tuple.getFieldRemoteAddress(&py_tuple_v::o_ob_item), num_items * sizeof(PyObject*), d_items.data()); } @@ -120,17 +119,15 @@ ListObject::ListObject(const std::shared_ptr& mana { d_manager = manager; - PyListObject list; - manager->copyMemoryFromProcess(addr, manager->offsets().py_list.size, &list); - - ssize_t num_items = manager->getField(list, &py_list_v::o_ob_size); + Structure list(manager, addr); + ssize_t num_items = list.getField(&py_list_v::o_ob_size); if (num_items == 0) { LOG(DEBUG) << std::hex << std::showbase << "There are no elements in this list"; return; } d_items.resize(num_items); manager->copyMemoryFromProcess( - (remote_addr_t)manager->getField(list, &py_list_v::o_ob_item), + (remote_addr_t)list.getField(&py_list_v::o_ob_item), num_items * sizeof(PyObject*), d_items.data()); } @@ -160,12 +157,11 @@ LongObject::LongObject( constexpr unsigned int shift = 15; #endif - _PyLongObject longobj; - manager->copyMemoryFromProcess(addr, manager->offsets().py_long.size, &longobj); + Structure longobj(manager, addr); ssize_t size; bool negative; - Py_ssize_t ob_size = manager->getField(longobj, &py_long_v::o_ob_size); + Py_ssize_t ob_size = longobj.getField(&py_long_v::o_ob_size); if (manager->versionIsAtLeast(3, 12)) { auto lv_tag = *reinterpret_cast(&ob_size); negative = (lv_tag & 3) == 2; @@ -200,7 +196,7 @@ LongObject::LongObject( std::vector digits; digits.resize(size); manager->copyMemoryFromProcess( - addr + manager->getFieldOffset(&py_long_v::o_ob_digit), + longobj.getFieldRemoteAddress(&py_long_v::o_ob_digit), sizeof(digit) * size, digits.data()); for (ssize_t i = 0; i < size; 
++i) { @@ -250,25 +246,24 @@ LongObject::Overflowed() const void getDictEntries( const std::shared_ptr& manager, - const Python3::PyDictObject& dict, + Structure& dict, ssize_t& num_items, std::vector& valid_entries) { - remote_addr_t keys_addr = manager->getField(dict, &py_dict_v::o_ma_keys); assert(manager->versionIsAtLeast(3, 0)); + remote_addr_t keys_addr = dict.getField(&py_dict_v::o_ma_keys); ssize_t dk_size = 0; int dk_kind = 0; - PyDictKeysObject keys; - manager->copyMemoryFromProcess(keys_addr, manager->offsets().py_dictkeys.size, &keys); - num_items = manager->getField(keys, &py_dictkeys_v::o_dk_nentries); - dk_size = manager->getField(keys, &py_dictkeys_v::o_dk_size); + Structure keys(manager, keys_addr); + num_items = keys.getField(&py_dictkeys_v::o_dk_nentries); + dk_size = keys.getField(&py_dictkeys_v::o_dk_size); if (manager->versionIsAtLeast(3, 11)) { // We're reusing the o_dk_size offset for dk_log2_size. Fix up the value. dk_size = 1L << dk_size; // Added in 3.11 - dk_kind = manager->getField(keys, &py_dictkeys_v::o_dk_kind); + dk_kind = keys.getField(&py_dictkeys_v::o_dk_kind); } if (num_items == 0) { LOG(DEBUG) << std::hex << std::showbase << "There are no elements in this dict"; @@ -293,8 +288,8 @@ getDictEntries( offset = 8 * dk_size; } - offset_t dk_indices_offset = manager->getFieldOffset(&py_dictkeys_v::o_dk_indices); - remote_addr_t entries_addr = keys_addr + dk_indices_offset + offset; + offset_t dk_indices_addr = keys.getFieldRemoteAddress(&py_dictkeys_v::o_dk_indices); + remote_addr_t entries_addr = dk_indices_addr + offset; std::vector raw_entries; raw_entries.resize(num_items); @@ -363,8 +358,7 @@ DictObject::DictObject(std::shared_ptr manager, re void DictObject::loadFromPython3(remote_addr_t addr) { - Python3::PyDictObject dict; - d_manager->copyMemoryFromProcess(addr, d_manager->offsets().py_dict.size, &dict); + Structure dict(d_manager, addr); ssize_t num_items; std::vector valid_entries; @@ -393,13 +387,13 @@ 
DictObject::loadFromPython3(remote_addr_t addr) * All dicts sharing same key must have same insertion order. */ - remote_addr_t dictvalues_addr = d_manager->getField(dict, &py_dict_v::o_ma_values); + remote_addr_t dictvalues_addr = dict.getField(&py_dict_v::o_ma_values); + Structure dictvalues(d_manager, dictvalues_addr); // Get the values in one copy if we are dealing with a split-table dictionary if (dictvalues_addr != 0) { d_values.resize(num_items); - auto values_offset = d_manager->getFieldOffset(&py_dictvalues_v::o_values); - auto values_addr = dictvalues_addr + values_offset; + auto values_addr = dictvalues.getFieldRemoteAddress(&py_dictvalues_v::o_values); d_manager->copyMemoryFromProcess(values_addr, num_items * sizeof(PyObject*), d_values.data()); } else { std::transform( @@ -519,9 +513,9 @@ Object::Object(const std::shared_ptr& manager, rem { LOG(DEBUG) << std::hex << std::showbase << "Copying PyObject data from address " << addr; - PyObject obj; + Structure obj(manager, addr); try { - manager->copyMemoryFromProcess(addr, manager->offsets().py_object.size, &obj); + obj.copyFromRemote(); } catch (RemoteMemCopyError& ex) { LOG(WARNING) << std::hex << std::showbase << "Failed to read PyObject data from address " << d_addr; @@ -529,13 +523,11 @@ Object::Object(const std::shared_ptr& manager, rem return; } - PyTypeObject cls; - d_type_addr = manager->getField(obj, &py_object_v::o_ob_type); + d_type_addr = obj.getField(&py_object_v::o_ob_type); LOG(DEBUG) << std::hex << std::showbase << "Copying typeobject from address " << d_type_addr; + Structure cls(manager, d_type_addr); try { - manager->copyMemoryFromProcess(d_type_addr, manager->offsets().py_type.size, &cls); - - d_flags = manager->getField(cls, &py_type_v::o_tp_flags); + d_flags = cls.getField(&py_type_v::o_tp_flags); } catch (RemoteMemCopyError& ex) { LOG(WARNING) << std::hex << std::showbase << "Failed to read typeobject from address " << d_type_addr; @@ -543,7 +535,7 @@ Object::Object(const 
std::shared_ptr& manager, rem return; } - remote_addr_t name_addr = manager->getField(cls, &py_type_v::o_tp_name); + remote_addr_t name_addr = cls.getField(&py_type_v::o_tp_name); try { d_classname = manager->getCStringFromAddress(name_addr); } catch (RemoteMemCopyError& ex) { @@ -625,9 +617,8 @@ Object::toInteger() const double Object::toFloat() const { - PyFloatObject the_float; - d_manager->copyMemoryFromProcess(d_addr, d_manager->offsets().py_float.size, &the_float); - return d_manager->getField(the_float, &py_float_v::o_ob_fval); + Structure the_float(d_manager, d_addr); + return the_float.getField(&py_float_v::o_ob_fval); } bool @@ -728,9 +719,9 @@ Object::toConcreteObject() const } std::string -Object::guessClassName(PyTypeObject& type) const +Object::guessClassName(Structure& type) const { - remote_addr_t tp_repr = d_manager->getField(type, &py_type_v::o_tp_repr); + remote_addr_t tp_repr = type.getField(&py_type_v::o_tp_repr); if (tp_repr == d_manager->findSymbol("float_repr")) { return "float"; } diff --git a/src/pystack/_pystack/pytypes.h b/src/pystack/_pystack/pytypes.h index cca2e1a8..92672a38 100644 --- a/src/pystack/_pystack/pytypes.h +++ b/src/pystack/_pystack/pytypes.h @@ -1,14 +1,13 @@ #pragma once #include -#include #include #include #include #include "mem.h" #include "process.h" -#include "pycompat.h" +#include "structure.h" namespace pystack { @@ -181,7 +180,7 @@ class Object bool toBool() const; long toInteger() const; double toFloat() const; - std::string guessClassName(PyTypeObject& type) const; + std::string guessClassName(Structure& type) const; }; } // namespace pystack diff --git a/src/pystack/_pystack/structure.h b/src/pystack/_pystack/structure.h new file mode 100644 index 00000000..b9a0c161 --- /dev/null +++ b/src/pystack/_pystack/structure.h @@ -0,0 +1,91 @@ +#pragma once + +#include +#include + +#include "process.h" + +namespace pystack { + +template +class Structure +{ + public: + // Constructors + Structure(std::shared_ptr manager, 
remote_addr_t addr); + Structure(const Structure&) = delete; + Structure& operator=(const Structure&) = delete; + + // Methods + void copyFromRemote(); + + template + remote_addr_t getFieldRemoteAddress(FieldPointer OffsetsStruct::*field) const; + + template + const typename FieldPointer::Type& getField(FieldPointer OffsetsStruct::*field); + + private: + // Data members + std::shared_ptr d_manager; + remote_addr_t d_addr; + ssize_t d_size; + std::array d_footprintbuf; + std::vector d_heapbuf; + char* d_buf; +}; + +template +inline Structure::Structure( + std::shared_ptr manager, + remote_addr_t addr) +: d_manager(manager) +, d_addr(addr) +, d_size(d_manager->offsets().get().size) +, d_buf{} +{ +} + +template +inline void +Structure::copyFromRemote() +{ + if (d_buf) { + return; // already copied + } + + if (d_size < 512) { + d_buf = &d_footprintbuf[0]; + } else { + d_heapbuf.resize(d_size); + d_buf = &d_heapbuf[0]; + } + d_manager->copyMemoryFromProcess(d_addr, d_size, d_buf); +} + +template +template +inline remote_addr_t +Structure::getFieldRemoteAddress(FieldPointer OffsetsStruct::*field) const +{ + offset_t offset = (d_manager->offsets().get().*field).offset; + return d_addr + offset; +} + +template +template +inline const typename FieldPointer::Type& +Structure::getField(FieldPointer OffsetsStruct::*field) +{ + copyFromRemote(); + offset_t offset = (d_manager->offsets().get().*field).offset; + if (d_size < 0 || (size_t)d_size < sizeof(typename FieldPointer::Type) + || d_size - sizeof(typename FieldPointer::Type) < offset) + { + abort(); + } + auto address = d_buf + offset; + return *reinterpret_cast(address); +} + +} // namespace pystack diff --git a/src/pystack/_pystack/version.cpp b/src/pystack/_pystack/version.cpp index 69ba621d..508cebff 100644 --- a/src/pystack/_pystack/version.cpp +++ b/src/pystack/_pystack/version.cpp @@ -226,6 +226,7 @@ py_runtimev313() offsetof(T, interpreters.head), {}, {}, + offsetof(T, debug_offsets.cookie), offsetof(T, 
debug_offsets.runtime_state.size), offsetof(T, debug_offsets.runtime_state.finalizing), offsetof(T, debug_offsets.runtime_state.interpreters_head), diff --git a/src/pystack/_pystack/version.h b/src/pystack/_pystack/version.h index 3fc76c62..83d1a9f0 100644 --- a/src/pystack/_pystack/version.h +++ b/src/pystack/_pystack/version.h @@ -142,9 +142,12 @@ struct py_runtime_v ssize_t size; FieldOffset o_finalizing; FieldOffset o_interp_head; - FieldOffset o_gc; + FieldOffset o_gc; // Using char because we can only use the offset, + // as the size and members change between versions FieldOffset o_tstate_current; + FieldOffset o_dbg_off_cookie; + FieldOffset o_dbg_off_runtime_state_struct_size; FieldOffset o_dbg_off_runtime_state_finalizing; FieldOffset o_dbg_off_runtime_state_interpreters_head; @@ -222,7 +225,8 @@ struct py_is_v ssize_t size; FieldOffset o_next; FieldOffset o_tstate_head; - FieldOffset o_gc; + FieldOffset o_gc; // Using char because we can only use the offset, + // as the size and members change between versions FieldOffset o_modules; FieldOffset o_sysdict; FieldOffset o_builtins; From 3a30f6e4dab5705570ca4dc36ba2fa8287812f2a Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Tue, 13 Aug 2024 17:48:59 -0400 Subject: [PATCH 02/15] Drop unions and structure typedefs Now that we're using `Structure` everywhere, we no longer need to know the largest possible static footprint for each type of structure, so we can drop the unions, and the typedefs that referenced them. 
Signed-off-by: Matt Wozniski --- src/pystack/_pystack/cpython/code.h | 10 ---------- src/pystack/_pystack/cpython/dict.h | 12 +----------- src/pystack/_pystack/cpython/frame.h | 12 ++---------- src/pystack/_pystack/cpython/gc.h | 6 ------ src/pystack/_pystack/cpython/interpreter.h | 11 ----------- src/pystack/_pystack/cpython/object.h | 6 ------ src/pystack/_pystack/cpython/runtime.h | 6 ++++-- src/pystack/_pystack/cpython/string.h | 10 ---------- src/pystack/_pystack/cpython/thread.h | 14 -------------- src/pystack/_pystack/version.h | 18 ------------------ 10 files changed, 7 insertions(+), 98 deletions(-) diff --git a/src/pystack/_pystack/cpython/code.h b/src/pystack/_pystack/cpython/code.h index 10033cd1..5069ffe7 100644 --- a/src/pystack/_pystack/cpython/code.h +++ b/src/pystack/_pystack/cpython/code.h @@ -197,14 +197,4 @@ typedef struct } PyCodeObject; } // namespace Python3_13 -typedef union { - Python2::PyCodeObject v2; - Python3_3::PyCodeObject v3_3; - Python3_6::PyCodeObject v3_6; - Python3_8::PyCodeObject v3_8; - Python3_11::PyCodeObject v3_11; - Python3_12::PyCodeObject v3_12; - Python3_13::PyCodeObject v3_13; -} PyCodeObject; - } // namespace pystack diff --git a/src/pystack/_pystack/cpython/dict.h b/src/pystack/_pystack/cpython/dict.h index 154c57d1..e0c5ce42 100644 --- a/src/pystack/_pystack/cpython/dict.h +++ b/src/pystack/_pystack/cpython/dict.h @@ -29,7 +29,7 @@ typedef struct _dictobject namespace Python3 { typedef Py_ssize_t (*dict_lookup_func)(void* mp, PyObject* key, Py_hash_t hash, PyObject** value_addr); -union PyDictKeysObject; +struct PyDictKeysObject; typedef struct { @@ -99,14 +99,4 @@ typedef struct _dictvalues } // namespace Python3_13 -typedef union { - Python3_3::PyDictKeysObject v3_3; - Python3_11::PyDictKeysObject v3_11; -} PyDictKeysObject; - -typedef union { - Python3::PyDictValuesObject v3_3; - Python3_13::PyDictValuesObject v3_13; -} PyDictValuesObject; - } // namespace pystack diff --git a/src/pystack/_pystack/cpython/frame.h 
b/src/pystack/_pystack/cpython/frame.h index ea0e297a..b31c58b1 100644 --- a/src/pystack/_pystack/cpython/frame.h +++ b/src/pystack/_pystack/cpython/frame.h @@ -39,7 +39,7 @@ namespace Python3_7 { typedef struct _pyframeobject { PyObject_VAR_HEAD struct _pyframeobject* f_back; - PyCodeObject* f_code; + PyObject* f_code; PyObject* f_builtins; PyObject* f_globals; PyObject* f_locals; @@ -64,7 +64,7 @@ typedef signed char PyFrameState; typedef struct _pyframeobject { PyObject_VAR_HEAD struct _pyframeobject* f_back; - PyCodeObject* f_code; + PyObject* f_code; PyObject* f_builtins; PyObject* f_globals; PyObject* f_locals; @@ -126,12 +126,4 @@ typedef struct _interpreter_frame } // namespace Python3_12 -typedef union { - Python2::PyFrameObject v2; - Python3_7::PyFrameObject v3_7; - Python3_10::PyFrameObject v3_10; - Python3_11::PyFrameObject v3_11; - Python3_12::PyFrameObject v3_12; -} PyFrameObject; - } // namespace pystack diff --git a/src/pystack/_pystack/cpython/gc.h b/src/pystack/_pystack/cpython/gc.h index ddfd8bcf..53c1ce16 100644 --- a/src/pystack/_pystack/cpython/gc.h +++ b/src/pystack/_pystack/cpython/gc.h @@ -106,10 +106,4 @@ struct _gc_runtime_state }; } // namespace Python3_13 - -typedef union { - struct Python3_7::_gc_runtime_state v3_7; - struct Python3_8::_gc_runtime_state v3_8; - struct Python3_13::_gc_runtime_state v3_13; -} GCRuntimeState; } // namespace pystack diff --git a/src/pystack/_pystack/cpython/interpreter.h b/src/pystack/_pystack/cpython/interpreter.h index 89e13232..719116c3 100644 --- a/src/pystack/_pystack/cpython/interpreter.h +++ b/src/pystack/_pystack/cpython/interpreter.h @@ -338,15 +338,4 @@ typedef struct _is struct _import_state imports; } PyInterpreterState; } // namespace Python3_13 - -typedef union { - Python2::PyInterpreterState v2; - Python3_5::PyInterpreterState v3_5; - Python3_7::PyInterpreterState v3_7; - Python3_8::PyInterpreterState v3_8; - Python3_9::PyInterpreterState v3_9; - Python3_11::PyInterpreterState v3_11; - 
Python3_12::PyInterpreterState v3_12; - Python3_13::PyInterpreterState v3_13; -} PyInterpreterState; } // namespace pystack diff --git a/src/pystack/_pystack/cpython/object.h b/src/pystack/_pystack/cpython/object.h index 15896cba..cc96e04e 100644 --- a/src/pystack/_pystack/cpython/object.h +++ b/src/pystack/_pystack/cpython/object.h @@ -205,12 +205,6 @@ typedef struct _typeobject } PyTypeObject; } // namespace Python3_8 -typedef union { - Python2::PyTypeObject v2; - Python3_3::PyTypeObject v3_3; - Python3_8::PyTypeObject v3_8; -} PyTypeObject; - /* These flags are used to determine if a type is a subclass. */ constexpr long Pystack_TPFLAGS_INT_SUBCLASS = 1ul << 23u; constexpr long Pystack_TPFLAGS_LONG_SUBCLASS = 1ul << 24u; diff --git a/src/pystack/_pystack/cpython/runtime.h b/src/pystack/_pystack/cpython/runtime.h index adf66674..e75bf780 100644 --- a/src/pystack/_pystack/cpython/runtime.h +++ b/src/pystack/_pystack/cpython/runtime.h @@ -105,13 +105,15 @@ struct _ceval_runtime_state struct _gil_runtime_state gil; }; +struct PyThreadState; + typedef struct pyruntimestate { int preinitializing; int preinitialized; int core_initialized; int initialized; - PyThreadState* finalizing; + void* finalizing; struct pyinterpreters { @@ -171,7 +173,7 @@ typedef struct pyruntimestate int preinitialized; int core_initialized; int initialized; - PyThreadState* finalizing; + void* finalizing; struct pyinterpreters { diff --git a/src/pystack/_pystack/cpython/string.h b/src/pystack/_pystack/cpython/string.h index eb18e346..cf61ddd8 100644 --- a/src/pystack/_pystack/cpython/string.h +++ b/src/pystack/_pystack/cpython/string.h @@ -109,14 +109,4 @@ typedef struct } // namespace Python3_12 -typedef union { - Python3::PyBytesObject v3; -} PyBytesObject; - -typedef union { - Python2::PyUnicodeObject v2; - Python3::PyUnicodeObject v3; - Python3_12::PyUnicodeObject v3_12; -} PyUnicodeObject; - } // namespace pystack diff --git a/src/pystack/_pystack/cpython/thread.h 
b/src/pystack/_pystack/cpython/thread.h index 2a701f04..c9b5da8d 100644 --- a/src/pystack/_pystack/cpython/thread.h +++ b/src/pystack/_pystack/cpython/thread.h @@ -295,18 +295,4 @@ typedef struct _pythreadstate } PyThreadState; } // namespace Python3_13 -typedef union { - Python2::PyThreadState v2; - Python3_4::PyThreadState v3_4; - Python3_7::PyThreadState v3_7; - Python3_11::PyThreadState v3_11; - Python3_12::PyThreadState v3_12; - Python3_13::PyThreadState v3_13; -} PyThreadState; - -union CFrame { - Python3_11::CFrame v3_11; - Python3_12::CFrame v3_12; -}; - } // namespace pystack diff --git a/src/pystack/_pystack/version.h b/src/pystack/_pystack/version.h index 83d1a9f0..7f9a598e 100644 --- a/src/pystack/_pystack/version.h +++ b/src/pystack/_pystack/version.h @@ -20,7 +20,6 @@ struct FieldOffset struct py_tuple_v { - typedef PyTupleObject Structure; ssize_t size; FieldOffset o_ob_size; FieldOffset o_ob_item; @@ -28,7 +27,6 @@ struct py_tuple_v struct py_list_v { - typedef PyListObject Structure; ssize_t size; FieldOffset o_ob_size; FieldOffset o_ob_item; @@ -36,7 +34,6 @@ struct py_list_v struct py_dict_v { - typedef Python3::PyDictObject Structure; ssize_t size; FieldOffset o_ma_keys; FieldOffset o_ma_values; @@ -44,7 +41,6 @@ struct py_dict_v struct py_dictkeys_v { - typedef PyDictKeysObject Structure; ssize_t size; FieldOffset o_dk_size; FieldOffset o_dk_kind; @@ -54,21 +50,18 @@ struct py_dictkeys_v struct py_dictvalues_v { - typedef PyDictValuesObject Structure; ssize_t size; FieldOffset o_values; }; struct py_float_v { - typedef PyFloatObject Structure; ssize_t size; FieldOffset o_ob_fval; }; struct py_long_v { - typedef _PyLongObject Structure; ssize_t size; FieldOffset o_ob_size; FieldOffset o_ob_digit; @@ -76,7 +69,6 @@ struct py_long_v struct py_bytes_v { - typedef PyBytesObject Structure; ssize_t size; FieldOffset o_ob_size; FieldOffset o_ob_sval; @@ -84,7 +76,6 @@ struct py_bytes_v struct py_unicode_v { - typedef PyUnicodeObject Structure; ssize_t 
size; FieldOffset o_state; FieldOffset o_length; @@ -93,14 +84,12 @@ struct py_unicode_v struct py_object_v { - typedef PyObject Structure; ssize_t size; FieldOffset o_ob_type; }; struct py_code_v { - typedef PyCodeObject Structure; ssize_t size; FieldOffset o_filename; FieldOffset o_name; @@ -113,7 +102,6 @@ struct py_code_v struct py_frame_v { - typedef PyFrameObject Structure; ssize_t size; FieldOffset o_back; FieldOffset o_code; @@ -126,7 +114,6 @@ struct py_frame_v struct py_thread_v { - typedef PyThreadState Structure; ssize_t size; FieldOffset o_prev; FieldOffset o_next; @@ -138,7 +125,6 @@ struct py_thread_v struct py_runtime_v { - typedef PyRuntimeState Structure; ssize_t size; FieldOffset o_finalizing; FieldOffset o_interp_head; @@ -212,7 +198,6 @@ struct py_runtime_v struct py_type_v { - typedef PyTypeObject Structure; ssize_t size; FieldOffset o_tp_name; FieldOffset o_tp_repr; @@ -221,7 +206,6 @@ struct py_type_v struct py_is_v { - typedef PyInterpreterState Structure; ssize_t size; FieldOffset o_next; FieldOffset o_tstate_head; @@ -235,14 +219,12 @@ struct py_is_v struct py_gc_v { - typedef GCRuntimeState Structure; ssize_t size; FieldOffset o_collecting; }; struct py_cframe_v { - typedef CFrame Structure; ssize_t size; FieldOffset current_frame; }; From 13589990f8f0943e427115309c5602c348b348c3 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Wed, 14 Aug 2024 18:30:35 -0400 Subject: [PATCH 03/15] Add offsets for _gil_runtime_state Previously we were handling different offsets between versions purely through version checks on the manager, but this won't work going forward, because free-threading builds of 3.13 use a different structure layout. Handle this by switching to offsets. 
Signed-off-by: Matt Wozniski --- src/pystack/_pystack/pythread.cpp | 9 +++------ src/pystack/_pystack/version.cpp | 13 +++++++++++++ src/pystack/_pystack/version.h | 9 +++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/pystack/_pystack/pythread.cpp b/src/pystack/_pystack/pythread.cpp index 23c5dce9..d50e4126 100644 --- a/src/pystack/_pystack/pythread.cpp +++ b/src/pystack/_pystack/pythread.cpp @@ -274,13 +274,10 @@ PyThread::calculateGilStatus( Structure interp(manager, is_addr); auto gil_addr = interp.getField(&py_is_v::o_gil_runtime_state); + Structure gil(manager, gil_addr); - Python3_9::_gil_runtime_state gil; - manager->copyObjectFromProcess(gil_addr, &gil); - - auto locked = *reinterpret_cast(&gil.locked); - auto holder = *reinterpret_cast(&gil.last_holder); - + auto locked = gil.getField(&py_gilruntimestate_v::o_locked); + auto holder = gil.getField(&py_gilruntimestate_v::o_last_holder); return (locked && holder == d_addr ? GilStatus::HELD : GilStatus::NOT_HELD); } else if (manager->versionIsAtLeast(3, 8)) { // Fast, exact method by checking the gilstate structure in _PyRuntime diff --git a/src/pystack/_pystack/version.cpp b/src/pystack/_pystack/version.cpp index 508cebff..20e4edca 100644 --- a/src/pystack/_pystack/version.cpp +++ b/src/pystack/_pystack/version.cpp @@ -192,6 +192,17 @@ py_cframe() }; } +template +constexpr py_gilruntimestate_v +py_gilruntimestate() +{ + return { + sizeof(T), + offsetof(T, locked), + offsetof(T, last_holder), + }; +} + template constexpr py_runtime_v py_runtime() @@ -623,6 +634,7 @@ python_v python_v3_12 = { py_runtimev312(), py_gc(), py_cframe(), + py_gilruntimestate(), }; // ---- Python 3.13 ------------------------------------------------------------ @@ -646,6 +658,7 @@ python_v python_v3_13 = { py_runtimev313(), py_gc(), py_cframe(), + py_gilruntimestate(), }; // ----------------------------------------------------------------------------- diff --git a/src/pystack/_pystack/version.h 
b/src/pystack/_pystack/version.h index 7f9a598e..46e3ea5d 100644 --- a/src/pystack/_pystack/version.h +++ b/src/pystack/_pystack/version.h @@ -229,6 +229,13 @@ struct py_cframe_v FieldOffset current_frame; }; +struct py_gilruntimestate_v +{ + ssize_t size; + FieldOffset o_locked; + FieldOffset o_last_holder; +}; + struct python_v { py_tuple_v py_tuple; @@ -249,6 +256,7 @@ struct python_v py_runtime_v py_runtime; py_gc_v py_gc; py_cframe_v py_cframe; + py_gilruntimestate_v py_gilruntimestate; template inline const T& get() const; @@ -279,6 +287,7 @@ define_python_v_get_specialization(py_is); define_python_v_get_specialization(py_runtime); define_python_v_get_specialization(py_gc); define_python_v_get_specialization(py_cframe); +define_python_v_get_specialization(py_gilruntimestate); #undef define_python_v_get_specialization From 3486f21946ab705a1d9923f6e9eaa2d2ce23cd8d Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Wed, 14 Aug 2024 19:51:44 -0400 Subject: [PATCH 04/15] Drop debug offsets mismatch warning to info level Now that we plan to actually make use of the debug offsets, instead of just checking against them, it's no longer an exceptional or erroneous case when they don't match the compiled-in offsets. Drop this down to an info-level message, so that it shows up when debugging a problem but isn't in the way when using PyStack as an end user.
Signed-off-by: Matt Wozniski --- src/pystack/_pystack/process.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/pystack/_pystack/process.cpp b/src/pystack/_pystack/process.cpp index c414e0c9..611ef5c4 100644 --- a/src/pystack/_pystack/process.cpp +++ b/src/pystack/_pystack/process.cpp @@ -619,9 +619,9 @@ AbstractProcessManager::warnIfOffsetsAreMismatched() const if ((d_py_v->py_runtime.*size_offset).offset \ && ((uint64_t)offsets().pystack_struct.size > py_runtime.getField(size_offset))) \ { \ - LOG(WARNING) << "Debug offsets mismatch: " #pystack_struct ".size " \ - << offsets().pystack_struct.size << " > " << py_runtime.getField(size_offset) \ - << " reported by CPython"; \ + LOG(INFO) << "Debug offsets mismatch: compiled-in " << sizeof(void*) * 8 << "-bit python3." \ + << d_minor << " " #pystack_struct ".size " << offsets().pystack_struct.size << " > " \ + << py_runtime.getField(size_offset) << " loaded from _Py_DebugOffsets"; \ } else \ do { \ } while (0) @@ -630,9 +630,10 @@ AbstractProcessManager::warnIfOffsetsAreMismatched() const if ((d_py_v->py_runtime.*field_offset_offset).offset \ && (uint64_t)offsets().pystack_field.offset != py_runtime.getField(field_offset_offset)) \ { \ - LOG(WARNING) << "Debug offsets mismatch: " #pystack_field << " " \ - << offsets().pystack_field.offset \ - << " != " << py_runtime.getField(field_offset_offset) << " reported by CPython"; \ + LOG(INFO) << "Debug offsets mismatch: compiled-in " << sizeof(void*) * 8 << "-bit python3." 
\ + << d_minor << " " #pystack_field << " " << offsets().pystack_field.offset \ + << " != " << py_runtime.getField(field_offset_offset) \ + << " loaded from _Py_DebugOffsets"; \ } else \ do { \ } while (0) From 91b1b0d21e863504cf666d5dbbf4b1998d3ec06a Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Thu, 15 Aug 2024 17:06:07 -0400 Subject: [PATCH 05/15] Fix an incorrect range check in core file reading The `End()` address of each map is exclusive, not inclusive, so a map where `addr == map.End()` does not contain that address. Signed-off-by: Matt Wozniski --- src/pystack/_pystack/mem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pystack/_pystack/mem.cpp b/src/pystack/_pystack/mem.cpp index d82a3e63..5c735e1a 100644 --- a/src/pystack/_pystack/mem.cpp +++ b/src/pystack/_pystack/mem.cpp @@ -398,7 +398,7 @@ CorefileRemoteMemoryManager::StatusCode CorefileRemoteMemoryManager::getMemoryLocationFromCore(remote_addr_t addr, off_t* offset_in_file) const { auto corefile_it = std::find_if(d_vmaps.cbegin(), d_vmaps.cend(), [&](auto& map) { - return (map.Start() <= addr && addr <= map.End()) && (map.FileSize() != 0 && map.Offset() != 0); + return (map.Start() <= addr && addr < map.End()) && (map.FileSize() != 0 && map.Offset() != 0); }); if (corefile_it == d_vmaps.cend()) { return StatusCode::ERROR; From 45415ce0f1ac78b8d38643b38df393ae61a12a01 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Thu, 15 Aug 2024 17:21:14 -0400 Subject: [PATCH 06/15] Add an explicit size for an array member Flexible array members are a C feature that doesn't exist in standard C++. While g++ allows them, not being able to call `sizeof()` on the field gets in the way of some macro shenanigans I need to do. 
Signed-off-by: Matt Wozniski --- src/pystack/_pystack/cpython/dict.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pystack/_pystack/cpython/dict.h b/src/pystack/_pystack/cpython/dict.h index e0c5ce42..67e3804c 100644 --- a/src/pystack/_pystack/cpython/dict.h +++ b/src/pystack/_pystack/cpython/dict.h @@ -82,7 +82,7 @@ typedef struct _dictkeysobject uint32_t dk_version; Py_ssize_t dk_usable; Py_ssize_t dk_nentries; - char dk_indices[]; /* char is required to avoid strict aliasing. */ + char dk_indices[1]; /* char is required to avoid strict aliasing. */ } PyDictKeysObject; } // namespace Python3_11 From a143a0cc1e15bd79eac14bd8c6bf00617234bd74 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 16 Aug 2024 17:23:59 -0400 Subject: [PATCH 07/15] Allow testing with `python3.13t` And in particular, allow `PYTHON_TEST_VERSION=3.13t` for requesting to test only this one Python version. Signed-off-by: Matt Wozniski --- tests/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/utils.py b/tests/utils.py index 195e8dd6..6dcc4984 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -23,6 +23,7 @@ PythonVersion = Tuple[Tuple[int, int], pathlib.Path] ALL_VERSIONS = [ + ((3, 13), "python3.13t"), ((3, 13), "python3.13"), ((3, 12), "python3.12"), ((3, 11), "python3.11"), @@ -44,6 +45,8 @@ def find_all_available_pythons() -> Iterable[Interpreter]: # pragma: no cover versions = [((sys.version_info[0], sys.version_info[1]), sys.executable)] elif test_version is not None: major, minor = test_version.split(".") + if minor.endswith("t"): + minor = minor[:-1] versions = [((int(major), int(minor)), f"python{test_version}")] else: versions = ALL_VERSIONS From 48e94e3b8377d0c707384d70a30fe5613c11ea6d Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 16 Aug 2024 17:39:09 -0400 Subject: [PATCH 08/15] Make trashed local variables test more robust This test makes some assumptions about the layout of some Python objects that don't hold in 
free-threading interpreters. Tweak the assumptions a bit so that they do hold. Signed-off-by: Matt Wozniski --- tests/integration/test_local_variables.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_local_variables.py b/tests/integration/test_local_variables.py index 715e4917..d992eaee 100644 --- a/tests/integration/test_local_variables.py +++ b/tests/integration/test_local_variables.py @@ -573,7 +573,6 @@ def test_trashed_locals(generate_threads, python, tmpdir): class ListObject(ctypes.Structure): _fields_ = [ - ("ob_refcnt", ctypes.c_ssize_t), ("ob_type", ctypes.c_void_p), ("ob_size", ctypes.c_ssize_t), ("ob_item", ctypes.c_void_p), @@ -581,23 +580,26 @@ class ListObject(ctypes.Structure): class TupleObject(ctypes.Structure): _fields_ = [ - ("ob_refcnt", ctypes.c_ssize_t), ("ob_type", ctypes.c_void_p), ("ob_size", ctypes.c_ssize_t), ("ob_item0", ctypes.c_void_p), ("ob_item1", ctypes.c_void_p), ] +def ob_type_field(obj): + # Assume ob_type is the last field of PyObject + return id(obj) + sys.getsizeof(None) - ctypes.sizeof(ctypes.c_void_p) + def main(): bad_type = (1, 2, 3) bad_elem = (4, 5, 6) nullelem = (7, 8, 9) bad_list = [0, 1, 2] - TupleObject.from_address(id(bad_type)).ob_type = 0xded - TupleObject.from_address(id(bad_elem)).ob_item1 = 0xbad - TupleObject.from_address(id(nullelem)).ob_item1 = 0x0 - ListObject.from_address(id(bad_list)).ob_item = 0x0 + TupleObject.from_address(ob_type_field(bad_type)).ob_type = 0xded + TupleObject.from_address(ob_type_field(bad_elem)).ob_item1 = 0xbad + TupleObject.from_address(ob_type_field(nullelem)).ob_item1 = 0x0 + ListObject.from_address(ob_type_field(bad_list)).ob_item = 0x0 fifo = sys.argv[1] with open(sys.argv[1], "w") as fifo: From 4d7690034076839ff1fdcaaeabebe9c76bebf876 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 16 Aug 2024 17:41:26 -0400 Subject: [PATCH 09/15] tests: Force the GIL to be on for test_gil.py In free-threading builds the GIL may 
be enabled or disabled at runtime. In order to test that we can accurately report whether a thread in the free threading interpreter is holding the GIL, we need to run these tests with the GIL enabled. Signed-off-by: Matt Wozniski --- tests/integration/test_gil.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/integration/test_gil.py b/tests/integration/test_gil.py index 0dc15e56..a29d7fa5 100644 --- a/tests/integration/test_gil.py +++ b/tests/integration/test_gil.py @@ -1,5 +1,8 @@ +import subprocess from pathlib import Path +import pytest + from pystack.engine import get_process_threads from pystack.engine import get_process_threads_for_core from tests.utils import ALL_PYTHONS @@ -14,6 +17,15 @@ TEST_SINGLE_THREAD_FILE = Path(__file__).parent / "single_thread_program.py" +@pytest.fixture(autouse=True) +def enable_gil_if_free_threading(python, monkeypatch): + _, python_executable = python + proc = subprocess.run([python_executable, "-Xgil=1", "-cpass"], capture_output=True) + free_threading = proc.returncode == 0 + if free_threading: + monkeypatch.setenv("PYTHON_GIL", "1") + + @ALL_PYTHONS def test_gil_status_one_thread_among_many_holds_the_gil(python, tmpdir): # GIVEN From ce89424768a1ca8cc96a7ade1abdb1cb66f64aee Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 16 Aug 2024 17:49:33 -0400 Subject: [PATCH 10/15] Recognize the free-threading Py_GetVersion string The free-threading build adds some extra text in the middle of the Py_GetVersion string that we need to account for. 
Signed-off-by: Matt Wozniski --- src/pystack/process.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pystack/process.py b/src/pystack/process.py index cd8bf83a..513ea1f9 100644 --- a/src/pystack/process.py +++ b/src/pystack/process.py @@ -19,8 +19,12 @@ r".*libpython(?P\d+)\.(?P\d+).*", re.IGNORECASE ) +# Strings like "3.8.10 (default, May 26 2023, 14:05:08)" +# or "2.7.18rc1 (v2.7.18rc1:8d21aa21f2, Apr 20 2020, 13:19:08)" +# or "3.13.0+ experimental free-threading build (Python)" BSS_VERSION_REGEXP = re.compile( - rb"((2|3)\.(\d+)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+? (\(.{1,64}\))" + rb"((2|3)\.(\d+)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+?" + rb"(?: experimental free-threading build)? (\(.{1,64}\))" ) LOGGER = logging.getLogger(__file__) From bd144a6e9711b76b3def6777768d60f66eb326fb Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 16 Aug 2024 18:08:09 -0400 Subject: [PATCH 11/15] Detect and leverage _Py_DebugOffsets If we're able to locate and validate the _Py_DebugOffsets structure, use the offsets contained in it for the rest of our work. 
Signed-off-by: Matt Wozniski --- src/pystack/_pystack.pyx | 3 + src/pystack/_pystack/process.cpp | 598 ++++++++++++++++++++++++++++++- src/pystack/_pystack/process.h | 16 +- src/pystack/_pystack/process.pxd | 3 +- src/pystack/_pystack/version.cpp | 1 + src/pystack/_pystack/version.h | 1 + 6 files changed, 605 insertions(+), 17 deletions(-) diff --git a/src/pystack/_pystack.pyx b/src/pystack/_pystack.pyx index 596768a9..a16d4a10 100644 --- a/src/pystack/_pystack.pyx +++ b/src/pystack/_pystack.pyx @@ -307,6 +307,7 @@ cdef class ProcessManager: ) ) + native_manager.get().setPythonVersionFromDebugOffsets() python_version = native_manager.get().findPythonVersion() if python_version == (-1, -1): python_version = get_python_version_for_process(pid, map_info) @@ -353,10 +354,12 @@ cdef class ProcessManager: make_shared[CoreFileProcessManager](pid, analyzer, maps, native_map_info) ) + native_manager.get().setPythonVersionFromDebugOffsets() python_version = native_manager.get().findPythonVersion() if python_version == (-1, -1): python_version = get_python_version_for_core(core_file, executable, map_info) native_manager.get().setPythonVersion(python_version) + cdef ProcessManager new_manager = cls( pid, python_version, virtual_maps, map_info ) diff --git a/src/pystack/_pystack/process.cpp b/src/pystack/_pystack/process.cpp index 611ef5c4..91c98d8e 100644 --- a/src/pystack/_pystack/process.cpp +++ b/src/pystack/_pystack/process.cpp @@ -63,6 +63,61 @@ class DirectoryReader } // namespace namespace pystack { +namespace { // unnamed + +struct ParsedPyVersion +{ + int major; + int minor; + int patch; + const char* release_level; + int serial; +}; + +std::ostream& +operator<<(std::ostream& out, const ParsedPyVersion& version) +{ + // Use a temporary stringstream in case `out` is using hex or showbase + std::ostringstream oss; + oss << version.major << "." << version.minor << "." 
<< version.patch; + if (version.release_level) { + oss << version.release_level << version.serial; + } + + out << oss.str(); + return out; +} + +bool +parsePyVersionHex(uint64_t version, ParsedPyVersion& parsed) +{ + int major = (version >> 24) & 0xFF; + int minor = (version >> 16) & 0xFF; + int patch = (version >> 8) & 0xFF; + int level = (version >> 4) & 0x0F; + int count = (version >> 0) & 0x0F; + + const char* level_str = nullptr; + if (level == 0xA) { + level_str = "a"; + } else if (level == 0xB) { + level_str = "b"; + } else if (level == 0xC) { + level_str = "rc"; + } else if (level == 0xF) { + level_str = ""; + } + + if (major < 2 || major > 3 || level_str == nullptr || (level == 0xF && count != 0)) { + return false; // Doesn't look valid. + } + + parsed = ParsedPyVersion{major, minor, patch, level_str, count}; + return true; +} + +} // unnamed namespace + static std::vector getProcessTids(pid_t pid) { @@ -352,6 +407,40 @@ AbstractProcessManager::scanMemoryAreaForInterpreterState(const VirtualMap& map) return (remote_addr_t)result; } +remote_addr_t +AbstractProcessManager::scanMemoryAreaForDebugOffsets(const VirtualMap& map) const +{ + size_t size = map.Size(); + std::vector memory_buffer(size); + remote_addr_t base = map.Start(); + copyMemoryFromProcess(base, size, memory_buffer.data()); + + LOG(INFO) << std::showbase << std::hex << "Searching for debug offsets in memory area spanning from " + << map.Start() << " to " << map.End(); + + uint64_t* lower_bound = (uint64_t*)&memory_buffer.data()[0]; + uint64_t* upper_bound = (uint64_t*)&memory_buffer.data()[size]; + + uint64_t cookie; + memcpy(&cookie, "xdebugpy", sizeof(cookie)); + + for (uint64_t* raddr = lower_bound; raddr < upper_bound; raddr++) { + if (raddr[0] == cookie) { + uint64_t version = raddr[1]; + + ParsedPyVersion parsed; + if (parsePyVersionHex(version, parsed) && parsed.major == 3 && parsed.minor >= 13) { + auto offset = (remote_addr_t)raddr - (remote_addr_t)memory_buffer.data(); + auto addr = 
offset + base; + LOG(DEBUG) << std::hex << std::showbase << "Possible debug offsets found at address " + << addr << " in a mapping of " << map.Path(); + return addr; + } + } + } + return 0; +} + remote_addr_t AbstractProcessManager::scanBSS() const { @@ -392,6 +481,27 @@ AbstractProcessManager::scanHeap() const return scanMemoryAreaForInterpreterState(d_heap.value()); } +remote_addr_t +AbstractProcessManager::findDebugOffsetsFromMaps() const +{ + LOG(INFO) << "Scanning all writable path-backed maps for _Py_DebugOffsets"; + for (auto& map : d_memory_maps) { + if (map.Flags().find("w") != std::string::npos && !map.Path().empty()) { + LOG(DEBUG) << std::hex << std::showbase << "Attempting to locate _Py_DebugOffsets in map of " + << map.Path() << " starting at " << map.Start() << " and ending at " << map.End(); + LOG(DEBUG) << "Flags: " << map.Flags(); + try { + if (remote_addr_t result = scanMemoryAreaForDebugOffsets(map)) { + return result; + } + } catch (RemoteMemCopyError& ex) { + LOG(INFO) << "Failed to scan map starting at " << map.Start(); + } + } + } + return 0; +} + ssize_t AbstractProcessManager::copyMemoryFromProcess(remote_addr_t addr, size_t size, void* destination) const { @@ -563,12 +673,72 @@ AbstractProcessManager::isInterpreterActive() const return InterpreterStatus::UNKNOWN; } +void +AbstractProcessManager::setPythonVersionFromDebugOffsets() +{ + remote_addr_t pyruntime_addr = findSymbol("_PyRuntime"); + if (!pyruntime_addr) { + pyruntime_addr = findPyRuntimeFromElfData(); + } + if (!pyruntime_addr) { + pyruntime_addr = findDebugOffsetsFromMaps(); + } + + if (!pyruntime_addr) { + LOG(DEBUG) << "Unable to find _Py_DebugOffsets"; + return; + } + + try { + uint64_t cookie; + copyObjectFromProcess(pyruntime_addr, &cookie); + if (0 != memcmp(&cookie, "xdebugpy", 8)) { + LOG(DEBUG) << "Found a _PyRuntime structure without _Py_DebugOffsets"; + return; + } + + uint64_t version; + copyObjectFromProcess(pyruntime_addr + 8, &version); + + ParsedPyVersion 
parsed; + if (parsePyVersionHex(version, parsed) && parsed.major == 3 && parsed.minor >= 13) { + LOG(INFO) << std::hex << std::showbase << "_Py_DebugOffsets at " << pyruntime_addr + << " identify the version as " << parsed; + setPythonVersion(std::make_pair(parsed.major, parsed.minor)); + Structure py_runtime(shared_from_this(), pyruntime_addr); + std::unique_ptr offsets = loadDebugOffsets(py_runtime); + if (offsets) { + LOG(INFO) << "_Py_DebugOffsets appear to be valid and will be used"; + warnIfOffsetsAreMismatched(pyruntime_addr); + d_debug_offsets_addr = pyruntime_addr; + d_debug_offsets = std::move(offsets); + return; + } + } + } catch (const RemoteMemCopyError& ex) { + LOG(DEBUG) << std::hex << std::showbase << "Found apparently invalid _Py_DebugOffsets at " + << pyruntime_addr; + } + + LOG(DEBUG) << "Failed to validate _Py_DebugOffsets structure"; + d_major = 0; + d_minor = 0; + d_py_v = nullptr; + d_debug_offsets_addr = 0; + d_debug_offsets.reset(); +} + std::pair AbstractProcessManager::findPythonVersion() const { + if (d_py_v) { + // Already set or previously found (probably via _Py_DebugOffsets) + return std::make_pair(d_major, d_minor); + } + auto version_symbol = findSymbol("Py_Version"); if (!version_symbol) { - LOG(DEBUG) << "Faled to determine Python version from symbols"; + LOG(DEBUG) << "Failed to determine Python version from symbols"; return {-1, -1}; } unsigned long version; @@ -591,22 +761,11 @@ AbstractProcessManager::setPythonVersion(const std::pair& version) // Note: getCPythonOffsets can throw. Don't set these if it does. 
d_major = version.first; d_minor = version.second; - - warnIfOffsetsAreMismatched(); } void -AbstractProcessManager::warnIfOffsetsAreMismatched() const +AbstractProcessManager::warnIfOffsetsAreMismatched(remote_addr_t runtime_addr) const { - if (!versionIsAtLeast(3, 13)) { - return; // Nothing to cross-reference; _Py_DebugOffsets was added in 3.13 - } - - remote_addr_t runtime_addr = findSymbol("_PyRuntime"); - if (!runtime_addr) { - return; // We need to start from the _PyRuntime structure - } - Structure py_runtime(shared_from_this(), runtime_addr); if (0 != memcmp(py_runtime.getField(&py_runtime_v::o_dbg_off_cookie), "xdebugpy", 8)) { @@ -696,6 +855,402 @@ AbstractProcessManager::warnIfOffsetsAreMismatched() const #undef compare_offset } +std::unique_ptr +AbstractProcessManager::loadDebugOffsets(Structure& py_runtime) const +{ + if (!versionIsAtLeast(3, 13)) { + return {}; // _Py_DebugOffsets was added in 3.13 + } + + if (0 != memcmp(py_runtime.getField(&py_runtime_v::o_dbg_off_cookie), "xdebugpy", 8)) { + LOG(WARNING) << "Debug offsets cookie doesn't match!"; + return {}; + } + + uint64_t version = py_runtime.getField(&py_runtime_v::o_dbg_off_py_version_hex); + int major = (version >> 24) & 0xff; + int minor = (version >> 16) & 0xff; + + if (major != d_major || minor != d_minor) { + LOG(WARNING) << "Detected version " << d_major << "." << d_minor + << " doesn't match debug offsets version " << major << "." << minor << "!"; + return {}; + } + + python_v debug_offsets{}; + if (!copyDebugOffsets(py_runtime, debug_offsets)) { + return {}; + } + + if (!validateDebugOffsets(py_runtime, debug_offsets)) { + return {}; + } + + auto ret = std::make_unique(); + *ret = debug_offsets; + clampSizes(*ret); + return ret; +} + +bool +AbstractProcessManager::copyDebugOffsets(Structure& py_runtime, python_v& debug_offsets) + const +{ + // Fill in a temporary python_v with the offsets from the remote. 
+ // For fields that aren't in _Py_DebugOffsets, make some assumptions, based + // in part on the size delta between the sizeof(PyObject) baked into our + // static offsets and the sizeof(PyObject) in the remote process/core. + Py_ssize_t new_pyobject_size = py_runtime.getField(&py_runtime_v::o_dbg_off_pyobject_struct_size); + Py_ssize_t pyobject_size_delta = -d_py_v->py_object.size + new_pyobject_size; + +#define set_size(pystack_struct, size_offset) \ + debug_offsets.pystack_struct.size = py_runtime.getField(size_offset) + +#define set_offset(pystack_field, field_offset_offset) \ + debug_offsets.pystack_field = {(offset_t)py_runtime.getField(field_offset_offset)} + + set_size(py_runtime, &py_runtime_v::o_dbg_off_runtime_state_struct_size); + set_offset(py_runtime.o_finalizing, &py_runtime_v::o_dbg_off_runtime_state_finalizing); + set_offset(py_runtime.o_interp_head, &py_runtime_v::o_dbg_off_runtime_state_interpreters_head); + + set_size(py_is, &py_runtime_v::o_dbg_off_interpreter_state_struct_size); + set_offset(py_is.o_next, &py_runtime_v::o_dbg_off_interpreter_state_next); + set_offset(py_is.o_tstate_head, &py_runtime_v::o_dbg_off_interpreter_state_threads_head); + set_offset(py_is.o_gc, &py_runtime_v::o_dbg_off_interpreter_state_gc); + set_offset(py_is.o_modules, &py_runtime_v::o_dbg_off_interpreter_state_imports_modules); + set_offset(py_is.o_sysdict, &py_runtime_v::o_dbg_off_interpreter_state_sysdict); + set_offset(py_is.o_builtins, &py_runtime_v::o_dbg_off_interpreter_state_builtins); + set_offset(py_is.o_gil_runtime_state, &py_runtime_v::o_dbg_off_interpreter_state_ceval_gil); + + set_size(py_thread, &py_runtime_v::o_dbg_off_thread_state_struct_size); + set_offset(py_thread.o_prev, &py_runtime_v::o_dbg_off_thread_state_prev); + set_offset(py_thread.o_next, &py_runtime_v::o_dbg_off_thread_state_next); + set_offset(py_thread.o_interp, &py_runtime_v::o_dbg_off_thread_state_interp); + set_offset(py_thread.o_frame, 
&py_runtime_v::o_dbg_off_thread_state_current_frame); + set_offset(py_thread.o_thread_id, &py_runtime_v::o_dbg_off_thread_state_thread_id); + set_offset(py_thread.o_native_thread_id, &py_runtime_v::o_dbg_off_thread_state_native_thread_id); + + set_size(py_frame, &py_runtime_v::o_dbg_off_interpreter_frame_struct_size); + set_offset(py_frame.o_back, &py_runtime_v::o_dbg_off_interpreter_frame_previous); + set_offset(py_frame.o_code, &py_runtime_v::o_dbg_off_interpreter_frame_executable); + set_offset(py_frame.o_prev_instr, &py_runtime_v::o_dbg_off_interpreter_frame_instr_ptr); + set_offset(py_frame.o_localsplus, &py_runtime_v::o_dbg_off_interpreter_frame_localsplus); + set_offset(py_frame.o_owner, &py_runtime_v::o_dbg_off_interpreter_frame_owner); + + set_size(py_code, &py_runtime_v::o_dbg_off_code_object_struct_size); + set_offset(py_code.o_filename, &py_runtime_v::o_dbg_off_code_object_filename); + set_offset(py_code.o_name, &py_runtime_v::o_dbg_off_code_object_name); + set_offset(py_code.o_lnotab, &py_runtime_v::o_dbg_off_code_object_linetable); + set_offset(py_code.o_firstlineno, &py_runtime_v::o_dbg_off_code_object_firstlineno); + set_offset(py_code.o_argcount, &py_runtime_v::o_dbg_off_code_object_argcount); + set_offset(py_code.o_varnames, &py_runtime_v::o_dbg_off_code_object_localsplusnames); + set_offset(py_code.o_code_adaptive, &py_runtime_v::o_dbg_off_code_object_co_code_adaptive); + + set_size(py_object, &py_runtime_v::o_dbg_off_pyobject_struct_size); + set_offset(py_object.o_ob_type, &py_runtime_v::o_dbg_off_pyobject_ob_type); + + set_size(py_type, &py_runtime_v::o_dbg_off_type_object_struct_size); + set_offset(py_type.o_tp_name, &py_runtime_v::o_dbg_off_type_object_tp_name); + // Assume our static offsets are correct about the distance from tp_name to the other fields + debug_offsets.py_type.o_tp_repr = { + d_py_v->py_type.o_tp_repr.offset - d_py_v->py_type.o_tp_name.offset + + debug_offsets.py_type.o_tp_name.offset}; + debug_offsets.py_type.o_tp_flags = 
{ + d_py_v->py_type.o_tp_flags.offset - d_py_v->py_type.o_tp_name.offset + + debug_offsets.py_type.o_tp_name.offset}; + + set_size(py_tuple, &py_runtime_v::o_dbg_off_tuple_object_struct_size); + // Assume ob_base is the first field of PyVarObject and ob_size is the second + static_assert(sizeof(PyTupleObject::ob_base.ob_base) == offsetof(PyTupleObject, ob_base.ob_size)); + debug_offsets.py_tuple.o_ob_size = {(offset_t)new_pyobject_size}; + set_offset(py_tuple.o_ob_item, &py_runtime_v::o_dbg_off_tuple_object_ob_item); + + set_size(py_unicode, &py_runtime_v::o_dbg_off_unicode_object_struct_size); + set_offset(py_unicode.o_state, &py_runtime_v::o_dbg_off_unicode_object_state); + set_offset(py_unicode.o_length, &py_runtime_v::o_dbg_off_unicode_object_length); + set_offset(py_unicode.o_ascii, &py_runtime_v::o_dbg_off_unicode_object_asciiobject_size); + + set_size(py_gc, &py_runtime_v::o_dbg_off_gc_struct_size); + set_offset(py_gc.o_collecting, &py_runtime_v::o_dbg_off_gc_collecting); + + // Assume ob_size and ob_item are at the same location for list as for tuple + static_assert( + offsetof(PyListObject, ob_item) + sizeof(PyListObject::ob_item) <= sizeof(PyTupleObject)); + debug_offsets.py_list.size = debug_offsets.py_tuple.size; + + static_assert(offsetof(PyListObject, ob_base.ob_size) == offsetof(PyTupleObject, ob_base.ob_size)); + debug_offsets.py_list.o_ob_size = debug_offsets.py_tuple.o_ob_size; + + static_assert(offsetof(PyListObject, ob_item) == offsetof(PyTupleObject, ob_item)); + debug_offsets.py_list.o_ob_item = {debug_offsets.py_tuple.o_ob_item.offset}; + + // Assume our static offsets for dict are correct save possibly for sizeof(PyObject) changing + debug_offsets.py_dictkeys = d_py_v->py_dictkeys; + debug_offsets.py_dictvalues = d_py_v->py_dictvalues; + debug_offsets.py_dict = d_py_v->py_dict; + debug_offsets.py_dict.size += pyobject_size_delta; + debug_offsets.py_dict.o_ma_keys.offset += pyobject_size_delta; + debug_offsets.py_dict.o_ma_values.offset += 
pyobject_size_delta; + + // Assume our static offsets for float are correct save possibly for sizeof(PyObject) changing + debug_offsets.py_float = d_py_v->py_float; + debug_offsets.py_float.size += pyobject_size_delta; + debug_offsets.py_float.o_ob_fval.offset += pyobject_size_delta; + + // Assume our static offsets for long are correct save possibly for sizeof(PyObject) changing + debug_offsets.py_long = d_py_v->py_long; + debug_offsets.py_long.size += pyobject_size_delta; + debug_offsets.py_long.o_ob_size.offset += pyobject_size_delta; + debug_offsets.py_long.o_ob_digit.offset += pyobject_size_delta; + + // Assume our static offsets for bytes are correct save possibly for sizeof(PyObject) changing + debug_offsets.py_bytes = d_py_v->py_bytes; + debug_offsets.py_bytes.size += pyobject_size_delta; + debug_offsets.py_bytes.o_ob_size.offset += pyobject_size_delta; + debug_offsets.py_bytes.o_ob_sval.offset += pyobject_size_delta; + + // Assume our static offsets for cframe are all correct + debug_offsets.py_cframe = d_py_v->py_cframe; + + // Assume our static offsets for gilruntimestate are off by 8 bytes in a free-threading build. + // This is quite a hack... 
+ debug_offsets.py_gilruntimestate = d_py_v->py_gilruntimestate; + bool is_free_threading = static_cast(debug_offsets.py_object.size) > 2 * sizeof(void*); + if (is_free_threading) { + debug_offsets.py_gilruntimestate.size += 8; + debug_offsets.py_gilruntimestate.o_last_holder.offset += 8; + debug_offsets.py_gilruntimestate.o_locked.offset += 8; + } + +#undef set_size +#undef set_offset + + return true; +} + +bool +AbstractProcessManager::validateDebugOffsets( + const Structure& py_runtime, + python_v& debug_offsets) const +{ + // Simple sanity checks on the decoded offsets: + // - No structure is larger than 1 MB + // - Every field falls within its structure's size +#define check_size(pystack_struct, size_offset) \ + do { \ + if (debug_offsets.pystack_struct.size > 1024 * 1024) { \ + LOG(WARNING) << "Ignoring debug offsets because " #pystack_struct ".size (" \ + << debug_offsets.pystack_struct.size << ") reported at byte offset " \ + << (d_py_v->py_runtime.*size_offset).offset \ + << " in detected _Py_DebugOffsets structure at " << std::hex << std::showbase \ + << py_runtime.getFieldRemoteAddress(&py_runtime_v::o_dbg_off_cookie) \ + << " is implausibly large"; \ + return {}; \ + } \ + } while (0) + +#define check_field_bounds(structure, field) \ + do { \ + if (debug_offsets.structure.size < 0 \ + || (size_t)debug_offsets.structure.size < debug_offsets.structure.field.offset \ + || debug_offsets.structure.size - debug_offsets.structure.field.offset \ + < sizeof(decltype(debug_offsets.structure.field)::Type)) \ + { \ + LOG(WARNING) << "Ignoring debug offsets because " #structure ".size (" \ + << debug_offsets.structure.size << ") - " #structure "." 
#field ".offset (" \ + << debug_offsets.structure.field.offset << ") < the field's size (" \ + << sizeof(decltype(debug_offsets.structure.field)::Type) << ")"; \ + return {}; \ + } \ + } while (0) + + check_size(py_runtime, &py_runtime_v::o_dbg_off_runtime_state_struct_size); + check_field_bounds(py_runtime, o_finalizing); + check_field_bounds(py_runtime, o_interp_head); + + check_size(py_is, &py_runtime_v::o_dbg_off_interpreter_state_struct_size); + check_field_bounds(py_is, o_next); + check_field_bounds(py_is, o_tstate_head); + check_field_bounds(py_is, o_gc); + check_field_bounds(py_is, o_modules); + check_field_bounds(py_is, o_sysdict); + check_field_bounds(py_is, o_builtins); + check_field_bounds(py_is, o_gil_runtime_state); + + check_size(py_thread, &py_runtime_v::o_dbg_off_thread_state_struct_size); + check_field_bounds(py_thread, o_prev); + check_field_bounds(py_thread, o_next); + check_field_bounds(py_thread, o_interp); + check_field_bounds(py_thread, o_frame); + check_field_bounds(py_thread, o_thread_id); + check_field_bounds(py_thread, o_native_thread_id); + + check_size(py_frame, &py_runtime_v::o_dbg_off_interpreter_frame_struct_size); + check_field_bounds(py_frame, o_back); + check_field_bounds(py_frame, o_code); + check_field_bounds(py_frame, o_prev_instr); + check_field_bounds(py_frame, o_localsplus); + check_field_bounds(py_frame, o_owner); + + check_size(py_code, &py_runtime_v::o_dbg_off_code_object_struct_size); + check_field_bounds(py_code, o_filename); + check_field_bounds(py_code, o_name); + check_field_bounds(py_code, o_lnotab); + check_field_bounds(py_code, o_firstlineno); + check_field_bounds(py_code, o_argcount); + check_field_bounds(py_code, o_varnames); + check_field_bounds(py_code, o_code_adaptive); + + check_size(py_object, &py_runtime_v::o_dbg_off_pyobject_struct_size); + check_field_bounds(py_object, o_ob_type); + + check_size(py_type, &py_runtime_v::o_dbg_off_type_object_struct_size); + check_field_bounds(py_type, o_tp_name); + 
check_field_bounds(py_type, o_tp_repr); + check_field_bounds(py_type, o_tp_flags); + + check_size(py_tuple, &py_runtime_v::o_dbg_off_tuple_object_struct_size); + check_field_bounds(py_tuple, o_ob_size); + check_field_bounds(py_tuple, o_ob_item); + + check_size(py_unicode, &py_runtime_v::o_dbg_off_unicode_object_struct_size); + check_field_bounds(py_unicode, o_state); + check_field_bounds(py_unicode, o_length); + check_field_bounds(py_unicode, o_ascii); + + check_size(py_gc, &py_runtime_v::o_dbg_off_gc_struct_size); + check_field_bounds(py_gc, o_collecting); + + check_field_bounds(py_list, o_ob_size); + check_field_bounds(py_list, o_ob_item); + + check_field_bounds(py_dictkeys, o_dk_size); + check_field_bounds(py_dictkeys, o_dk_kind); + check_field_bounds(py_dictkeys, o_dk_nentries); + check_field_bounds(py_dictkeys, o_dk_indices); + + check_field_bounds(py_dictvalues, o_values); + + check_field_bounds(py_dict, o_ma_keys); + check_field_bounds(py_dict, o_ma_values); + + check_field_bounds(py_float, o_ob_fval); + + check_field_bounds(py_long, o_ob_size); + check_field_bounds(py_long, o_ob_digit); + + check_field_bounds(py_bytes, o_ob_size); + check_field_bounds(py_bytes, o_ob_sval); + + check_field_bounds(py_cframe, current_frame); + +#undef check_size +#undef check_field_bounds + + return true; +} + +void +AbstractProcessManager::clampSizes(python_v& debug_offsets) const +{ + // Clamp the size of each struct down to only what we need to copy. + // The runtime state and interpreter state both contain many fields beyond + // the ones that we're interested in or have offsets for. 
+#define update_size(structure, field) \ + debug_offsets.structure.size = std::max( \ + (size_t)debug_offsets.structure.size, \ + debug_offsets.structure.field.offset \ + + sizeof(decltype(debug_offsets.structure.field)::Type)) + + debug_offsets.py_runtime.size = 0; + update_size(py_runtime, o_finalizing); + update_size(py_runtime, o_interp_head); + + debug_offsets.py_is.size = 0; + update_size(py_is, o_next); + update_size(py_is, o_tstate_head); + update_size(py_is, o_gc); + update_size(py_is, o_modules); + update_size(py_is, o_sysdict); + update_size(py_is, o_builtins); + update_size(py_is, o_gil_runtime_state); + + debug_offsets.py_thread.size = 0; + update_size(py_thread, o_prev); + update_size(py_thread, o_next); + update_size(py_thread, o_interp); + update_size(py_thread, o_frame); + update_size(py_thread, o_thread_id); + update_size(py_thread, o_native_thread_id); + + debug_offsets.py_frame.size = 0; + update_size(py_frame, o_back); + update_size(py_frame, o_code); + update_size(py_frame, o_prev_instr); + update_size(py_frame, o_localsplus); + update_size(py_frame, o_owner); + + debug_offsets.py_code.size = 0; + update_size(py_code, o_filename); + update_size(py_code, o_name); + update_size(py_code, o_lnotab); + update_size(py_code, o_firstlineno); + update_size(py_code, o_argcount); + update_size(py_code, o_varnames); + update_size(py_code, o_code_adaptive); + + debug_offsets.py_object.size = 0; + update_size(py_object, o_ob_type); + + debug_offsets.py_type.size = 0; + update_size(py_type, o_tp_name); + update_size(py_type, o_tp_repr); + update_size(py_type, o_tp_flags); + + debug_offsets.py_tuple.size = 0; + update_size(py_tuple, o_ob_size); + update_size(py_tuple, o_ob_item); + + debug_offsets.py_unicode.size = 0; + update_size(py_unicode, o_state); + update_size(py_unicode, o_length); + update_size(py_unicode, o_ascii); + + debug_offsets.py_gc.size = 0; + update_size(py_gc, o_collecting); + + debug_offsets.py_list.size = 0; + update_size(py_list, 
o_ob_size); + update_size(py_list, o_ob_item); + + debug_offsets.py_dictkeys.size = 0; + update_size(py_dictkeys, o_dk_size); + update_size(py_dictkeys, o_dk_kind); + update_size(py_dictkeys, o_dk_nentries); + update_size(py_dictkeys, o_dk_indices); + + debug_offsets.py_dictvalues.size = 0; + update_size(py_dictvalues, o_values); + + debug_offsets.py_dict.size = 0; + update_size(py_dict, o_ma_keys); + update_size(py_dict, o_ma_values); + + debug_offsets.py_float.size = 0; + update_size(py_float, o_ob_fval); + + debug_offsets.py_long.size = 0; + update_size(py_long, o_ob_size); + update_size(py_long, o_ob_digit); + + debug_offsets.py_bytes.size = 0; + update_size(py_bytes, o_ob_size); + update_size(py_bytes, o_ob_sval); + + debug_offsets.py_cframe.size = 0; + update_size(py_cframe, current_frame); +} + bool AbstractProcessManager::versionIsAtLeast(int required_major, int required_minor) const { @@ -705,11 +1260,14 @@ AbstractProcessManager::versionIsAtLeast(int required_major, int required_minor) const python_v& AbstractProcessManager::offsets() const { + if (d_debug_offsets) { + return *d_debug_offsets; + } return *d_py_v; } remote_addr_t -AbstractProcessManager::findInterpreterStateFromElfData() const +AbstractProcessManager::findPyRuntimeFromElfData() const { LOG(INFO) << "Trying to resolve PyInterpreterState from Elf data"; SectionInfo section_info; @@ -724,7 +1282,17 @@ AbstractProcessManager::findInterpreterStateFromElfData() const "could not be found"; return 0; } - return findInterpreterStateFromPyRuntime(load_addr + section_info.corrected_addr); + return load_addr + section_info.corrected_addr; +} + +remote_addr_t +AbstractProcessManager::findInterpreterStateFromElfData() const +{ + remote_addr_t pyruntime = findPyRuntimeFromElfData(); + if (!pyruntime) { + return 0; + } + return findInterpreterStateFromPyRuntime(pyruntime); } ProcessManager::ProcessManager( diff --git a/src/pystack/_pystack/process.h b/src/pystack/_pystack/process.h index 
ff3d21b3..631923be 100644 --- a/src/pystack/_pystack/process.h +++ b/src/pystack/_pystack/process.h @@ -20,6 +20,9 @@ namespace pystack { +template +class Structure; + struct InvalidRemoteObject : public InvalidCopiedMemory { const char* what() const noexcept override @@ -88,6 +91,7 @@ class AbstractProcessManager : public std::enable_shared_from_this findPythonVersion() const; + void setPythonVersionFromDebugOffsets(); void setPythonVersion(const std::pair& version); bool versionIsAtLeast(int required_major, int required_minor) const; const python_v& offsets() const; @@ -106,6 +110,8 @@ class AbstractProcessManager : public std::enable_shared_from_this d_debug_offsets{}; mutable std::unordered_map d_type_cache; // Methods @@ -113,8 +119,16 @@ class AbstractProcessManager : public std::enable_shared_from_this loadDebugOffsets(Structure& py_runtime) const; + bool copyDebugOffsets(Structure& py_runtime, python_v& debug_offsets) const; + bool validateDebugOffsets(const Structure& py_runtime, python_v& debug_offsets) const; + void clampSizes(python_v& debug_offsets) const; remote_addr_t scanMemoryAreaForInterpreterState(const VirtualMap& map) const; + remote_addr_t scanMemoryAreaForDebugOffsets(const VirtualMap& map) const; }; template diff --git a/src/pystack/_pystack/process.pxd b/src/pystack/_pystack/process.pxd index cf750cd1..41adc861 100644 --- a/src/pystack/_pystack/process.pxd +++ b/src/pystack/_pystack/process.pxd @@ -30,7 +30,8 @@ cdef extern from "process.h" namespace "pystack": vector[int] Tids() except+ InterpreterStatus isInterpreterActive() except+ pair[int, int] findPythonVersion() - void setPythonVersion(pair[int, int] version) + void setPythonVersion(pair[int, int] version) except + + void setPythonVersionFromDebugOffsets() except + cdef cppclass ProcessManager(AbstractProcessManager): ProcessManager(int pid, shared_ptr[ProcessTracer] tracer, shared_ptr[ProcessAnalyzer] analyzer, vector[VirtualMap] memory_maps, MemoryMapInformation map_info) except+ 
diff --git a/src/pystack/_pystack/version.cpp b/src/pystack/_pystack/version.cpp index 20e4edca..775d3836 100644 --- a/src/pystack/_pystack/version.cpp +++ b/src/pystack/_pystack/version.cpp @@ -238,6 +238,7 @@ py_runtimev313() {}, {}, offsetof(T, debug_offsets.cookie), + offsetof(T, debug_offsets.version), offsetof(T, debug_offsets.runtime_state.size), offsetof(T, debug_offsets.runtime_state.finalizing), offsetof(T, debug_offsets.runtime_state.interpreters_head), diff --git a/src/pystack/_pystack/version.h b/src/pystack/_pystack/version.h index 46e3ea5d..6570f7d3 100644 --- a/src/pystack/_pystack/version.h +++ b/src/pystack/_pystack/version.h @@ -133,6 +133,7 @@ struct py_runtime_v FieldOffset o_tstate_current; FieldOffset o_dbg_off_cookie; + FieldOffset o_dbg_off_py_version_hex; FieldOffset o_dbg_off_runtime_state_struct_size; FieldOffset o_dbg_off_runtime_state_finalizing; From f258b6a9cf05a0ef4d4ca07bc1c701be5c000e83 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 16 Aug 2024 18:09:29 -0400 Subject: [PATCH 12/15] Try to locate the interpreter using debug offsets If we've found the `_Py_DebugOffsets` structure, we can use this to find the interpreter state quickly and efficiently: the debug offsets are at the start of the `_PyRuntime` structure, which contains a reference to the interpreter state, at an offset identified by the debug offsets. 
Signed-off-by: Matt Wozniski --- src/pystack/_pystack.pyi | 1 + src/pystack/_pystack.pyx | 9 +++++++-- src/pystack/_pystack/process.cpp | 30 ++++++++++++++++++++++++++++++ src/pystack/_pystack/process.h | 1 + src/pystack/_pystack/process.pxd | 1 + tests/integration/test_smoke.py | 13 ++++++++++++- tests/utils.py | 6 ++++++ 7 files changed, 58 insertions(+), 3 deletions(-) diff --git a/src/pystack/_pystack.pyi b/src/pystack/_pystack.pyi index f6096b69..8d4a6d43 100644 --- a/src/pystack/_pystack.pyi +++ b/src/pystack/_pystack.pyi @@ -36,6 +36,7 @@ class StackMethod(enum.Enum): ELF_DATA: int HEAP: int SYMBOLS: int + DEBUG_OFFSETS: int class ProcessManager: ... diff --git a/src/pystack/_pystack.pyx b/src/pystack/_pystack.pyx index a16d4a10..fa794f22 100644 --- a/src/pystack/_pystack.pyx +++ b/src/pystack/_pystack.pyx @@ -77,7 +77,8 @@ class StackMethod(enum.Enum): BSS = 1 << 2 ANONYMOUS_MAPS = 1 << 3 HEAP = 1 << 4 - AUTO = ELF_DATA | SYMBOLS | BSS + DEBUG_OFFSETS = 1 << 5 + AUTO = DEBUG_OFFSETS | ELF_DATA | SYMBOLS | BSS ALL = AUTO | ANONYMOUS_MAPS | HEAP @@ -530,6 +531,7 @@ cdef remote_addr_t _get_interpreter_state_addr( ) except*: cdef remote_addr_t head = 0 possible_methods = [ + StackMethod.DEBUG_OFFSETS, StackMethod.ELF_DATA, StackMethod.SYMBOLS, StackMethod.BSS, @@ -542,7 +544,10 @@ cdef remote_addr_t _get_interpreter_state_addr( continue try: - if possible_method == StackMethod.ELF_DATA: + if possible_method == StackMethod.DEBUG_OFFSETS: + how = "using debug offsets data" + head = manager.findInterpreterStateFromDebugOffsets() + elif possible_method == StackMethod.ELF_DATA: how = "using ELF data" head = manager.findInterpreterStateFromElfData() elif possible_method == StackMethod.SYMBOLS: diff --git a/src/pystack/_pystack/process.cpp b/src/pystack/_pystack/process.cpp index 91c98d8e..a905c191 100644 --- a/src/pystack/_pystack/process.cpp +++ b/src/pystack/_pystack/process.cpp @@ -1295,6 +1295,36 @@ AbstractProcessManager::findInterpreterStateFromElfData() const 
return findInterpreterStateFromPyRuntime(pyruntime); } +remote_addr_t +AbstractProcessManager::findInterpreterStateFromDebugOffsets() const +{ + if (!d_debug_offsets_addr) { + LOG(DEBUG) << "Debug offsets were never found"; + return 0; + } + + LOG(INFO) << "Searching for PyInterpreterState based on PyRuntime address " << std::hex + << std::showbase << d_debug_offsets_addr + << " found when searching for 3.13+ debug offsets"; + + try { + Structure runtime(shared_from_this(), d_debug_offsets_addr); + remote_addr_t interp_state = runtime.getField(&py_runtime_v::o_interp_head); + LOG(DEBUG) << "Checking interpreter state at " << std::hex << std::showbase << interp_state + << " found at address " + << runtime.getFieldRemoteAddress(&py_runtime_v::o_interp_head); + if (isValidInterpreterState(interp_state)) { + LOG(DEBUG) << "Interpreter head reference from debug offsets dereferences successfully"; + return interp_state; + } + } catch (...) { + // Swallow exceptions and fall through to return failure + } + LOG(INFO) << "Failed to resolve PyInterpreterState based on PyRuntime address " << std::hex + << std::showbase << d_debug_offsets_addr; + return 0; +} + ProcessManager::ProcessManager( pid_t pid, const std::shared_ptr& tracer, diff --git a/src/pystack/_pystack/process.h b/src/pystack/_pystack/process.h index 631923be..c8f7cb7a 100644 --- a/src/pystack/_pystack/process.h +++ b/src/pystack/_pystack/process.h @@ -78,6 +78,7 @@ class AbstractProcessManager : public std::enable_shared_from_this diff --git a/src/pystack/_pystack/process.pxd b/src/pystack/_pystack/process.pxd index 41adc861..ede12d58 100644 --- a/src/pystack/_pystack/process.pxd +++ b/src/pystack/_pystack/process.pxd @@ -24,6 +24,7 @@ cdef extern from "process.h" namespace "pystack": remote_addr_t scanBSS() except+ remote_addr_t scanHeap() except+ remote_addr_t scanAllAnonymousMaps() except+ + remote_addr_t findInterpreterStateFromDebugOffsets() except+ remote_addr_t findInterpreterStateFromSymbols() except+ 
remote_addr_t findInterpreterStateFromElfData() except+ ssize_t copyMemoryFromProcess(remote_addr_t addr, ssize_t size, void *destination) except+ diff --git a/tests/integration/test_smoke.py b/tests/integration/test_smoke.py index 47a02fa9..1a3278c6 100644 --- a/tests/integration/test_smoke.py +++ b/tests/integration/test_smoke.py @@ -20,9 +20,20 @@ elif sys.version_info < (3, 11): # pragma: no cover STACK_METHODS = (StackMethod.SYMBOLS, StackMethod.ELF_DATA, StackMethod.HEAP) CORE_STACK_METHODS = (StackMethod.SYMBOLS, StackMethod.ELF_DATA) -else: # pragma: no cover +elif sys.version_info < (3, 13): # pragma: no cover STACK_METHODS = (StackMethod.SYMBOLS, StackMethod.ELF_DATA) CORE_STACK_METHODS = (StackMethod.SYMBOLS, StackMethod.ELF_DATA) +else: # pragma: no cover + STACK_METHODS = ( + StackMethod.DEBUG_OFFSETS, + StackMethod.SYMBOLS, + StackMethod.ELF_DATA, + ) + CORE_STACK_METHODS = ( + StackMethod.DEBUG_OFFSETS, + StackMethod.SYMBOLS, + StackMethod.ELF_DATA, + ) @pytest.mark.parametrize("method", STACK_METHODS) diff --git a/tests/utils.py b/tests/utils.py index 6dcc4984..b2321400 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -218,6 +218,7 @@ def generate_all_pystack_combinations( ]: # pragma: no cover if corefile: stack_methods = ( + StackMethod.DEBUG_OFFSETS, StackMethod.SYMBOLS, StackMethod.BSS, StackMethod.ELF_DATA, @@ -225,6 +226,7 @@ def generate_all_pystack_combinations( ) else: stack_methods = ( + StackMethod.DEBUG_OFFSETS, StackMethod.SYMBOLS, StackMethod.BSS, StackMethod.HEAP, @@ -243,6 +245,10 @@ def generate_all_pystack_combinations( AVAILABLE_PYTHONS, ): (major_version, minor_version) = python.version + if method == StackMethod.DEBUG_OFFSETS and ( + major_version < 3 or (major_version == 3 and minor_version < 13) + ): + continue if method == StackMethod.BSS and ( major_version > 3 or (major_version == 3 and minor_version >= 10) ): From e5794d2a6e3b1d1928c508b99993898a3a37d154 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 16 Aug 
2024 22:30:32 -0400 Subject: [PATCH 13/15] ci: Exercise python3.13t in CI Signed-off-by: Matt Wozniski --- .github/workflows/build_wheels.yml | 43 ++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 425c7061..368ab1bb 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -152,6 +152,49 @@ jobs: PYTHON_TEST_VERSION: "auto" run: python${{matrix.python_version}} -m pytest tests -k 'not 2.7' -n auto -vvv + test_free_threading: + needs: [build_wheels] + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + python_version: ["3.13"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "${{matrix.python_version}}-dev" + - uses: actions/download-artifact@v4 + with: + name: "manylinux_x86_64-wheels" + path: dist + - name: Set up dependencies + run: | + sudo add-apt-repository ppa:deadsnakes/ppa + sudo apt-get update + sudo apt-get install -qy \ + gdb \ + python${{matrix.python_version}}-dev \ + python${{matrix.python_version}}-nogil \ + python${{matrix.python_version}}-venv + - name: Install Python dependencies + run: | + python${{matrix.python_version}} -m pip install --upgrade pip + python${{matrix.python_version}} -m pip install -r requirements-test.txt + python${{matrix.python_version}} -m pip install --no-index --find-links=dist/ --only-binary=pystack pystack + - name: Install setuptools for the free-threading version + run: | + python${{matrix.python_version}}t -m venv --system-site-packages /tmp/pip${{matrix.python_version}} + /tmp/pip${{matrix.python_version}}/bin/pip install --user setuptools + - name: Disable ptrace security restrictions + run: | + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope + - name: Run pytest + env: + PYTHON_TEST_VERSION: "${{matrix.python_version}}t" + run: python${{matrix.python_version}} -m pytest tests -k 'not 2.7' 
-n auto -vvv + test_in_alpine: needs: [build_wheels] runs-on: ubuntu-latest From d70029cd9e01746d994ff5cc5c238b540b0bfad7 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 16 Aug 2024 22:30:56 -0400 Subject: [PATCH 14/15] Document that we can debug python3.13t Signed-off-by: Matt Wozniski --- news/206.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 news/206.feature.rst diff --git a/news/206.feature.rst b/news/206.feature.rst new file mode 100644 index 00000000..2747c2c8 --- /dev/null +++ b/news/206.feature.rst @@ -0,0 +1 @@ +Support debugging free-threading (a.k.a. "nogil") Python 3.13 builds. Note that PyStack can't itself be run with ``python3.13t``, it can only attach to a ``python3.13t`` process or core file from another interpreter. From 48ec2091b54b13d7907ba48ec02a24c79b8c451f Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 16 Aug 2024 23:31:36 -0400 Subject: [PATCH 15/15] tests: Delete core files for successful tests By default pytest keeps temp files for the last 3 runs of the test suite around, but this can quickly fill up a disk due to the number of core files we create. Configure it to only keep temp files for failed tests. Signed-off-by: Matt Wozniski --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index fda17ed6..84f6a0dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,3 +116,8 @@ omit = [ [tool.coverage.report] show_missing = true + +[tool.pytest.ini_options] +# pytest retains all temp files from the last 3 test suite runs by default. +# Keep only ones for failed tests to avoid filling up a disk. +tmp_path_retention_policy = "failed"