diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 425c706..368ab1b 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -152,6 +152,49 @@ jobs: PYTHON_TEST_VERSION: "auto" run: python${{matrix.python_version}} -m pytest tests -k 'not 2.7' -n auto -vvv + test_free_threading: + needs: [build_wheels] + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + python_version: ["3.13"] + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "${{matrix.python_version}}-dev" + - uses: actions/download-artifact@v4 + with: + name: "manylinux_x86_64-wheels" + path: dist + - name: Set up dependencies + run: | + sudo add-apt-repository ppa:deadsnakes/ppa + sudo apt-get update + sudo apt-get install -qy \ + gdb \ + python${{matrix.python_version}}-dev \ + python${{matrix.python_version}}-nogil \ + python${{matrix.python_version}}-venv + - name: Install Python dependencies + run: | + python${{matrix.python_version}} -m pip install --upgrade pip + python${{matrix.python_version}} -m pip install -r requirements-test.txt + python${{matrix.python_version}} -m pip install --no-index --find-links=dist/ --only-binary=pystack pystack + - name: Install setuptools for the free-threading version + run: | + python${{matrix.python_version}}t -m venv --system-site-packages /tmp/pip${{matrix.python_version}} + /tmp/pip${{matrix.python_version}}/bin/pip install --user setuptools + - name: Disable ptrace security restrictions + run: | + echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope + - name: Run pytest + env: + PYTHON_TEST_VERSION: "${{matrix.python_version}}t" + run: python${{matrix.python_version}} -m pytest tests -k 'not 2.7' -n auto -vvv + test_in_alpine: needs: [build_wheels] runs-on: ubuntu-latest diff --git a/news/206.feature.rst b/news/206.feature.rst new file mode 100644 index 0000000..2747c2c --- /dev/null +++ 
b/news/206.feature.rst @@ -0,0 +1 @@ +Support debugging free-threading (a.k.a. "nogil") Python 3.13 builds. Note that PyStack can't itself be run with ``python3.13t``; it can only attach to a ``python3.13t`` process or core file from another interpreter. diff --git a/pyproject.toml b/pyproject.toml index fda17ed..84f6a0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,3 +116,8 @@ omit = [ [tool.coverage.report] show_missing = true + +[tool.pytest.ini_options] +# pytest retains all temp files from the last 3 test suite runs by default. +# Keep only ones for failed tests to avoid filling up a disk. +tmp_path_retention_policy = "failed" diff --git a/src/pystack/_pystack.pyi b/src/pystack/_pystack.pyi index f6096b6..8d4a6d4 100644 --- a/src/pystack/_pystack.pyi +++ b/src/pystack/_pystack.pyi @@ -36,6 +36,7 @@ class StackMethod(enum.Enum): ELF_DATA: int HEAP: int SYMBOLS: int + DEBUG_OFFSETS: int class ProcessManager: ... diff --git a/src/pystack/_pystack.pyx b/src/pystack/_pystack.pyx index 596768a..fa794f2 100644 --- a/src/pystack/_pystack.pyx +++ b/src/pystack/_pystack.pyx @@ -77,7 +77,8 @@ class StackMethod(enum.Enum): BSS = 1 << 2 ANONYMOUS_MAPS = 1 << 3 HEAP = 1 << 4 - AUTO = ELF_DATA | SYMBOLS | BSS + DEBUG_OFFSETS = 1 << 5 + AUTO = DEBUG_OFFSETS | ELF_DATA | SYMBOLS | BSS ALL = AUTO | ANONYMOUS_MAPS | HEAP @@ -307,6 +308,7 @@ cdef class ProcessManager: ) ) + native_manager.get().setPythonVersionFromDebugOffsets() python_version = native_manager.get().findPythonVersion() if python_version == (-1, -1): python_version = get_python_version_for_process(pid, map_info) @@ -353,10 +355,12 @@ cdef class ProcessManager: make_shared[CoreFileProcessManager](pid, analyzer, maps, native_map_info) ) + native_manager.get().setPythonVersionFromDebugOffsets() python_version = native_manager.get().findPythonVersion() if python_version == (-1, -1): python_version = get_python_version_for_core(core_file, executable, map_info) 
native_manager.get().setPythonVersion(python_version) + cdef ProcessManager new_manager = cls( pid, python_version, virtual_maps, map_info ) @@ -527,6 +531,7 @@ cdef remote_addr_t _get_interpreter_state_addr( ) except*: cdef remote_addr_t head = 0 possible_methods = [ + StackMethod.DEBUG_OFFSETS, StackMethod.ELF_DATA, StackMethod.SYMBOLS, StackMethod.BSS, @@ -539,7 +544,10 @@ cdef remote_addr_t _get_interpreter_state_addr( continue try: - if possible_method == StackMethod.ELF_DATA: + if possible_method == StackMethod.DEBUG_OFFSETS: + how = "using debug offsets data" + head = manager.findInterpreterStateFromDebugOffsets() + elif possible_method == StackMethod.ELF_DATA: how = "using ELF data" head = manager.findInterpreterStateFromElfData() elif possible_method == StackMethod.SYMBOLS: diff --git a/src/pystack/_pystack/cpython/code.h b/src/pystack/_pystack/cpython/code.h index 10033cd..5069ffe 100644 --- a/src/pystack/_pystack/cpython/code.h +++ b/src/pystack/_pystack/cpython/code.h @@ -197,14 +197,4 @@ typedef struct } PyCodeObject; } // namespace Python3_13 -typedef union { - Python2::PyCodeObject v2; - Python3_3::PyCodeObject v3_3; - Python3_6::PyCodeObject v3_6; - Python3_8::PyCodeObject v3_8; - Python3_11::PyCodeObject v3_11; - Python3_12::PyCodeObject v3_12; - Python3_13::PyCodeObject v3_13; -} PyCodeObject; - } // namespace pystack diff --git a/src/pystack/_pystack/cpython/dict.h b/src/pystack/_pystack/cpython/dict.h index 154c57d..67e3804 100644 --- a/src/pystack/_pystack/cpython/dict.h +++ b/src/pystack/_pystack/cpython/dict.h @@ -29,7 +29,7 @@ typedef struct _dictobject namespace Python3 { typedef Py_ssize_t (*dict_lookup_func)(void* mp, PyObject* key, Py_hash_t hash, PyObject** value_addr); -union PyDictKeysObject; +struct PyDictKeysObject; typedef struct { @@ -82,7 +82,7 @@ typedef struct _dictkeysobject uint32_t dk_version; Py_ssize_t dk_usable; Py_ssize_t dk_nentries; - char dk_indices[]; /* char is required to avoid strict aliasing. 
*/ + char dk_indices[1]; /* char is required to avoid strict aliasing. */ } PyDictKeysObject; } // namespace Python3_11 @@ -99,14 +99,4 @@ typedef struct _dictvalues } // namespace Python3_13 -typedef union { - Python3_3::PyDictKeysObject v3_3; - Python3_11::PyDictKeysObject v3_11; -} PyDictKeysObject; - -typedef union { - Python3::PyDictValuesObject v3_3; - Python3_13::PyDictValuesObject v3_13; -} PyDictValuesObject; - } // namespace pystack diff --git a/src/pystack/_pystack/cpython/frame.h b/src/pystack/_pystack/cpython/frame.h index ea0e297..b31c58b 100644 --- a/src/pystack/_pystack/cpython/frame.h +++ b/src/pystack/_pystack/cpython/frame.h @@ -39,7 +39,7 @@ namespace Python3_7 { typedef struct _pyframeobject { PyObject_VAR_HEAD struct _pyframeobject* f_back; - PyCodeObject* f_code; + PyObject* f_code; PyObject* f_builtins; PyObject* f_globals; PyObject* f_locals; @@ -64,7 +64,7 @@ typedef signed char PyFrameState; typedef struct _pyframeobject { PyObject_VAR_HEAD struct _pyframeobject* f_back; - PyCodeObject* f_code; + PyObject* f_code; PyObject* f_builtins; PyObject* f_globals; PyObject* f_locals; @@ -126,12 +126,4 @@ typedef struct _interpreter_frame } // namespace Python3_12 -typedef union { - Python2::PyFrameObject v2; - Python3_7::PyFrameObject v3_7; - Python3_10::PyFrameObject v3_10; - Python3_11::PyFrameObject v3_11; - Python3_12::PyFrameObject v3_12; -} PyFrameObject; - } // namespace pystack diff --git a/src/pystack/_pystack/cpython/gc.h b/src/pystack/_pystack/cpython/gc.h index ddfd8bc..53c1ce1 100644 --- a/src/pystack/_pystack/cpython/gc.h +++ b/src/pystack/_pystack/cpython/gc.h @@ -106,10 +106,4 @@ struct _gc_runtime_state }; } // namespace Python3_13 - -typedef union { - struct Python3_7::_gc_runtime_state v3_7; - struct Python3_8::_gc_runtime_state v3_8; - struct Python3_13::_gc_runtime_state v3_13; -} GCRuntimeState; } // namespace pystack diff --git a/src/pystack/_pystack/cpython/interpreter.h b/src/pystack/_pystack/cpython/interpreter.h index 
89e1323..719116c 100644 --- a/src/pystack/_pystack/cpython/interpreter.h +++ b/src/pystack/_pystack/cpython/interpreter.h @@ -338,15 +338,4 @@ typedef struct _is struct _import_state imports; } PyInterpreterState; } // namespace Python3_13 - -typedef union { - Python2::PyInterpreterState v2; - Python3_5::PyInterpreterState v3_5; - Python3_7::PyInterpreterState v3_7; - Python3_8::PyInterpreterState v3_8; - Python3_9::PyInterpreterState v3_9; - Python3_11::PyInterpreterState v3_11; - Python3_12::PyInterpreterState v3_12; - Python3_13::PyInterpreterState v3_13; -} PyInterpreterState; } // namespace pystack diff --git a/src/pystack/_pystack/cpython/object.h b/src/pystack/_pystack/cpython/object.h index 15896cb..cc96e04 100644 --- a/src/pystack/_pystack/cpython/object.h +++ b/src/pystack/_pystack/cpython/object.h @@ -205,12 +205,6 @@ typedef struct _typeobject } PyTypeObject; } // namespace Python3_8 -typedef union { - Python2::PyTypeObject v2; - Python3_3::PyTypeObject v3_3; - Python3_8::PyTypeObject v3_8; -} PyTypeObject; - /* These flags are used to determine if a type is a subclass. 
*/ constexpr long Pystack_TPFLAGS_INT_SUBCLASS = 1ul << 23u; constexpr long Pystack_TPFLAGS_LONG_SUBCLASS = 1ul << 24u; diff --git a/src/pystack/_pystack/cpython/runtime.h b/src/pystack/_pystack/cpython/runtime.h index adf6667..e75bf78 100644 --- a/src/pystack/_pystack/cpython/runtime.h +++ b/src/pystack/_pystack/cpython/runtime.h @@ -105,13 +105,15 @@ struct _ceval_runtime_state struct _gil_runtime_state gil; }; +struct PyThreadState; + typedef struct pyruntimestate { int preinitializing; int preinitialized; int core_initialized; int initialized; - PyThreadState* finalizing; + void* finalizing; struct pyinterpreters { @@ -171,7 +173,7 @@ typedef struct pyruntimestate int preinitialized; int core_initialized; int initialized; - PyThreadState* finalizing; + void* finalizing; struct pyinterpreters { diff --git a/src/pystack/_pystack/cpython/string.h b/src/pystack/_pystack/cpython/string.h index eb18e34..cf61ddd 100644 --- a/src/pystack/_pystack/cpython/string.h +++ b/src/pystack/_pystack/cpython/string.h @@ -109,14 +109,4 @@ typedef struct } // namespace Python3_12 -typedef union { - Python3::PyBytesObject v3; -} PyBytesObject; - -typedef union { - Python2::PyUnicodeObject v2; - Python3::PyUnicodeObject v3; - Python3_12::PyUnicodeObject v3_12; -} PyUnicodeObject; - } // namespace pystack diff --git a/src/pystack/_pystack/cpython/thread.h b/src/pystack/_pystack/cpython/thread.h index 2a701f0..c9b5da8 100644 --- a/src/pystack/_pystack/cpython/thread.h +++ b/src/pystack/_pystack/cpython/thread.h @@ -295,18 +295,4 @@ typedef struct _pythreadstate } PyThreadState; } // namespace Python3_13 -typedef union { - Python2::PyThreadState v2; - Python3_4::PyThreadState v3_4; - Python3_7::PyThreadState v3_7; - Python3_11::PyThreadState v3_11; - Python3_12::PyThreadState v3_12; - Python3_13::PyThreadState v3_13; -} PyThreadState; - -union CFrame { - Python3_11::CFrame v3_11; - Python3_12::CFrame v3_12; -}; - } // namespace pystack diff --git a/src/pystack/_pystack/mem.cpp 
b/src/pystack/_pystack/mem.cpp index d82a3e6..5c735e1 100644 --- a/src/pystack/_pystack/mem.cpp +++ b/src/pystack/_pystack/mem.cpp @@ -398,7 +398,7 @@ CorefileRemoteMemoryManager::StatusCode CorefileRemoteMemoryManager::getMemoryLocationFromCore(remote_addr_t addr, off_t* offset_in_file) const { auto corefile_it = std::find_if(d_vmaps.cbegin(), d_vmaps.cend(), [&](auto& map) { - return (map.Start() <= addr && addr <= map.End()) && (map.FileSize() != 0 && map.Offset() != 0); + return (map.Start() <= addr && addr < map.End()) && (map.FileSize() != 0 && map.Offset() != 0); }); if (corefile_it == d_vmaps.cend()) { return StatusCode::ERROR; diff --git a/src/pystack/_pystack/process.cpp b/src/pystack/_pystack/process.cpp index 4b7dd3f..a905c19 100644 --- a/src/pystack/_pystack/process.cpp +++ b/src/pystack/_pystack/process.cpp @@ -63,6 +63,61 @@ class DirectoryReader } // namespace namespace pystack { +namespace { // unnamed + +struct ParsedPyVersion +{ + int major; + int minor; + int patch; + const char* release_level; + int serial; +}; + +std::ostream& +operator<<(std::ostream& out, const ParsedPyVersion& version) +{ + // Use a temporary stringstream in case `out` is using hex or showbase + std::ostringstream oss; + oss << version.major << "." << version.minor << "." 
<< version.patch; + if (version.release_level) { + oss << version.release_level << version.serial; + } + + out << oss.str(); + return out; +} + +bool +parsePyVersionHex(uint64_t version, ParsedPyVersion& parsed) +{ + int major = (version >> 24) & 0xFF; + int minor = (version >> 16) & 0xFF; + int patch = (version >> 8) & 0xFF; + int level = (version >> 4) & 0x0F; + int count = (version >> 0) & 0x0F; + + const char* level_str = nullptr; + if (level == 0xA) { + level_str = "a"; + } else if (level == 0xB) { + level_str = "b"; + } else if (level == 0xC) { + level_str = "rc"; + } else if (level == 0xF) { + level_str = ""; + } + + if (major < 2 || major > 3 || level_str == nullptr || (level == 0xF && count != 0)) { + return false; // Doesn't look valid. + } + + parsed = ParsedPyVersion{major, minor, patch, level_str, count}; + return true; +} + +} // unnamed namespace + static std::vector getProcessTids(pid_t pid) { @@ -234,30 +289,30 @@ AbstractProcessManager::isValidInterpreterState(remote_addr_t addr) const return false; } - PyInterpreterState is; + Structure is(shared_from_this(), addr); // The check for valid addresses may fail if the address falls in the stack // space (there are "holes" in the address map space so just checking for // min_addr < addr < max_addr does not guarantee a valid address) so we need // to catch InvalidRemoteAddress exceptions. 
try { - copyObjectFromProcess(addr, &is); + is.copyFromRemote(); } catch (RemoteMemCopyError& ex) { return false; } - PyThreadState current_thread; - auto current_thread_addr = getField(is, &py_is_v::o_tstate_head); + auto current_thread_addr = is.getField(&py_is_v::o_tstate_head); if (!isAddressValid(current_thread_addr)) { return false; } + Structure current_thread(shared_from_this(), current_thread_addr); try { - copyObjectFromProcess(current_thread_addr, ¤t_thread); + current_thread.copyFromRemote(); } catch (RemoteMemCopyError& ex) { return false; } - if (getField(current_thread, &py_thread_v::o_interp) != addr) { + if (current_thread.getField(&py_thread_v::o_interp) != addr) { return false; } @@ -266,9 +321,9 @@ AbstractProcessManager::isValidInterpreterState(remote_addr_t addr) const // Validate dictionaries in the interpreter state std::unordered_map dictionaries( - {{"modules", getField(is, &py_is_v::o_modules)}, - {"sysdict", getField(is, &py_is_v::o_sysdict)}, - {"builtins", getField(is, &py_is_v::o_builtins)}}); + {{"modules", is.getField(&py_is_v::o_modules)}, + {"sysdict", is.getField(&py_is_v::o_sysdict)}, + {"builtins", is.getField(&py_is_v::o_builtins)}}); for (const auto& [dictname, addr] : dictionaries) { if (!isValidDictionaryObject(addr)) { LOG(DEBUG) << "The '" << dictname << "' dictionary object is not valid"; @@ -304,9 +359,8 @@ AbstractProcessManager::findInterpreterStateFromPyRuntime(remote_addr_t runtime_ LOG(INFO) << "Searching for PyInterpreterState based on PyRuntime address " << std::hex << std::showbase << runtime_addr; - PyRuntimeState py_runtime; - copyObjectFromProcess(runtime_addr, &py_runtime); - remote_addr_t interp_state = getField(py_runtime, &py_runtime_v::o_interp_head); + Structure py_runtime(shared_from_this(), runtime_addr); + remote_addr_t interp_state = py_runtime.getField(&py_runtime_v::o_interp_head); if (!isValidInterpreterState(interp_state)) { LOG(INFO) << "Failing to resolve PyInterpreterState based on PyRuntime 
address " << std::hex @@ -353,6 +407,40 @@ AbstractProcessManager::scanMemoryAreaForInterpreterState(const VirtualMap& map) return (remote_addr_t)result; } +remote_addr_t +AbstractProcessManager::scanMemoryAreaForDebugOffsets(const VirtualMap& map) const +{ + size_t size = map.Size(); + std::vector memory_buffer(size); + remote_addr_t base = map.Start(); + copyMemoryFromProcess(base, size, memory_buffer.data()); + + LOG(INFO) << std::showbase << std::hex << "Searching for debug offsets in memory area spanning from " + << map.Start() << " to " << map.End(); + + uint64_t* lower_bound = (uint64_t*)&memory_buffer.data()[0]; + uint64_t* upper_bound = (uint64_t*)&memory_buffer.data()[size]; + + uint64_t cookie; + memcpy(&cookie, "xdebugpy", sizeof(cookie)); + + for (uint64_t* raddr = lower_bound; raddr < upper_bound; raddr++) { + if (raddr[0] == cookie) { + uint64_t version = raddr[1]; + + ParsedPyVersion parsed; + if (parsePyVersionHex(version, parsed) && parsed.major == 3 && parsed.minor >= 13) { + auto offset = (remote_addr_t)raddr - (remote_addr_t)memory_buffer.data(); + auto addr = offset + base; + LOG(DEBUG) << std::hex << std::showbase << "Possible debug offsets found at address " + << addr << " in a mapping of " << map.Path(); + return addr; + } + } + } + return 0; +} + remote_addr_t AbstractProcessManager::scanBSS() const { @@ -393,6 +481,27 @@ AbstractProcessManager::scanHeap() const return scanMemoryAreaForInterpreterState(d_heap.value()); } +remote_addr_t +AbstractProcessManager::findDebugOffsetsFromMaps() const +{ + LOG(INFO) << "Scanning all writable path-backed maps for _Py_DebugOffsets"; + for (auto& map : d_memory_maps) { + if (map.Flags().find("w") != std::string::npos && !map.Path().empty()) { + LOG(DEBUG) << std::hex << std::showbase << "Attempting to locate _Py_DebugOffsets in map of " + << map.Path() << " starting at " << map.Start() << " and ending at " << map.End(); + LOG(DEBUG) << "Flags: " << map.Flags(); + try { + if (remote_addr_t result = 
scanMemoryAreaForDebugOffsets(map)) { + return result; + } + } catch (RemoteMemCopyError& ex) { + LOG(INFO) << "Failed to scan map starting at " << map.Start(); + } + } + } + return 0; +} + ssize_t AbstractProcessManager::copyMemoryFromProcess(remote_addr_t addr, size_t size, void* destination) const { @@ -411,35 +520,34 @@ std::string AbstractProcessManager::getStringFromAddress(remote_addr_t addr) const { Python2::_PyStringObject string; - PyUnicodeObject unicode; std::vector buffer; ssize_t len; remote_addr_t data_addr; if (d_major == 2) { - LOG(DEBUG) << std::hex << std::showbase << "Handling unicode object of version 2 from address " + LOG(DEBUG) << std::hex << std::showbase << "Handling string object of version 2 from address " << addr; copyObjectFromProcess(addr, &string); len = string.ob_base.ob_size; buffer.resize(len); data_addr = (remote_addr_t)((char*)addr + offsetof(Python2::_PyStringObject, ob_sval)); - LOG(DEBUG) << std::hex << std::showbase << "Copying ASCII data for unicode object from address " + LOG(DEBUG) << std::hex << std::showbase << "Copying ASCII data for string object from address " << data_addr; copyMemoryFromProcess(data_addr, len, buffer.data()); } else { LOG(DEBUG) << std::hex << std::showbase << "Handling unicode object of version 3 from address " << addr; - copyMemoryFromProcess(addr, offsets().py_unicode.size, &unicode); + Structure unicode(shared_from_this(), addr); - Python3::_PyUnicode_State state = getField(unicode, &py_unicode_v::o_state); + Python3::_PyUnicode_State state = unicode.getField(&py_unicode_v::o_state); if (state.kind != 1 || state.compact != 1) { throw InvalidRemoteObject(); } - len = getField(unicode, &py_unicode_v::o_length); + len = unicode.getField(&py_unicode_v::o_length); buffer.resize(len); - data_addr = addr + getFieldOffset(&py_unicode_v::o_ascii); + data_addr = unicode.getFieldRemoteAddress(&py_unicode_v::o_ascii); LOG(DEBUG) << std::hex << std::showbase << "Copying ASCII data for unicode object from 
address " << data_addr; copyMemoryFromProcess(data_addr, len, buffer.data()); @@ -469,15 +577,13 @@ AbstractProcessManager::getBytesFromAddress(remote_addr_t addr) const } else { LOG(DEBUG) << std::hex << std::showbase << "Handling bytes object of version 3 from address " << addr; - PyBytesObject bytes; - - copyMemoryFromProcess(addr, offsets().py_bytes.size, &bytes); - len = getField(bytes, &py_bytes_v::o_ob_size) + 1; + Structure bytes(shared_from_this(), addr); + len = bytes.getField(&py_bytes_v::o_ob_size) + 1; if (len < 1) { throw std::runtime_error("Incorrect size of the fetched bytes object"); } buffer.resize(len); - data_addr = addr + getFieldOffset(&py_bytes_v::o_ob_sval); + data_addr = bytes.getFieldRemoteAddress(&py_bytes_v::o_ob_sval); LOG(DEBUG) << std::hex << std::showbase << "Copying data for bytes object from address " << data_addr; @@ -559,21 +665,80 @@ AbstractProcessManager::isInterpreterActive() const { remote_addr_t runtime_addr = findSymbol("_PyRuntime"); if (runtime_addr) { - PyRuntimeState py_runtime; - copyObjectFromProcess(runtime_addr, &py_runtime); - remote_addr_t p = getField(py_runtime, &py_runtime_v::o_finalizing); + Structure py_runtime(shared_from_this(), runtime_addr); + remote_addr_t p = py_runtime.getField(&py_runtime_v::o_finalizing); return p == 0 ? 
InterpreterStatus::RUNNING : InterpreterStatus::FINALIZED; } return InterpreterStatus::UNKNOWN; } +void +AbstractProcessManager::setPythonVersionFromDebugOffsets() +{ + remote_addr_t pyruntime_addr = findSymbol("_PyRuntime"); + if (!pyruntime_addr) { + pyruntime_addr = findPyRuntimeFromElfData(); + } + if (!pyruntime_addr) { + pyruntime_addr = findDebugOffsetsFromMaps(); + } + + if (!pyruntime_addr) { + LOG(DEBUG) << "Unable to find _Py_DebugOffsets"; + return; + } + + try { + uint64_t cookie; + copyObjectFromProcess(pyruntime_addr, &cookie); + if (0 != memcmp(&cookie, "xdebugpy", 8)) { + LOG(DEBUG) << "Found a _PyRuntime structure without _Py_DebugOffsets"; + return; + } + + uint64_t version; + copyObjectFromProcess(pyruntime_addr + 8, &version); + + ParsedPyVersion parsed; + if (parsePyVersionHex(version, parsed) && parsed.major == 3 && parsed.minor >= 13) { + LOG(INFO) << std::hex << std::showbase << "_Py_DebugOffsets at " << pyruntime_addr + << " identify the version as " << parsed; + setPythonVersion(std::make_pair(parsed.major, parsed.minor)); + Structure py_runtime(shared_from_this(), pyruntime_addr); + std::unique_ptr offsets = loadDebugOffsets(py_runtime); + if (offsets) { + LOG(INFO) << "_Py_DebugOffsets appear to be valid and will be used"; + warnIfOffsetsAreMismatched(pyruntime_addr); + d_debug_offsets_addr = pyruntime_addr; + d_debug_offsets = std::move(offsets); + return; + } + } + } catch (const RemoteMemCopyError& ex) { + LOG(DEBUG) << std::hex << std::showbase << "Found apparently invalid _Py_DebugOffsets at " + << pyruntime_addr; + } + + LOG(DEBUG) << "Failed to validate _PyDebugOffsets structure"; + d_major = 0; + d_minor = 0; + d_py_v = nullptr; + d_debug_offsets_addr = 0; + d_debug_offsets.reset(); +} + std::pair AbstractProcessManager::findPythonVersion() const { + if (d_py_v) { + // Already set or previously found (probably via _Py_DebugOffsets) + return std::make_pair(d_major, d_minor); + } + auto version_symbol = findSymbol("Py_Version"); 
if (!version_symbol) { - LOG(DEBUG) << "Faled to determine Python version from symbols"; + LOG(DEBUG) << "Failed to determine Python version from symbols"; return {-1, -1}; } unsigned long version; @@ -596,49 +761,38 @@ AbstractProcessManager::setPythonVersion(const std::pair& version) // Note: getCPythonOffsets can throw. Don't set these if it does. d_major = version.first; d_minor = version.second; - - warnIfOffsetsAreMismatched(); } void -AbstractProcessManager::warnIfOffsetsAreMismatched() const +AbstractProcessManager::warnIfOffsetsAreMismatched(remote_addr_t runtime_addr) const { - if (!versionIsAtLeast(3, 13)) { - return; // Nothing to cross-reference; _Py_DebugOffsets was added in 3.13 - } - - remote_addr_t runtime_addr = findSymbol("_PyRuntime"); - if (!runtime_addr) { - return; // We need to start from the _PyRuntime structure - } + Structure py_runtime(shared_from_this(), runtime_addr); - PyRuntimeState py_runtime; - copyObjectFromProcess(runtime_addr, &py_runtime); - - if (0 != memcmp(&py_runtime, "xdebugpy", 8)) { + if (0 != memcmp(py_runtime.getField(&py_runtime_v::o_dbg_off_cookie), "xdebugpy", 8)) { LOG(WARNING) << "Debug offsets cookie doesn't match!"; return; } // Note: It's OK for pystack's size to be smaller, but not larger. #define compare_size(size_offset, pystack_struct) \ - if (getFieldOffset(size_offset) \ - && ((uint64_t)offsets().pystack_struct.size > getField(py_runtime, size_offset))) \ + if ((d_py_v->py_runtime.*size_offset).offset \ + && ((uint64_t)offsets().pystack_struct.size > py_runtime.getField(size_offset))) \ { \ - LOG(WARNING) << "Debug offsets mismatch: " #pystack_struct ".size " \ - << offsets().pystack_struct.size << " > " << getField(py_runtime, size_offset) \ - << " reported by CPython"; \ + LOG(INFO) << "Debug offsets mismatch: compiled-in " << sizeof(void*) * 8 << "-bit python3." 
\ + << d_minor << " " #pystack_struct ".size " << offsets().pystack_struct.size << " > " \ + << py_runtime.getField(size_offset) << " loaded from _Py_DebugOffsets"; \ } else \ do { \ } while (0) #define compare_offset(field_offset_offset, pystack_field) \ - if (getFieldOffset(field_offset_offset) \ - && (uint64_t)offsets().pystack_field.offset != getField(py_runtime, field_offset_offset)) \ + if ((d_py_v->py_runtime.*field_offset_offset).offset \ + && (uint64_t)offsets().pystack_field.offset != py_runtime.getField(field_offset_offset)) \ { \ - LOG(WARNING) << "Debug offsets mismatch: " #pystack_field << " " \ - << offsets().pystack_field.offset \ - << " != " << getField(py_runtime, field_offset_offset) << " reported by CPython"; \ + LOG(INFO) << "Debug offsets mismatch: compiled-in " << sizeof(void*) * 8 << "-bit python3." \ + << d_minor << " " #pystack_field << " " << offsets().pystack_field.offset \ + << " != " << py_runtime.getField(field_offset_offset) \ + << " loaded from _Py_DebugOffsets"; \ } else \ do { \ } while (0) @@ -701,6 +855,402 @@ AbstractProcessManager::warnIfOffsetsAreMismatched() const #undef compare_offset } +std::unique_ptr +AbstractProcessManager::loadDebugOffsets(Structure& py_runtime) const +{ + if (!versionIsAtLeast(3, 13)) { + return {}; // _Py_DebugOffsets was added in 3.13 + } + + if (0 != memcmp(py_runtime.getField(&py_runtime_v::o_dbg_off_cookie), "xdebugpy", 8)) { + LOG(WARNING) << "Debug offsets cookie doesn't match!"; + return {}; + } + + uint64_t version = py_runtime.getField(&py_runtime_v::o_dbg_off_py_version_hex); + int major = (version >> 24) & 0xff; + int minor = (version >> 16) & 0xff; + + if (major != d_major || minor != d_minor) { + LOG(WARNING) << "Detected version " << d_major << "." << d_minor + << " doesn't match debug offsets version " << major << "." 
<< minor << "!"; + return {}; + } + + python_v debug_offsets{}; + if (!copyDebugOffsets(py_runtime, debug_offsets)) { + return {}; + } + + if (!validateDebugOffsets(py_runtime, debug_offsets)) { + return {}; + } + + auto ret = std::make_unique(); + *ret = debug_offsets; + clampSizes(*ret); + return ret; +} + +bool +AbstractProcessManager::copyDebugOffsets(Structure& py_runtime, python_v& debug_offsets) + const +{ + // Fill in a temporary python_v with the offsets from the remote. + // For fields that aren't in _Py_DebugOffsets, make some assumptions, based + // in part on the size delta between the sizeof(PyObject) baked into our + // static offsets and the sizeof(PyObject) in the remote process/core. + Py_ssize_t new_pyobject_size = py_runtime.getField(&py_runtime_v::o_dbg_off_pyobject_struct_size); + Py_ssize_t pyobject_size_delta = -d_py_v->py_object.size + new_pyobject_size; + +#define set_size(pystack_struct, size_offset) \ + debug_offsets.pystack_struct.size = py_runtime.getField(size_offset) + +#define set_offset(pystack_field, field_offset_offset) \ + debug_offsets.pystack_field = {(offset_t)py_runtime.getField(field_offset_offset)} + + set_size(py_runtime, &py_runtime_v::o_dbg_off_runtime_state_struct_size); + set_offset(py_runtime.o_finalizing, &py_runtime_v::o_dbg_off_runtime_state_finalizing); + set_offset(py_runtime.o_interp_head, &py_runtime_v::o_dbg_off_runtime_state_interpreters_head); + + set_size(py_is, &py_runtime_v::o_dbg_off_interpreter_state_struct_size); + set_offset(py_is.o_next, &py_runtime_v::o_dbg_off_interpreter_state_next); + set_offset(py_is.o_tstate_head, &py_runtime_v::o_dbg_off_interpreter_state_threads_head); + set_offset(py_is.o_gc, &py_runtime_v::o_dbg_off_interpreter_state_gc); + set_offset(py_is.o_modules, &py_runtime_v::o_dbg_off_interpreter_state_imports_modules); + set_offset(py_is.o_sysdict, &py_runtime_v::o_dbg_off_interpreter_state_sysdict); + set_offset(py_is.o_builtins, 
&py_runtime_v::o_dbg_off_interpreter_state_builtins); + set_offset(py_is.o_gil_runtime_state, &py_runtime_v::o_dbg_off_interpreter_state_ceval_gil); + + set_size(py_thread, &py_runtime_v::o_dbg_off_thread_state_struct_size); + set_offset(py_thread.o_prev, &py_runtime_v::o_dbg_off_thread_state_prev); + set_offset(py_thread.o_next, &py_runtime_v::o_dbg_off_thread_state_next); + set_offset(py_thread.o_interp, &py_runtime_v::o_dbg_off_thread_state_interp); + set_offset(py_thread.o_frame, &py_runtime_v::o_dbg_off_thread_state_current_frame); + set_offset(py_thread.o_thread_id, &py_runtime_v::o_dbg_off_thread_state_thread_id); + set_offset(py_thread.o_native_thread_id, &py_runtime_v::o_dbg_off_thread_state_native_thread_id); + + set_size(py_frame, &py_runtime_v::o_dbg_off_interpreter_frame_struct_size); + set_offset(py_frame.o_back, &py_runtime_v::o_dbg_off_interpreter_frame_previous); + set_offset(py_frame.o_code, &py_runtime_v::o_dbg_off_interpreter_frame_executable); + set_offset(py_frame.o_prev_instr, &py_runtime_v::o_dbg_off_interpreter_frame_instr_ptr); + set_offset(py_frame.o_localsplus, &py_runtime_v::o_dbg_off_interpreter_frame_localsplus); + set_offset(py_frame.o_owner, &py_runtime_v::o_dbg_off_interpreter_frame_owner); + + set_size(py_code, &py_runtime_v::o_dbg_off_code_object_struct_size); + set_offset(py_code.o_filename, &py_runtime_v::o_dbg_off_code_object_filename); + set_offset(py_code.o_name, &py_runtime_v::o_dbg_off_code_object_name); + set_offset(py_code.o_lnotab, &py_runtime_v::o_dbg_off_code_object_linetable); + set_offset(py_code.o_firstlineno, &py_runtime_v::o_dbg_off_code_object_firstlineno); + set_offset(py_code.o_argcount, &py_runtime_v::o_dbg_off_code_object_argcount); + set_offset(py_code.o_varnames, &py_runtime_v::o_dbg_off_code_object_localsplusnames); + set_offset(py_code.o_code_adaptive, &py_runtime_v::o_dbg_off_code_object_co_code_adaptive); + + set_size(py_object, &py_runtime_v::o_dbg_off_pyobject_struct_size); + 
set_offset(py_object.o_ob_type, &py_runtime_v::o_dbg_off_pyobject_ob_type); + + set_size(py_type, &py_runtime_v::o_dbg_off_type_object_struct_size); + set_offset(py_type.o_tp_name, &py_runtime_v::o_dbg_off_type_object_tp_name); + // Assume our static offsets are correct about the distance from tp_name to the other fields + debug_offsets.py_type.o_tp_repr = { + d_py_v->py_type.o_tp_repr.offset - d_py_v->py_type.o_tp_name.offset + + debug_offsets.py_type.o_tp_name.offset}; + debug_offsets.py_type.o_tp_flags = { + d_py_v->py_type.o_tp_flags.offset - d_py_v->py_type.o_tp_name.offset + + debug_offsets.py_type.o_tp_name.offset}; + + set_size(py_tuple, &py_runtime_v::o_dbg_off_tuple_object_struct_size); + // Assume ob_base is the first field of PyVarObject and ob_size is the second + static_assert(sizeof(PyTupleObject::ob_base.ob_base) == offsetof(PyTupleObject, ob_base.ob_size)); + debug_offsets.py_tuple.o_ob_size = {(offset_t)new_pyobject_size}; + set_offset(py_tuple.o_ob_item, &py_runtime_v::o_dbg_off_tuple_object_ob_item); + + set_size(py_unicode, &py_runtime_v::o_dbg_off_unicode_object_struct_size); + set_offset(py_unicode.o_state, &py_runtime_v::o_dbg_off_unicode_object_state); + set_offset(py_unicode.o_length, &py_runtime_v::o_dbg_off_unicode_object_length); + set_offset(py_unicode.o_ascii, &py_runtime_v::o_dbg_off_unicode_object_asciiobject_size); + + set_size(py_gc, &py_runtime_v::o_dbg_off_gc_struct_size); + set_offset(py_gc.o_collecting, &py_runtime_v::o_dbg_off_gc_collecting); + + // Assume ob_size and ob_item are at the same location for list as for tuple + static_assert( + offsetof(PyListObject, ob_item) + sizeof(PyListObject::ob_item) <= sizeof(PyTupleObject)); + debug_offsets.py_list.size = debug_offsets.py_tuple.size; + + static_assert(offsetof(PyListObject, ob_base.ob_size) == offsetof(PyTupleObject, ob_base.ob_size)); + debug_offsets.py_list.o_ob_size = debug_offsets.py_tuple.o_ob_size; + + static_assert(offsetof(PyListObject, ob_item) == 
offsetof(PyTupleObject, ob_item)); + debug_offsets.py_list.o_ob_item = {debug_offsets.py_tuple.o_ob_item.offset}; + + // Assume our static offsets for dict are correct save possibly for sizeof(PyObject) changing + debug_offsets.py_dictkeys = d_py_v->py_dictkeys; + debug_offsets.py_dictvalues = d_py_v->py_dictvalues; + debug_offsets.py_dict = d_py_v->py_dict; + debug_offsets.py_dict.size += pyobject_size_delta; + debug_offsets.py_dict.o_ma_keys.offset += pyobject_size_delta; + debug_offsets.py_dict.o_ma_values.offset += pyobject_size_delta; + + // Assume our static offsets for float are correct save possibly for sizeof(PyObject) changing + debug_offsets.py_float = d_py_v->py_float; + debug_offsets.py_float.size += pyobject_size_delta; + debug_offsets.py_float.o_ob_fval.offset += pyobject_size_delta; + + // Assume our static offsets for long are correct save possibly for sizeof(PyObject) changing + debug_offsets.py_long = d_py_v->py_long; + debug_offsets.py_long.size += pyobject_size_delta; + debug_offsets.py_long.o_ob_size.offset += pyobject_size_delta; + debug_offsets.py_long.o_ob_digit.offset += pyobject_size_delta; + + // Assume our static offsets for bytes are correct save possibly for sizeof(PyObject) changing + debug_offsets.py_bytes = d_py_v->py_bytes; + debug_offsets.py_bytes.size += pyobject_size_delta; + debug_offsets.py_bytes.o_ob_size.offset += pyobject_size_delta; + debug_offsets.py_bytes.o_ob_sval.offset += pyobject_size_delta; + + // Assume our static offsets for cframe are all correct + debug_offsets.py_cframe = d_py_v->py_cframe; + + // Assume our static offsets for gilruntimestate are off by 8 bytes in a free-threading build. + // This is quite a hack... 
+ debug_offsets.py_gilruntimestate = d_py_v->py_gilruntimestate; + bool is_free_threading = static_cast(debug_offsets.py_object.size) > 2 * sizeof(void*); + if (is_free_threading) { + debug_offsets.py_gilruntimestate.size += 8; + debug_offsets.py_gilruntimestate.o_last_holder.offset += 8; + debug_offsets.py_gilruntimestate.o_locked.offset += 8; + } + +#undef set_size +#undef set_offset + + return true; +} + +bool +AbstractProcessManager::validateDebugOffsets( + const Structure& py_runtime, + python_v& debug_offsets) const +{ + // Simple sanity checks on the decoded offsets: + // - No structure is larger than 1 MB + // - Every field falls within its structure's size +#define check_size(pystack_struct, size_offset) \ + do { \ + if (debug_offsets.pystack_struct.size > 1024 * 1024) { \ + LOG(WARNING) << "Ignoring debug offsets because " #pystack_struct ".size (" \ + << debug_offsets.pystack_struct.size << ") reported at byte offset " \ + << (d_py_v->py_runtime.*size_offset).offset \ + << " in detected _Py_DebugOffsets structure at " << std::hex << std::showbase \ + << py_runtime.getFieldRemoteAddress(&py_runtime_v::o_dbg_off_cookie) \ + << " is implausibly large"; \ + return {}; \ + } \ + } while (0) + +#define check_field_bounds(structure, field) \ + do { \ + if (debug_offsets.structure.size < 0 \ + || (size_t)debug_offsets.structure.size < debug_offsets.structure.field.offset \ + || debug_offsets.structure.size - debug_offsets.structure.field.offset \ + < sizeof(decltype(debug_offsets.structure.field)::Type)) \ + { \ + LOG(WARNING) << "Ignoring debug offsets because " #structure ".size (" \ + << debug_offsets.structure.size << ") - " #structure "." 
#field ".offset (" \ + << debug_offsets.structure.field.offset << ") < the field's size (" \ + << sizeof(decltype(debug_offsets.structure.field)::Type) << ")"; \ + return {}; \ + } \ + } while (0) + + check_size(py_runtime, &py_runtime_v::o_dbg_off_runtime_state_struct_size); + check_field_bounds(py_runtime, o_finalizing); + check_field_bounds(py_runtime, o_interp_head); + + check_size(py_is, &py_runtime_v::o_dbg_off_interpreter_state_struct_size); + check_field_bounds(py_is, o_next); + check_field_bounds(py_is, o_tstate_head); + check_field_bounds(py_is, o_gc); + check_field_bounds(py_is, o_modules); + check_field_bounds(py_is, o_sysdict); + check_field_bounds(py_is, o_builtins); + check_field_bounds(py_is, o_gil_runtime_state); + + check_size(py_thread, &py_runtime_v::o_dbg_off_thread_state_struct_size); + check_field_bounds(py_thread, o_prev); + check_field_bounds(py_thread, o_next); + check_field_bounds(py_thread, o_interp); + check_field_bounds(py_thread, o_frame); + check_field_bounds(py_thread, o_thread_id); + check_field_bounds(py_thread, o_native_thread_id); + + check_size(py_frame, &py_runtime_v::o_dbg_off_interpreter_frame_struct_size); + check_field_bounds(py_frame, o_back); + check_field_bounds(py_frame, o_code); + check_field_bounds(py_frame, o_prev_instr); + check_field_bounds(py_frame, o_localsplus); + check_field_bounds(py_frame, o_owner); + + check_size(py_code, &py_runtime_v::o_dbg_off_code_object_struct_size); + check_field_bounds(py_code, o_filename); + check_field_bounds(py_code, o_name); + check_field_bounds(py_code, o_lnotab); + check_field_bounds(py_code, o_firstlineno); + check_field_bounds(py_code, o_argcount); + check_field_bounds(py_code, o_varnames); + check_field_bounds(py_code, o_code_adaptive); + + check_size(py_object, &py_runtime_v::o_dbg_off_pyobject_struct_size); + check_field_bounds(py_object, o_ob_type); + + check_size(py_type, &py_runtime_v::o_dbg_off_type_object_struct_size); + check_field_bounds(py_type, o_tp_name); + 
check_field_bounds(py_type, o_tp_repr); + check_field_bounds(py_type, o_tp_flags); + + check_size(py_tuple, &py_runtime_v::o_dbg_off_tuple_object_struct_size); + check_field_bounds(py_tuple, o_ob_size); + check_field_bounds(py_tuple, o_ob_item); + + check_size(py_unicode, &py_runtime_v::o_dbg_off_unicode_object_struct_size); + check_field_bounds(py_unicode, o_state); + check_field_bounds(py_unicode, o_length); + check_field_bounds(py_unicode, o_ascii); + + check_size(py_gc, &py_runtime_v::o_dbg_off_gc_struct_size); + check_field_bounds(py_gc, o_collecting); + + check_field_bounds(py_list, o_ob_size); + check_field_bounds(py_list, o_ob_item); + + check_field_bounds(py_dictkeys, o_dk_size); + check_field_bounds(py_dictkeys, o_dk_kind); + check_field_bounds(py_dictkeys, o_dk_nentries); + check_field_bounds(py_dictkeys, o_dk_indices); + + check_field_bounds(py_dictvalues, o_values); + + check_field_bounds(py_dict, o_ma_keys); + check_field_bounds(py_dict, o_ma_values); + + check_field_bounds(py_float, o_ob_fval); + + check_field_bounds(py_long, o_ob_size); + check_field_bounds(py_long, o_ob_digit); + + check_field_bounds(py_bytes, o_ob_size); + check_field_bounds(py_bytes, o_ob_sval); + + check_field_bounds(py_cframe, current_frame); + +#undef check_size +#undef check_field_bounds + + return true; +} + +void +AbstractProcessManager::clampSizes(python_v& debug_offsets) const +{ + // Clamp the size of each struct down to only what we need to copy. + // The runtime state and interpreter state both contain many fields beyond + // the ones that we're interested in or have offsets for. 
+#define update_size(structure, field) \ + debug_offsets.structure.size = std::max( \ + (size_t)debug_offsets.structure.size, \ + debug_offsets.structure.field.offset \ + + sizeof(decltype(debug_offsets.structure.field)::Type)) + + debug_offsets.py_runtime.size = 0; + update_size(py_runtime, o_finalizing); + update_size(py_runtime, o_interp_head); + + debug_offsets.py_is.size = 0; + update_size(py_is, o_next); + update_size(py_is, o_tstate_head); + update_size(py_is, o_gc); + update_size(py_is, o_modules); + update_size(py_is, o_sysdict); + update_size(py_is, o_builtins); + update_size(py_is, o_gil_runtime_state); + + debug_offsets.py_thread.size = 0; + update_size(py_thread, o_prev); + update_size(py_thread, o_next); + update_size(py_thread, o_interp); + update_size(py_thread, o_frame); + update_size(py_thread, o_thread_id); + update_size(py_thread, o_native_thread_id); + + debug_offsets.py_frame.size = 0; + update_size(py_frame, o_back); + update_size(py_frame, o_code); + update_size(py_frame, o_prev_instr); + update_size(py_frame, o_localsplus); + update_size(py_frame, o_owner); + + debug_offsets.py_code.size = 0; + update_size(py_code, o_filename); + update_size(py_code, o_name); + update_size(py_code, o_lnotab); + update_size(py_code, o_firstlineno); + update_size(py_code, o_argcount); + update_size(py_code, o_varnames); + update_size(py_code, o_code_adaptive); + + debug_offsets.py_object.size = 0; + update_size(py_object, o_ob_type); + + debug_offsets.py_type.size = 0; + update_size(py_type, o_tp_name); + update_size(py_type, o_tp_repr); + update_size(py_type, o_tp_flags); + + debug_offsets.py_tuple.size = 0; + update_size(py_tuple, o_ob_size); + update_size(py_tuple, o_ob_item); + + debug_offsets.py_unicode.size = 0; + update_size(py_unicode, o_state); + update_size(py_unicode, o_length); + update_size(py_unicode, o_ascii); + + debug_offsets.py_gc.size = 0; + update_size(py_gc, o_collecting); + + debug_offsets.py_list.size = 0; + update_size(py_list, 
o_ob_size); + update_size(py_list, o_ob_item); + + debug_offsets.py_dictkeys.size = 0; + update_size(py_dictkeys, o_dk_size); + update_size(py_dictkeys, o_dk_kind); + update_size(py_dictkeys, o_dk_nentries); + update_size(py_dictkeys, o_dk_indices); + + debug_offsets.py_dictvalues.size = 0; + update_size(py_dictvalues, o_values); + + debug_offsets.py_dict.size = 0; + update_size(py_dict, o_ma_keys); + update_size(py_dict, o_ma_values); + + debug_offsets.py_float.size = 0; + update_size(py_float, o_ob_fval); + + debug_offsets.py_long.size = 0; + update_size(py_long, o_ob_size); + update_size(py_long, o_ob_digit); + + debug_offsets.py_bytes.size = 0; + update_size(py_bytes, o_ob_size); + update_size(py_bytes, o_ob_sval); + + debug_offsets.py_cframe.size = 0; + update_size(py_cframe, current_frame); +} + bool AbstractProcessManager::versionIsAtLeast(int required_major, int required_minor) const { @@ -710,11 +1260,14 @@ AbstractProcessManager::versionIsAtLeast(int required_major, int required_minor) const python_v& AbstractProcessManager::offsets() const { + if (d_debug_offsets) { + return *d_debug_offsets; + } return *d_py_v; } remote_addr_t -AbstractProcessManager::findInterpreterStateFromElfData() const +AbstractProcessManager::findPyRuntimeFromElfData() const { LOG(INFO) << "Trying to resolve PyInterpreterState from Elf data"; SectionInfo section_info; @@ -729,7 +1282,47 @@ AbstractProcessManager::findInterpreterStateFromElfData() const "could not be found"; return 0; } - return findInterpreterStateFromPyRuntime(load_addr + section_info.corrected_addr); + return load_addr + section_info.corrected_addr; +} + +remote_addr_t +AbstractProcessManager::findInterpreterStateFromElfData() const +{ + remote_addr_t pyruntime = findPyRuntimeFromElfData(); + if (!pyruntime) { + return 0; + } + return findInterpreterStateFromPyRuntime(pyruntime); +} + +remote_addr_t +AbstractProcessManager::findInterpreterStateFromDebugOffsets() const +{ + if (!d_debug_offsets_addr) { + 
LOG(DEBUG) << "Debug offsets were never found"; + return 0; + } + + LOG(INFO) << "Searching for PyInterpreterState based on PyRuntime address " << std::hex + << std::showbase << d_debug_offsets_addr + << " found when searching for 3.13+ debug offsets"; + + try { + Structure runtime(shared_from_this(), d_debug_offsets_addr); + remote_addr_t interp_state = runtime.getField(&py_runtime_v::o_interp_head); + LOG(DEBUG) << "Checking interpreter state at " << std::hex << std::showbase << interp_state + << " found at address " + << runtime.getFieldRemoteAddress(&py_runtime_v::o_interp_head); + if (isValidInterpreterState(interp_state)) { + LOG(DEBUG) << "Interpreter head reference from debug offsets dereferences successfully"; + return interp_state; + } + } catch (...) { + // Swallow exceptions and fall through to return failure + } + LOG(INFO) << "Failed to resolve PyInterpreterState based on PyRuntime address " << std::hex + << std::showbase << d_debug_offsets_addr; + return 0; } ProcessManager::ProcessManager( diff --git a/src/pystack/_pystack/process.h b/src/pystack/_pystack/process.h index fad6d30..c8f7cb7 100644 --- a/src/pystack/_pystack/process.h +++ b/src/pystack/_pystack/process.h @@ -20,6 +20,9 @@ namespace pystack { +template +class Structure; + struct InvalidRemoteObject : public InvalidCopiedMemory { const char* what() const noexcept override @@ -75,6 +78,7 @@ class AbstractProcessManager : public std::enable_shared_from_this @@ -88,25 +92,11 @@ class AbstractProcessManager : public std::enable_shared_from_this findPythonVersion() const; + void setPythonVersionFromDebugOffsets(); void setPythonVersion(const std::pair& version); bool versionIsAtLeast(int required_major, int required_minor) const; const python_v& offsets() const; - template - inline offset_t getFieldOffset(FieldPointer OffsetsStruct::*field) const - { - return (d_py_v->get().*field).offset; - } - - template - inline const typename FieldPointer::Type& - getField(const typename 
OffsetsStruct::Structure& obj, FieldPointer OffsetsStruct::*field) const - { - offset_t offset = getFieldOffset(field); - auto address = reinterpret_cast(&obj) + offset; - return *reinterpret_cast(address); - } - protected: // Data members pid_t d_pid; @@ -121,6 +111,8 @@ class AbstractProcessManager : public std::enable_shared_from_this d_debug_offsets{}; mutable std::unordered_map d_type_cache; // Methods @@ -128,8 +120,16 @@ class AbstractProcessManager : public std::enable_shared_from_this loadDebugOffsets(Structure& py_runtime) const; + bool copyDebugOffsets(Structure& py_runtime, python_v& debug_offsets) const; + bool validateDebugOffsets(const Structure& py_runtime, python_v& debug_offsets) const; + void clampSizes(python_v& debug_offsets) const; remote_addr_t scanMemoryAreaForInterpreterState(const VirtualMap& map) const; + remote_addr_t scanMemoryAreaForDebugOffsets(const VirtualMap& map) const; }; template diff --git a/src/pystack/_pystack/process.pxd b/src/pystack/_pystack/process.pxd index cf750cd..ede12d5 100644 --- a/src/pystack/_pystack/process.pxd +++ b/src/pystack/_pystack/process.pxd @@ -24,13 +24,15 @@ cdef extern from "process.h" namespace "pystack": remote_addr_t scanBSS() except+ remote_addr_t scanHeap() except+ remote_addr_t scanAllAnonymousMaps() except+ + remote_addr_t findInterpreterStateFromDebugOffsets() except+ remote_addr_t findInterpreterStateFromSymbols() except+ remote_addr_t findInterpreterStateFromElfData() except+ ssize_t copyMemoryFromProcess(remote_addr_t addr, ssize_t size, void *destination) except+ vector[int] Tids() except+ InterpreterStatus isInterpreterActive() except+ pair[int, int] findPythonVersion() - void setPythonVersion(pair[int, int] version) + void setPythonVersion(pair[int, int] version) except + + void setPythonVersionFromDebugOffsets() except + cdef cppclass ProcessManager(AbstractProcessManager): ProcessManager(int pid, shared_ptr[ProcessTracer] tracer, shared_ptr[ProcessAnalyzer] analyzer, vector[VirtualMap] 
memory_maps, MemoryMapInformation map_info) except+ diff --git a/src/pystack/_pystack/pycode.cpp b/src/pystack/_pystack/pycode.cpp index d85727d..2420937 100644 --- a/src/pystack/_pystack/pycode.cpp +++ b/src/pystack/_pystack/pycode.cpp @@ -105,11 +105,11 @@ static LocationInfo getLocationInfo( const std::shared_ptr& manager, remote_addr_t code_addr, - PyCodeObject& code, + Structure& code, uintptr_t last_instruction_index) { - int code_lineno = manager->getField(code, &py_code_v::o_firstlineno); - remote_addr_t lnotab_addr = manager->getField(code, &py_code_v::o_lnotab); + int code_lineno = code.getField(&py_code_v::o_firstlineno); + remote_addr_t lnotab_addr = code.getField(&py_code_v::o_lnotab); LOG(DEBUG) << std::hex << std::showbase << "Copying lnotab data from address " << lnotab_addr; std::string lnotab = manager->getBytesFromAddress(lnotab_addr); @@ -121,7 +121,7 @@ getLocationInfo( // Check out https://github.com/python/cpython/blob/main/Objects/lnotab_notes.txt for the format of // the lnotab table in different versions of the interpreter. 
if (manager->versionIsAtLeast(3, 11)) { - uintptr_t code_adaptive = code_addr + manager->getFieldOffset(&py_code_v::o_code_adaptive); + uintptr_t code_adaptive = code.getFieldRemoteAddress(&py_code_v::o_code_adaptive); ptrdiff_t addrq = (reinterpret_cast(last_instruction_index) - reinterpret_cast(code_adaptive)); @@ -178,9 +178,8 @@ isValid(const std::shared_ptr& manager, remote_add } return false; } else { - PyObject obj; - manager->copyObjectFromProcess(addr, &obj); - return reinterpret_cast(obj.ob_type) == pycodeobject_addr; + Structure obj(manager, addr); + return obj.getField(&py_object_v::o_ob_type) == pycodeobject_addr; } } return true; @@ -191,7 +190,6 @@ CodeObject::CodeObject( remote_addr_t addr, uintptr_t lasti) { - PyCodeObject code; if (!isValid(manager, addr)) { d_filename = "???"; d_scope = "???"; @@ -200,15 +198,15 @@ CodeObject::CodeObject( return; } LOG(DEBUG) << std::hex << std::showbase << "Copying code struct from address " << addr; - manager->copyMemoryFromProcess(addr, manager->offsets().py_code.size, &code); + Structure code(manager, addr); - remote_addr_t filename_addr = manager->getField(code, &py_code_v::o_filename); + remote_addr_t filename_addr = code.getField(&py_code_v::o_filename); LOG(DEBUG) << std::hex << std::showbase << "Copying filename Python string from address " << filename_addr; d_filename = manager->getStringFromAddress(filename_addr); LOG(DEBUG) << "Code object filename: " << d_filename; - remote_addr_t name_addr = manager->getField(code, &py_code_v::o_name); + remote_addr_t name_addr = code.getField(&py_code_v::o_name); LOG(DEBUG) << std::hex << std::showbase << "Copying code name Python string from address " << name_addr; d_scope = manager->getStringFromAddress(name_addr); @@ -220,11 +218,11 @@ CodeObject::CodeObject( << d_location_info.end_lineno << ") column_range=(" << d_location_info.column << ", " << d_location_info.end_column << ")"; - d_narguments = manager->getField(code, &py_code_v::o_argcount); + d_narguments = 
code.getField(&py_code_v::o_argcount); LOG(DEBUG) << "Code object n arguments: " << d_narguments; LOG(DEBUG) << "Copying variable names"; - remote_addr_t varnames_addr = manager->getField(code, &py_code_v::o_varnames); + remote_addr_t varnames_addr = code.getField(&py_code_v::o_varnames); TupleObject varnames(manager, varnames_addr); std::transform( varnames.Items().cbegin(), diff --git a/src/pystack/_pystack/pyframe.cpp b/src/pystack/_pystack/pyframe.cpp index 7216996..c752ddd 100644 --- a/src/pystack/_pystack/pyframe.cpp +++ b/src/pystack/_pystack/pyframe.cpp @@ -19,11 +19,9 @@ FrameObject::FrameObject( ssize_t frame_no) : d_manager(manager) { - PyFrameObject frame; LOG(DEBUG) << "Copying frame number " << frame_no; LOG(DEBUG) << std::hex << std::showbase << "Copying frame struct from address " << addr; - - manager->copyMemoryFromProcess(addr, manager->offsets().py_frame.size, &frame); + Structure frame(manager, addr); d_addr = addr; d_frame_no = frame_no; @@ -37,7 +35,7 @@ FrameObject::FrameObject( d_code = getCode(manager, frame); - auto prev_addr = manager->getField(frame, &py_frame_v::o_back); + auto prev_addr = frame.getField(&py_frame_v::o_back); LOG(DEBUG) << std::hex << std::showbase << "Previous frame address: " << prev_addr; if (prev_addr) { d_prev = std::make_shared(manager, prev_addr, next_frame_no); @@ -48,11 +46,11 @@ FrameObject::FrameObject( bool FrameObject::getIsShim( const std::shared_ptr& manager, - const PyFrameObject& frame) + Structure& frame) { if (manager->versionIsAtLeast(3, 12)) { constexpr int FRAME_OWNED_BY_CSTACK = 3; - return manager->getField(frame, &py_frame_v::o_owner) == FRAME_OWNED_BY_CSTACK; + return frame.getField(&py_frame_v::o_owner) == FRAME_OWNED_BY_CSTACK; } return false; // Versions before 3.12 don't have shim frames. 
} @@ -60,18 +58,18 @@ FrameObject::getIsShim( std::unique_ptr FrameObject::getCode( const std::shared_ptr& manager, - const PyFrameObject& frame) + Structure& frame) { - remote_addr_t py_code_addr = manager->getField(frame, &py_frame_v::o_code); + remote_addr_t py_code_addr = frame.getField(&py_frame_v::o_code); LOG(DEBUG) << std::hex << std::showbase << "Attempting to construct code object from address " << py_code_addr; uintptr_t last_instruction; if (manager->versionIsAtLeast(3, 11)) { - last_instruction = manager->getField(frame, &py_frame_v::o_prev_instr); + last_instruction = frame.getField(&py_frame_v::o_prev_instr); } else { - last_instruction = manager->getField(frame, &py_frame_v::o_lasti); + last_instruction = frame.getField(&py_frame_v::o_lasti); } return std::make_unique(manager, py_code_addr, last_instruction); } @@ -79,7 +77,7 @@ FrameObject::getCode( bool FrameObject::isEntry( const std::shared_ptr& manager, - const PyFrameObject& frame) + Structure& frame) { if (manager->versionIsAtLeast(3, 12)) { // This is an entry frame if the previous frame was a shim, or if @@ -89,7 +87,7 @@ FrameObject::isEntry( return (d_prev && d_prev->d_is_shim) || (d_frame_no == 0 && d_is_shim); } else if (manager->versionIsAtLeast(3, 11)) { // This is an entry frame if it has an entry flag set. 
- return manager->getField(frame, &py_frame_v::o_is_entry); + return frame.getField(&py_frame_v::o_is_entry); } return true; } @@ -105,7 +103,8 @@ FrameObject::resolveLocalVariables() const size_t n_arguments = d_code->NArguments(); const size_t n_locals = d_code->Varnames().size(); - const remote_addr_t locals_addr = d_addr + d_manager->getFieldOffset(&py_frame_v::o_localsplus); + Structure frame(d_manager, d_addr); + const remote_addr_t locals_addr = frame.getFieldRemoteAddress(&py_frame_v::o_localsplus); if (n_locals < n_arguments) { throw std::runtime_error("Found more arguments than local variables"); diff --git a/src/pystack/_pystack/pyframe.h b/src/pystack/_pystack/pyframe.h index 3c5418c..69d9127 100644 --- a/src/pystack/_pystack/pyframe.h +++ b/src/pystack/_pystack/pyframe.h @@ -1,11 +1,12 @@ #pragma once -#include "memory" -#include "unordered_map" +#include +#include #include "mem.h" #include "process.h" #include "pycode.h" +#include "structure.h" namespace pystack { @@ -32,14 +33,15 @@ class FrameObject private: // Methods - static bool - getIsShim(const std::shared_ptr& manager, const PyFrameObject& frame); + static bool getIsShim( + const std::shared_ptr& manager, + Structure& frame); static std::unique_ptr - getCode(const std::shared_ptr& manager, const PyFrameObject& frame); + getCode(const std::shared_ptr& manager, Structure& frame); bool - isEntry(const std::shared_ptr& manager, const PyFrameObject& frame); + isEntry(const std::shared_ptr& manager, Structure& frame); // Data members const std::shared_ptr d_manager{}; diff --git a/src/pystack/_pystack/pythread.cpp b/src/pystack/_pystack/pythread.cpp index 5ea4837..d50e412 100644 --- a/src/pystack/_pystack/pythread.cpp +++ b/src/pystack/_pystack/pythread.cpp @@ -6,9 +6,9 @@ #include "mem.h" #include "native_frame.h" #include "process.h" -#include "pycompat.h" #include "pyframe.h" #include "pythread.h" +#include "structure.h" #include "version.h" #include "cpython/pthread.h" @@ -47,10 +47,9 @@ 
findPthreadTidOffset( remote_addr_t interp_state_addr) { LOG(DEBUG) << "Attempting to locate tid offset in pthread structure"; - PyInterpreterState is; - manager->copyObjectFromProcess(interp_state_addr, &is); + Structure is(manager, interp_state_addr); - auto current_thread_addr = manager->getField(is, &py_is_v::o_tstate_head); + auto current_thread_addr = is.getField(&py_is_v::o_tstate_head); auto thread_head = current_thread_addr; @@ -64,9 +63,8 @@ findPthreadTidOffset( // pthread' that we know about to avoid having to do guess-work by doing a // linear scan over the struct. while (current_thread_addr != (remote_addr_t) nullptr) { - PyThreadState current_thread; - manager->copyObjectFromProcess(current_thread_addr, ¤t_thread); - auto pthread_id_addr = manager->getField(current_thread, &py_thread_v::o_thread_id); + Structure current_thread(manager, current_thread_addr); + auto pthread_id_addr = current_thread.getField(&py_thread_v::o_thread_id); pid_t the_tid; std::vector glibc_pthread_offset_candidates = { @@ -80,7 +78,7 @@ findPthreadTidOffset( return candidate; } } - remote_addr_t next_thread_addr = manager->getField(current_thread, &py_thread_v::o_next); + remote_addr_t next_thread_addr = current_thread.getField(&py_thread_v::o_next); if (next_thread_addr == current_thread_addr) { break; } @@ -91,9 +89,8 @@ findPthreadTidOffset( current_thread_addr = thread_head; while (current_thread_addr != (remote_addr_t) nullptr) { - PyThreadState current_thread; - manager->copyObjectFromProcess(current_thread_addr, ¤t_thread); - auto pthread_id_addr = manager->getField(current_thread, &py_thread_v::o_thread_id); + Structure current_thread(manager, current_thread_addr); + auto pthread_id_addr = current_thread.getField(&py_thread_v::o_thread_id); // Attempt to locate a field in the pthread struct that's equal to the pid. 
uintptr_t buffer[100]; @@ -118,7 +115,7 @@ findPthreadTidOffset( } } - remote_addr_t next_thread_addr = manager->getField(current_thread, &py_thread_v::o_next); + remote_addr_t next_thread_addr = current_thread.getField(&py_thread_v::o_next); if (next_thread_addr == current_thread_addr) { break; } @@ -133,9 +130,8 @@ PyThread::PyThread(const std::shared_ptr& manager, { d_pid = manager->Pid(); - PyThreadState ts; LOG(DEBUG) << std::hex << std::showbase << "Copying main thread struct from address " << addr; - manager->copyObjectFromProcess(addr, &ts); + Structure ts(manager, addr); remote_addr_t frame_addr = getFrameAddr(manager, ts); if (frame_addr != (remote_addr_t) nullptr) { @@ -145,11 +141,11 @@ PyThread::PyThread(const std::shared_ptr& manager, } d_addr = addr; - remote_addr_t candidate_next_addr = manager->getField(ts, &py_thread_v::o_next); + remote_addr_t candidate_next_addr = ts.getField(&py_thread_v::o_next); d_next_addr = candidate_next_addr == addr ? (remote_addr_t) nullptr : candidate_next_addr; - d_pthread_id = manager->getField(ts, &py_thread_v::o_thread_id); - d_tid = getThreadTid(manager, addr, d_pthread_id); + d_pthread_id = ts.getField(&py_thread_v::o_thread_id); + d_tid = getThreadTid(manager, ts, d_pthread_id); d_next = nullptr; if (d_next_addr != (remote_addr_t)NULL) { @@ -165,14 +161,12 @@ PyThread::PyThread(const std::shared_ptr& manager, int PyThread::getThreadTid( const std::shared_ptr& manager, - remote_addr_t thread_addr, + Structure& ts, unsigned long pthread_id) { int the_tid = -1; if (manager->versionIsAtLeast(3, 11)) { - manager->copyObjectFromProcess( - (remote_addr_t)(thread_addr + manager->getFieldOffset(&py_thread_v::o_native_thread_id)), - &the_tid); + the_tid = ts.getField(&py_thread_v::o_native_thread_id); } else { the_tid = inferTidFromPThreadStructure(manager, pthread_id); } @@ -219,19 +213,18 @@ PyThread::inferTidFromPThreadStructure( remote_addr_t PyThread::getFrameAddr( const std::shared_ptr& manager, - const 
PyThreadState& ts) + Structure& ts) { if (manager->versionIsAtLeast(3, 11) && !manager->versionIsAtLeast(3, 13)) { - remote_addr_t cframe_addr = manager->getField(ts, &py_thread_v::o_frame); + remote_addr_t cframe_addr = ts.getField(&py_thread_v::o_frame); if (!manager->isAddressValid(cframe_addr)) { return reinterpret_cast(nullptr); } - CFrame cframe; - manager->copyObjectFromProcess(cframe_addr, &cframe); - return manager->getField(cframe, &py_cframe_v::current_frame); + Structure cframe(manager, cframe_addr); + return cframe.getField(&py_cframe_v::current_frame); } else { - return manager->getField(ts, &py_thread_v::o_frame); + return ts.getField(&py_thread_v::o_frame); } } @@ -261,7 +254,7 @@ PyThread::isGCCollecting() const PyThread::GilStatus PyThread::calculateGilStatus( - PyThreadState& ts, + Structure& ts, const std::shared_ptr& manager) const { LOG(DEBUG) << "Attempting to determine GIL Status"; @@ -277,25 +270,20 @@ PyThread::calculateGilStatus( // a ceval state, which points to a GIL runtime state. // If that GIL state has `locked` set and `last_holder` is d_addr, // then the thread represented by this PyThread holds the GIL. - PyInterpreterState interp; - auto is_addr = manager->getField(ts, &py_thread_v::o_interp); - manager->copyObjectFromProcess(is_addr, &interp); + auto is_addr = ts.getField(&py_thread_v::o_interp); + Structure interp(manager, is_addr); - auto gil_addr = manager->getField(interp, &py_is_v::o_gil_runtime_state); - - Python3_9::_gil_runtime_state gil; - manager->copyObjectFromProcess(gil_addr, &gil); - - auto locked = *reinterpret_cast(&gil.locked); - auto holder = *reinterpret_cast(&gil.last_holder); + auto gil_addr = interp.getField(&py_is_v::o_gil_runtime_state); + Structure gil(manager, gil_addr); + auto locked = gil.getField(&py_gilruntimestate_v::o_locked); + auto holder = gil.getField(&py_gilruntimestate_v::o_last_holder); return (locked && holder == d_addr ? 
GilStatus::HELD : GilStatus::NOT_HELD); } else if (manager->versionIsAtLeast(3, 8)) { // Fast, exact method by checking the gilstate structure in _PyRuntime LOG(DEBUG) << "Searching for the GIL by checking the value of 'tstate_current'"; - PyRuntimeState runtime; - manager->copyObjectFromProcess(pyruntime, &runtime); - uintptr_t tstate_current = manager->getField(runtime, &py_runtime_v::o_tstate_current); + Structure runtime(manager, pyruntime); + uintptr_t tstate_current = runtime.getField(&py_runtime_v::o_tstate_current); return (tstate_current == d_addr ? GilStatus::HELD : GilStatus::NOT_HELD); } else { LOG(DEBUG) << "Searching for the GIL by scanning the _PyRuntime structure"; @@ -338,32 +326,31 @@ PyThread::calculateGilStatus( PyThread::GCStatus PyThread::calculateGCStatus( - PyThreadState& ts, + Structure& ts, const std::shared_ptr& manager) const { LOG(DEBUG) << "Attempting to determine GC Status"; - GCRuntimeState gcstate; + remote_addr_t gcstate_addr; if (manager->versionIsAtLeast(3, 9)) { - PyInterpreterState interp; - auto is_addr = manager->getField(ts, &py_thread_v::o_interp); - manager->copyObjectFromProcess(is_addr, &interp); - gcstate = manager->getField(interp, &py_is_v ::o_gc); + auto is_addr = ts.getField(&py_thread_v::o_interp); + Structure interp(manager, is_addr); + gcstate_addr = interp.getFieldRemoteAddress(&py_is_v::o_gc); } else if (manager->versionIsAtLeast(3, 7)) { remote_addr_t pyruntime = manager->findSymbol("_PyRuntime"); if (!pyruntime) { LOG(DEBUG) << "Failed to get GC status because the _PyRuntime symbol is unavailable"; return GCStatus::COLLECTING_UNKNOWN; } - PyRuntimeState runtime; - manager->copyObjectFromProcess(pyruntime, &runtime); - gcstate = manager->getField(runtime, &py_runtime_v::o_gc); + Structure runtime(manager, pyruntime); + gcstate_addr = runtime.getFieldRemoteAddress(&py_runtime_v::o_gc); } else { LOG(DEBUG) << "GC Status retrieval not supported by this Python version"; return GCStatus::COLLECTING_UNKNOWN; } - 
auto collecting = manager->getField(gcstate, &py_gc_v::o_collecting); + Structure gcstate(manager, gcstate_addr); + auto collecting = gcstate.getField(&py_gc_v::o_collecting); LOG(DEBUG) << "GC status correctly retrieved: " << collecting; return collecting ? GCStatus::COLLECTING : GCStatus::NOT_COLLECTING; } @@ -380,10 +367,8 @@ getThreadFromInterpreterState( } LOG(DEBUG) << std::hex << std::showbase << "Copying PyInterpreterState struct from address " << addr; - PyInterpreterState is; - manager->copyObjectFromProcess(addr, &is); - - auto thread_addr = manager->getField(is, &py_is_v::o_tstate_head); + Structure is(manager, addr); + auto thread_addr = is.getField(&py_is_v::o_tstate_head); return std::make_shared(manager, thread_addr); } diff --git a/src/pystack/_pystack/pythread.h b/src/pystack/_pystack/pythread.h index 93b9626..ab02c67 100644 --- a/src/pystack/_pystack/pythread.h +++ b/src/pystack/_pystack/pythread.h @@ -47,8 +47,9 @@ class PyThread : public Thread GCStatus isGCCollecting() const; // Static Methods - static remote_addr_t - getFrameAddr(const std::shared_ptr& manager, const PyThreadState& ts); + static remote_addr_t getFrameAddr( + const std::shared_ptr& manager, + Structure& ts); private: // Data members @@ -62,10 +63,10 @@ class PyThread : public Thread // Methods GilStatus calculateGilStatus( - PyThreadState& ts, + Structure& ts, const std::shared_ptr& manager) const; GCStatus calculateGCStatus( - PyThreadState& ts, + Structure& ts, const std::shared_ptr& manager) const; // Static Methods @@ -74,7 +75,7 @@ class PyThread : public Thread unsigned long pthread_id); static int getThreadTid( const std::shared_ptr& manager, - remote_addr_t thread_addr, + Structure& ts, unsigned long pthread_id); }; diff --git a/src/pystack/_pystack/pytypes.cpp b/src/pystack/_pystack/pytypes.cpp index 892c952..8e9fc79 100644 --- a/src/pystack/_pystack/pytypes.cpp +++ b/src/pystack/_pystack/pytypes.cpp @@ -7,6 +7,7 @@ #include "logging.h" #include "pytypes.h" +#include 
"structure.h" #include "version.h" namespace pystack { @@ -88,17 +89,15 @@ TupleObject::TupleObject( { d_manager = manager; - PyTupleObject tuple; - manager->copyMemoryFromProcess(addr, manager->offsets().py_tuple.size, &tuple); - - ssize_t num_items = manager->getField(tuple, &py_tuple_v::o_ob_size); + Structure tuple(manager, addr); + ssize_t num_items = tuple.getField(&py_tuple_v::o_ob_size); if (num_items == 0) { LOG(DEBUG) << std::hex << std::showbase << "There are no elements in this tuple"; return; } d_items.resize(num_items); manager->copyMemoryFromProcess( - addr + manager->getFieldOffset(&py_tuple_v::o_ob_item), + tuple.getFieldRemoteAddress(&py_tuple_v::o_ob_item), num_items * sizeof(PyObject*), d_items.data()); } @@ -120,17 +119,15 @@ ListObject::ListObject(const std::shared_ptr& mana { d_manager = manager; - PyListObject list; - manager->copyMemoryFromProcess(addr, manager->offsets().py_list.size, &list); - - ssize_t num_items = manager->getField(list, &py_list_v::o_ob_size); + Structure list(manager, addr); + ssize_t num_items = list.getField(&py_list_v::o_ob_size); if (num_items == 0) { LOG(DEBUG) << std::hex << std::showbase << "There are no elements in this list"; return; } d_items.resize(num_items); manager->copyMemoryFromProcess( - (remote_addr_t)manager->getField(list, &py_list_v::o_ob_item), + (remote_addr_t)list.getField(&py_list_v::o_ob_item), num_items * sizeof(PyObject*), d_items.data()); } @@ -160,12 +157,11 @@ LongObject::LongObject( constexpr unsigned int shift = 15; #endif - _PyLongObject longobj; - manager->copyMemoryFromProcess(addr, manager->offsets().py_long.size, &longobj); + Structure longobj(manager, addr); ssize_t size; bool negative; - Py_ssize_t ob_size = manager->getField(longobj, &py_long_v::o_ob_size); + Py_ssize_t ob_size = longobj.getField(&py_long_v::o_ob_size); if (manager->versionIsAtLeast(3, 12)) { auto lv_tag = *reinterpret_cast(&ob_size); negative = (lv_tag & 3) == 2; @@ -200,7 +196,7 @@ LongObject::LongObject( 
std::vector digits; digits.resize(size); manager->copyMemoryFromProcess( - addr + manager->getFieldOffset(&py_long_v::o_ob_digit), + longobj.getFieldRemoteAddress(&py_long_v::o_ob_digit), sizeof(digit) * size, digits.data()); for (ssize_t i = 0; i < size; ++i) { @@ -250,25 +246,24 @@ LongObject::Overflowed() const void getDictEntries( const std::shared_ptr& manager, - const Python3::PyDictObject& dict, + Structure& dict, ssize_t& num_items, std::vector& valid_entries) { - remote_addr_t keys_addr = manager->getField(dict, &py_dict_v::o_ma_keys); assert(manager->versionIsAtLeast(3, 0)); + remote_addr_t keys_addr = dict.getField(&py_dict_v::o_ma_keys); ssize_t dk_size = 0; int dk_kind = 0; - PyDictKeysObject keys; - manager->copyMemoryFromProcess(keys_addr, manager->offsets().py_dictkeys.size, &keys); - num_items = manager->getField(keys, &py_dictkeys_v::o_dk_nentries); - dk_size = manager->getField(keys, &py_dictkeys_v::o_dk_size); + Structure keys(manager, keys_addr); + num_items = keys.getField(&py_dictkeys_v::o_dk_nentries); + dk_size = keys.getField(&py_dictkeys_v::o_dk_size); if (manager->versionIsAtLeast(3, 11)) { // We're reusing the o_dk_size offset for dk_log2_size. Fix up the value. 
dk_size = 1L << dk_size; // Added in 3.11 - dk_kind = manager->getField(keys, &py_dictkeys_v::o_dk_kind); + dk_kind = keys.getField(&py_dictkeys_v::o_dk_kind); } if (num_items == 0) { LOG(DEBUG) << std::hex << std::showbase << "There are no elements in this dict"; @@ -293,8 +288,8 @@ getDictEntries( offset = 8 * dk_size; } - offset_t dk_indices_offset = manager->getFieldOffset(&py_dictkeys_v::o_dk_indices); - remote_addr_t entries_addr = keys_addr + dk_indices_offset + offset; + offset_t dk_indices_addr = keys.getFieldRemoteAddress(&py_dictkeys_v::o_dk_indices); + remote_addr_t entries_addr = dk_indices_addr + offset; std::vector raw_entries; raw_entries.resize(num_items); @@ -363,8 +358,7 @@ DictObject::DictObject(std::shared_ptr manager, re void DictObject::loadFromPython3(remote_addr_t addr) { - Python3::PyDictObject dict; - d_manager->copyMemoryFromProcess(addr, d_manager->offsets().py_dict.size, &dict); + Structure dict(d_manager, addr); ssize_t num_items; std::vector valid_entries; @@ -393,13 +387,13 @@ DictObject::loadFromPython3(remote_addr_t addr) * All dicts sharing same key must have same insertion order. 
*/ - remote_addr_t dictvalues_addr = d_manager->getField(dict, &py_dict_v::o_ma_values); + remote_addr_t dictvalues_addr = dict.getField(&py_dict_v::o_ma_values); + Structure dictvalues(d_manager, dictvalues_addr); // Get the values in one copy if we are dealing with a split-table dictionary if (dictvalues_addr != 0) { d_values.resize(num_items); - auto values_offset = d_manager->getFieldOffset(&py_dictvalues_v::o_values); - auto values_addr = dictvalues_addr + values_offset; + auto values_addr = dictvalues.getFieldRemoteAddress(&py_dictvalues_v::o_values); d_manager->copyMemoryFromProcess(values_addr, num_items * sizeof(PyObject*), d_values.data()); } else { std::transform( @@ -519,9 +513,9 @@ Object::Object(const std::shared_ptr& manager, rem { LOG(DEBUG) << std::hex << std::showbase << "Copying PyObject data from address " << addr; - PyObject obj; + Structure obj(manager, addr); try { - manager->copyMemoryFromProcess(addr, manager->offsets().py_object.size, &obj); + obj.copyFromRemote(); } catch (RemoteMemCopyError& ex) { LOG(WARNING) << std::hex << std::showbase << "Failed to read PyObject data from address " << d_addr; @@ -529,13 +523,11 @@ Object::Object(const std::shared_ptr& manager, rem return; } - PyTypeObject cls; - d_type_addr = manager->getField(obj, &py_object_v::o_ob_type); + d_type_addr = obj.getField(&py_object_v::o_ob_type); LOG(DEBUG) << std::hex << std::showbase << "Copying typeobject from address " << d_type_addr; + Structure cls(manager, d_type_addr); try { - manager->copyMemoryFromProcess(d_type_addr, manager->offsets().py_type.size, &cls); - - d_flags = manager->getField(cls, &py_type_v::o_tp_flags); + d_flags = cls.getField(&py_type_v::o_tp_flags); } catch (RemoteMemCopyError& ex) { LOG(WARNING) << std::hex << std::showbase << "Failed to read typeobject from address " << d_type_addr; @@ -543,7 +535,7 @@ Object::Object(const std::shared_ptr& manager, rem return; } - remote_addr_t name_addr = manager->getField(cls, &py_type_v::o_tp_name); + 
remote_addr_t name_addr = cls.getField(&py_type_v::o_tp_name); try { d_classname = manager->getCStringFromAddress(name_addr); } catch (RemoteMemCopyError& ex) { @@ -625,9 +617,8 @@ Object::toInteger() const double Object::toFloat() const { - PyFloatObject the_float; - d_manager->copyMemoryFromProcess(d_addr, d_manager->offsets().py_float.size, &the_float); - return d_manager->getField(the_float, &py_float_v::o_ob_fval); + Structure the_float(d_manager, d_addr); + return the_float.getField(&py_float_v::o_ob_fval); } bool @@ -728,9 +719,9 @@ Object::toConcreteObject() const } std::string -Object::guessClassName(PyTypeObject& type) const +Object::guessClassName(Structure& type) const { - remote_addr_t tp_repr = d_manager->getField(type, &py_type_v::o_tp_repr); + remote_addr_t tp_repr = type.getField(&py_type_v::o_tp_repr); if (tp_repr == d_manager->findSymbol("float_repr")) { return "float"; } diff --git a/src/pystack/_pystack/pytypes.h b/src/pystack/_pystack/pytypes.h index cca2e1a..92672a3 100644 --- a/src/pystack/_pystack/pytypes.h +++ b/src/pystack/_pystack/pytypes.h @@ -1,14 +1,13 @@ #pragma once #include -#include #include #include #include #include "mem.h" #include "process.h" -#include "pycompat.h" +#include "structure.h" namespace pystack { @@ -181,7 +180,7 @@ class Object bool toBool() const; long toInteger() const; double toFloat() const; - std::string guessClassName(PyTypeObject& type) const; + std::string guessClassName(Structure& type) const; }; } // namespace pystack diff --git a/src/pystack/_pystack/structure.h b/src/pystack/_pystack/structure.h new file mode 100644 index 0000000..b9a0c16 --- /dev/null +++ b/src/pystack/_pystack/structure.h @@ -0,0 +1,91 @@ +#pragma once + +#include +#include + +#include "process.h" + +namespace pystack { + +template +class Structure +{ + public: + // Constructors + Structure(std::shared_ptr manager, remote_addr_t addr); + Structure(const Structure&) = delete; + Structure& operator=(const Structure&) = delete; + + // 
Methods + void copyFromRemote(); + + template + remote_addr_t getFieldRemoteAddress(FieldPointer OffsetsStruct::*field) const; + + template + const typename FieldPointer::Type& getField(FieldPointer OffsetsStruct::*field); + + private: + // Data members + std::shared_ptr d_manager; + remote_addr_t d_addr; + ssize_t d_size; + std::array d_footprintbuf; + std::vector d_heapbuf; + char* d_buf; +}; + +template +inline Structure::Structure( + std::shared_ptr manager, + remote_addr_t addr) +: d_manager(manager) +, d_addr(addr) +, d_size(d_manager->offsets().get().size) +, d_buf{} +{ +} + +template +inline void +Structure::copyFromRemote() +{ + if (d_buf) { + return; // already copied + } + + if (d_size < 512) { + d_buf = &d_footprintbuf[0]; + } else { + d_heapbuf.resize(d_size); + d_buf = &d_heapbuf[0]; + } + d_manager->copyMemoryFromProcess(d_addr, d_size, d_buf); +} + +template +template +inline remote_addr_t +Structure::getFieldRemoteAddress(FieldPointer OffsetsStruct::*field) const +{ + offset_t offset = (d_manager->offsets().get().*field).offset; + return d_addr + offset; +} + +template +template +inline const typename FieldPointer::Type& +Structure::getField(FieldPointer OffsetsStruct::*field) +{ + copyFromRemote(); + offset_t offset = (d_manager->offsets().get().*field).offset; + if (d_size < 0 || (size_t)d_size < sizeof(typename FieldPointer::Type) + || d_size - sizeof(typename FieldPointer::Type) < offset) + { + abort(); + } + auto address = d_buf + offset; + return *reinterpret_cast(address); +} + +} // namespace pystack diff --git a/src/pystack/_pystack/version.cpp b/src/pystack/_pystack/version.cpp index 69ba621..775d383 100644 --- a/src/pystack/_pystack/version.cpp +++ b/src/pystack/_pystack/version.cpp @@ -192,6 +192,17 @@ py_cframe() }; } +template +constexpr py_gilruntimestate_v +py_gilruntimestate() +{ + return { + sizeof(T), + offsetof(T, locked), + offsetof(T, last_holder), + }; +} + template constexpr py_runtime_v py_runtime() @@ -226,6 +237,8 @@ 
py_runtimev313() offsetof(T, interpreters.head), {}, {}, + offsetof(T, debug_offsets.cookie), + offsetof(T, debug_offsets.version), offsetof(T, debug_offsets.runtime_state.size), offsetof(T, debug_offsets.runtime_state.finalizing), offsetof(T, debug_offsets.runtime_state.interpreters_head), @@ -622,6 +635,7 @@ python_v python_v3_12 = { py_runtimev312(), py_gc(), py_cframe(), + py_gilruntimestate(), }; // ---- Python 3.13 ------------------------------------------------------------ @@ -645,6 +659,7 @@ python_v python_v3_13 = { py_runtimev313(), py_gc(), py_cframe(), + py_gilruntimestate(), }; // ----------------------------------------------------------------------------- diff --git a/src/pystack/_pystack/version.h b/src/pystack/_pystack/version.h index 3fc76c6..6570f7d 100644 --- a/src/pystack/_pystack/version.h +++ b/src/pystack/_pystack/version.h @@ -20,7 +20,6 @@ struct FieldOffset struct py_tuple_v { - typedef PyTupleObject Structure; ssize_t size; FieldOffset o_ob_size; FieldOffset o_ob_item; @@ -28,7 +27,6 @@ struct py_tuple_v struct py_list_v { - typedef PyListObject Structure; ssize_t size; FieldOffset o_ob_size; FieldOffset o_ob_item; @@ -36,7 +34,6 @@ struct py_list_v struct py_dict_v { - typedef Python3::PyDictObject Structure; ssize_t size; FieldOffset o_ma_keys; FieldOffset o_ma_values; @@ -44,7 +41,6 @@ struct py_dict_v struct py_dictkeys_v { - typedef PyDictKeysObject Structure; ssize_t size; FieldOffset o_dk_size; FieldOffset o_dk_kind; @@ -54,21 +50,18 @@ struct py_dictkeys_v struct py_dictvalues_v { - typedef PyDictValuesObject Structure; ssize_t size; FieldOffset o_values; }; struct py_float_v { - typedef PyFloatObject Structure; ssize_t size; FieldOffset o_ob_fval; }; struct py_long_v { - typedef _PyLongObject Structure; ssize_t size; FieldOffset o_ob_size; FieldOffset o_ob_digit; @@ -76,7 +69,6 @@ struct py_long_v struct py_bytes_v { - typedef PyBytesObject Structure; ssize_t size; FieldOffset o_ob_size; FieldOffset o_ob_sval; @@ -84,7 +76,6 @@ 
struct py_bytes_v struct py_unicode_v { - typedef PyUnicodeObject Structure; ssize_t size; FieldOffset o_state; FieldOffset o_length; @@ -93,14 +84,12 @@ struct py_unicode_v struct py_object_v { - typedef PyObject Structure; ssize_t size; FieldOffset o_ob_type; }; struct py_code_v { - typedef PyCodeObject Structure; ssize_t size; FieldOffset o_filename; FieldOffset o_name; @@ -113,7 +102,6 @@ struct py_code_v struct py_frame_v { - typedef PyFrameObject Structure; ssize_t size; FieldOffset o_back; FieldOffset o_code; @@ -126,7 +114,6 @@ struct py_frame_v struct py_thread_v { - typedef PyThreadState Structure; ssize_t size; FieldOffset o_prev; FieldOffset o_next; @@ -138,13 +125,16 @@ struct py_thread_v struct py_runtime_v { - typedef PyRuntimeState Structure; ssize_t size; FieldOffset o_finalizing; FieldOffset o_interp_head; - FieldOffset o_gc; + FieldOffset o_gc; // Using char because we can only use the offset, + // as the size and members change between versions FieldOffset o_tstate_current; + FieldOffset o_dbg_off_cookie; + FieldOffset o_dbg_off_py_version_hex; + FieldOffset o_dbg_off_runtime_state_struct_size; FieldOffset o_dbg_off_runtime_state_finalizing; FieldOffset o_dbg_off_runtime_state_interpreters_head; @@ -209,7 +199,6 @@ struct py_runtime_v struct py_type_v { - typedef PyTypeObject Structure; ssize_t size; FieldOffset o_tp_name; FieldOffset o_tp_repr; @@ -218,11 +207,11 @@ struct py_type_v struct py_is_v { - typedef PyInterpreterState Structure; ssize_t size; FieldOffset o_next; FieldOffset o_tstate_head; - FieldOffset o_gc; + FieldOffset o_gc; // Using char because we can only use the offset, + // as the size and members change between versions FieldOffset o_modules; FieldOffset o_sysdict; FieldOffset o_builtins; @@ -231,18 +220,23 @@ struct py_is_v struct py_gc_v { - typedef GCRuntimeState Structure; ssize_t size; FieldOffset o_collecting; }; struct py_cframe_v { - typedef CFrame Structure; ssize_t size; FieldOffset current_frame; }; +struct 
py_gilruntimestate_v +{ + ssize_t size; + FieldOffset o_locked; + FieldOffset o_last_holder; +}; + struct python_v { py_tuple_v py_tuple; @@ -263,6 +257,7 @@ struct python_v py_runtime_v py_runtime; py_gc_v py_gc; py_cframe_v py_cframe; + py_gilruntimestate_v py_gilruntimestate; template inline const T& get() const; @@ -293,6 +288,7 @@ define_python_v_get_specialization(py_is); define_python_v_get_specialization(py_runtime); define_python_v_get_specialization(py_gc); define_python_v_get_specialization(py_cframe); +define_python_v_get_specialization(py_gilruntimestate); #undef define_python_v_get_specialization diff --git a/src/pystack/process.py b/src/pystack/process.py index cd8bf83..513ea1f 100644 --- a/src/pystack/process.py +++ b/src/pystack/process.py @@ -19,8 +19,12 @@ r".*libpython(?P\d+)\.(?P\d+).*", re.IGNORECASE ) +# Strings like "3.8.10 (default, May 26 2023, 14:05:08)" +# or "2.7.18rc1 (v2.7.18rc1:8d21aa21f2, Apr 20 2020, 13:19:08)" +# or "3.13.0+ experimental free-threading build (Python)" BSS_VERSION_REGEXP = re.compile( - rb"((2|3)\.(\d+)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+? (\(.{1,64}\))" + rb"((2|3)\.(\d+)\.(\d{1,2}))((a|b|c|rc)\d{1,2})?\+?" + rb"(?: experimental free-threading build)? 
(\(.{1,64}\))" ) LOGGER = logging.getLogger(__file__) diff --git a/tests/integration/test_gil.py b/tests/integration/test_gil.py index 0dc15e5..a29d7fa 100644 --- a/tests/integration/test_gil.py +++ b/tests/integration/test_gil.py @@ -1,5 +1,8 @@ +import subprocess from pathlib import Path +import pytest + from pystack.engine import get_process_threads from pystack.engine import get_process_threads_for_core from tests.utils import ALL_PYTHONS @@ -14,6 +17,15 @@ TEST_SINGLE_THREAD_FILE = Path(__file__).parent / "single_thread_program.py" +@pytest.fixture(autouse=True) +def enable_gil_if_free_threading(python, monkeypatch): + _, python_executable = python + proc = subprocess.run([python_executable, "-Xgil=1", "-cpass"], capture_output=True) + free_threading = proc.returncode == 0 + if free_threading: + monkeypatch.setenv("PYTHON_GIL", "1") + + @ALL_PYTHONS def test_gil_status_one_thread_among_many_holds_the_gil(python, tmpdir): # GIVEN diff --git a/tests/integration/test_local_variables.py b/tests/integration/test_local_variables.py index 715e491..d992eae 100644 --- a/tests/integration/test_local_variables.py +++ b/tests/integration/test_local_variables.py @@ -573,7 +573,6 @@ def test_trashed_locals(generate_threads, python, tmpdir): class ListObject(ctypes.Structure): _fields_ = [ - ("ob_refcnt", ctypes.c_ssize_t), ("ob_type", ctypes.c_void_p), ("ob_size", ctypes.c_ssize_t), ("ob_item", ctypes.c_void_p), @@ -581,23 +580,26 @@ class ListObject(ctypes.Structure): class TupleObject(ctypes.Structure): _fields_ = [ - ("ob_refcnt", ctypes.c_ssize_t), ("ob_type", ctypes.c_void_p), ("ob_size", ctypes.c_ssize_t), ("ob_item0", ctypes.c_void_p), ("ob_item1", ctypes.c_void_p), ] +def ob_type_field(obj): + # Assume ob_type is the last field of PyObject + return id(obj) + sys.getsizeof(None) - ctypes.sizeof(ctypes.c_void_p) + def main(): bad_type = (1, 2, 3) bad_elem = (4, 5, 6) nullelem = (7, 8, 9) bad_list = [0, 1, 2] - TupleObject.from_address(id(bad_type)).ob_type = 0xded - 
TupleObject.from_address(id(bad_elem)).ob_item1 = 0xbad - TupleObject.from_address(id(nullelem)).ob_item1 = 0x0 - ListObject.from_address(id(bad_list)).ob_item = 0x0 + TupleObject.from_address(ob_type_field(bad_type)).ob_type = 0xded + TupleObject.from_address(ob_type_field(bad_elem)).ob_item1 = 0xbad + TupleObject.from_address(ob_type_field(nullelem)).ob_item1 = 0x0 + ListObject.from_address(ob_type_field(bad_list)).ob_item = 0x0 fifo = sys.argv[1] with open(sys.argv[1], "w") as fifo: diff --git a/tests/integration/test_smoke.py b/tests/integration/test_smoke.py index 47a02fa..1a3278c 100644 --- a/tests/integration/test_smoke.py +++ b/tests/integration/test_smoke.py @@ -20,9 +20,20 @@ elif sys.version_info < (3, 11): # pragma: no cover STACK_METHODS = (StackMethod.SYMBOLS, StackMethod.ELF_DATA, StackMethod.HEAP) CORE_STACK_METHODS = (StackMethod.SYMBOLS, StackMethod.ELF_DATA) -else: # pragma: no cover +elif sys.version_info < (3, 13): # pragma: no cover STACK_METHODS = (StackMethod.SYMBOLS, StackMethod.ELF_DATA) CORE_STACK_METHODS = (StackMethod.SYMBOLS, StackMethod.ELF_DATA) +else: # pragma: no cover + STACK_METHODS = ( + StackMethod.DEBUG_OFFSETS, + StackMethod.SYMBOLS, + StackMethod.ELF_DATA, + ) + CORE_STACK_METHODS = ( + StackMethod.DEBUG_OFFSETS, + StackMethod.SYMBOLS, + StackMethod.ELF_DATA, + ) @pytest.mark.parametrize("method", STACK_METHODS) diff --git a/tests/utils.py b/tests/utils.py index 195e8dd..b232140 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -23,6 +23,7 @@ PythonVersion = Tuple[Tuple[int, int], pathlib.Path] ALL_VERSIONS = [ + ((3, 13), "python3.13t"), ((3, 13), "python3.13"), ((3, 12), "python3.12"), ((3, 11), "python3.11"), @@ -44,6 +45,8 @@ def find_all_available_pythons() -> Iterable[Interpreter]: # pragma: no cover versions = [((sys.version_info[0], sys.version_info[1]), sys.executable)] elif test_version is not None: major, minor = test_version.split(".") + if minor.endswith("t"): + minor = minor[:-1] versions = [((int(major), 
int(minor)), f"python{test_version}")] else: versions = ALL_VERSIONS @@ -215,6 +218,7 @@ def generate_all_pystack_combinations( ]: # pragma: no cover if corefile: stack_methods = ( + StackMethod.DEBUG_OFFSETS, StackMethod.SYMBOLS, StackMethod.BSS, StackMethod.ELF_DATA, @@ -222,6 +226,7 @@ def generate_all_pystack_combinations( ) else: stack_methods = ( + StackMethod.DEBUG_OFFSETS, StackMethod.SYMBOLS, StackMethod.BSS, StackMethod.HEAP, @@ -240,6 +245,10 @@ def generate_all_pystack_combinations( AVAILABLE_PYTHONS, ): (major_version, minor_version) = python.version + if method == StackMethod.DEBUG_OFFSETS and ( + major_version < 3 or (major_version == 3 and minor_version < 13) + ): + continue if method == StackMethod.BSS and ( major_version > 3 or (major_version == 3 and minor_version >= 10) ):