Skip to content

Commit

Permalink
Harden --locals against trashed memory
Browse files Browse the repository at this point in the history
When attempting to produce a repr for local variables, attempt to cope
better with trashed memory:

- Use "<invalid object at 0x1234>" as the repr of a `PyObject*` pointing
  directly at invalid memory, or one whose `ob_type` points at invalid
  memory.
- Use "<list object at 0x1234>" as, for instance, the repr of a list containing an
  invalid `ob_items` pointer. More generally, use this for any object
  where we can determine the type, but generating the repr according to
  that type's rules fails due to accessing unmapped memory.

Test this with some ctypes abuse.

Signed-off-by: Matt Wozniski <[email protected]>
  • Loading branch information
godlygeek committed Aug 1, 2024
1 parent c0b8b49 commit 6721256
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 21 deletions.
1 change: 1 addition & 0 deletions news/194.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Heap corruption could cause PyStack to fail to generate a stack when ``--locals`` mode was used. This has been fixed by falling back to a reasonable default when attempting to format the repr of a local variable causes a dereference of an invalid pointer.
72 changes: 51 additions & 21 deletions src/pystack/_pystack/pytypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ std::string
GenericObject::toString(ssize_t max_size) const
{
std::stringstream os;
os << "<" << std::hex << std::showbase << d_classname << " at " << d_addr << ">";
os << "<" << std::hex << d_classname << " at 0x" << d_addr << ">";
std::string object_str = os.str();
return limitOutput(object_str, max_size);
}
Expand All @@ -521,19 +521,40 @@ NoneObject::toString([[maybe_unused]] ssize_t max_size) const

Object::Object(const std::shared_ptr<const AbstractProcessManager>& manager, remote_addr_t addr)
: d_addr(addr)
, d_type_addr(0)
, d_classname()
, d_flags()
, d_manager(manager)
{
LOG(DEBUG) << std::hex << std::showbase << "Copying PyObject data from address " << addr;

PyObject obj;
manager->copyObjectFromProcess(d_addr, &obj);
try {
manager->copyObjectFromProcess(d_addr, &obj);
} catch (RemoteMemCopyError& ex) {
LOG(WARNING) << std::hex << std::showbase << "Failed to read PyObject data from address "
<< d_addr;
d_classname = "invalid object";
return;
}

PyTypeObject cls;
LOG(DEBUG) << std::hex << std::showbase << "Copying typeobject from address " << obj.ob_type;
d_type_addr = reinterpret_cast<remote_addr_t>(obj.ob_type);
manager->copyMemoryFromProcess((remote_addr_t)obj.ob_type, manager->offsets().py_type.size, &cls);
try {
manager->copyMemoryFromProcess(
(remote_addr_t)obj.ob_type,
manager->offsets().py_type.size,
&cls);

d_flags = manager->getField(cls, &py_type_v::o_tp_flags);
d_flags = manager->getField(cls, &py_type_v::o_tp_flags);
} catch (RemoteMemCopyError& ex) {
LOG(WARNING) << std::hex << std::showbase << "Failed to read typeobject from address "
<< obj.ob_type;
d_type_addr = 0;
d_classname = "invalid object";
return;
}

remote_addr_t name_addr = manager->getField(cls, &py_type_v::o_tp_name);
try {
Expand Down Expand Up @@ -577,24 +598,33 @@ Object::toString(ssize_t max_size) const
if (max_size <= 5) {
return ELLIPSIS;
}
std::stringstream os;
std::visit(
overloaded{
[&](const auto& arg) { os << arg.toString(max_size); },
[&](const bool arg) { os << arg; },
[&](const long arg) { os << arg; },
[&](const double arg) { os << arg; },
[&](const std::string& arg) {
std::string truncated = arg;
if (static_cast<size_t>(max_size) < arg.size()) {
truncated = arg.substr(0, max_size - 3) + "...";
}
os << truncated;
},
},
toConcreteObject());
try {
std::stringstream os;
std::visit(
overloaded{
[&](const auto& arg) { os << arg.toString(max_size); },
[&](const bool arg) { os << arg; },
[&](const long arg) { os << arg; },
[&](const double arg) { os << arg; },
[&](const std::string& arg) {
std::string truncated = arg;
if (static_cast<size_t>(max_size) < arg.size()) {
truncated = arg.substr(0, max_size - 3) + "...";
}
os << truncated;
},
},
toConcreteObject());

return os.str();
} catch (RemoteMemCopyError& ex) {
LOG(WARNING) << std::hex << std::showbase << "Failed to create a repr for object of type "
<< d_classname << " at address " << d_addr;

return os.str();
std::stringstream os;
os << std::hex << std::showbase << "<" << d_classname << " object at " << d_addr << ">";
return os.str();
}
}

long
Expand Down
68 changes: 68 additions & 0 deletions tests/integration/test_local_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,3 +558,71 @@ def second_func(the_argument):

second_func_frame = find_frame(frames, "second_func")
assert "the_argument" not in second_func_frame.arguments


@ALL_PYTHONS
@ALL_SOURCES
def test_trashed_locals(generate_threads, python, tmpdir):
# GIVEN
_, python_executable = python

program_template = """
import ctypes
import sys
import time
class ListObject(ctypes.Structure):
_fields_ = [
("ob_refcnt", ctypes.c_ssize_t),
("ob_type", ctypes.c_void_p),
("ob_size", ctypes.c_ssize_t),
("ob_item", ctypes.c_void_p),
]
class TupleObject(ctypes.Structure):
_fields_ = [
("ob_refcnt", ctypes.c_ssize_t),
("ob_type", ctypes.c_void_p),
("ob_size", ctypes.c_ssize_t),
("ob_item0", ctypes.c_void_p),
("ob_item1", ctypes.c_void_p),
]
def main():
bad_type = (1, 2, 3)
bad_elem = (4, 5, 6)
nullelem = (7, 8, 9)
bad_list = [0, 1, 2]
TupleObject.from_address(id(bad_type)).ob_type = 0xded
TupleObject.from_address(id(bad_elem)).ob_item1 = 0xbad
TupleObject.from_address(id(nullelem)).ob_item1 = 0x0
ListObject.from_address(id(bad_list)).ob_item = 0x0
fifo = sys.argv[1]
with open(sys.argv[1], "w") as fifo:
fifo.write("ready")
time.sleep(1000)
main()
"""

script = tmpdir / "the_script.py"
script.write_text(program_template, encoding="utf-8")

# WHEN
threads = generate_threads(python_executable, script, tmpdir, locals=True)

# THEN

assert len(threads) == 1
(thread,) = threads

frames = list(thread.frames)
assert (len(frames)) == 2

main = find_frame(frames, "main")
assert re.match(r"^<invalid object at 0x[0-9a-f]{4,}>$", main.locals["bad_type"])
assert main.locals["bad_elem"] == "(4, <invalid object at 0xbad>, 6)"
assert main.locals["nullelem"] == "(7, <invalid object at 0x0>, 9)"
assert re.match(r"^<list object at 0x[0-9a-f]{4,}>$", main.locals["bad_list"])

0 comments on commit 6721256

Please sign in to comment.