From a5da128bb1617d38ff3edc1c424efc039cfa4da5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 6 Mar 2024 11:08:31 -0800 Subject: [PATCH] tools/fsrefs.py: check for references from uprobes This one is pretty involved to implement and to test. Signed-off-by: Omar Sandoval --- tests/linux_kernel/__init__.py | 86 ++++++++++++++ tests/linux_kernel/tools/test_fsrefs.py | 111 +++++++++++++++++- tools/fsrefs.py | 142 +++++++++++++++++++++++- util.py | 93 +++++++++++++--- vmtest/vm.py | 3 + 5 files changed, 416 insertions(+), 19 deletions(-) diff --git a/tests/linux_kernel/__init__.py b/tests/linux_kernel/__init__.py index d2144c136..afc3e7987 100644 --- a/tests/linux_kernel/__init__.py +++ b/tests/linux_kernel/__init__.py @@ -511,6 +511,92 @@ def mkswap(path, size): f.write(header) +class _perf_event_attr_sample_period_or_freq(ctypes.Union): + _fields_ = ( + ("sample_period", ctypes.c_uint64), + ("sample_freq", ctypes.c_uint64), + ) + + +class _perf_event_attr_wakeup_events_or_watermark(ctypes.Union): + _fields_ = ( + ("wakeup_events", ctypes.c_uint32), + ("wakeup_watermark", ctypes.c_uint32), + ) + + +class _perf_event_attr_config1(ctypes.Union): + _fields_ = ( + ("bp_addr", ctypes.c_uint64), + ("kprobe_func", ctypes.c_uint64), + ("uprobe_path", ctypes.c_uint64), + ("config1", ctypes.c_uint64), + ) + + +class _perf_event_attr_config2(ctypes.Union): + _fields_ = ( + ("bp_len", ctypes.c_uint64), + ("kprobe_addr", ctypes.c_uint64), + ("probe_offset", ctypes.c_uint64), + ("config2", ctypes.c_uint64), + ) + + +class perf_event_attr(ctypes.Structure): + _fields_ = ( + ("type", ctypes.c_uint32), + ("size", ctypes.c_uint32), + ("config", ctypes.c_uint64), + ("_sample_period_or_freq", _perf_event_attr_sample_period_or_freq), + ("sample_type", ctypes.c_uint64), + ("read_format", ctypes.c_uint64), + ("_bitfields1", ctypes.c_uint64), + ("_wakeup_events_or_watermark", _perf_event_attr_wakeup_events_or_watermark), + ("bp_type", ctypes.c_uint32), + ("_config1", 
_perf_event_attr_config1), + ("_config2", _perf_event_attr_config2), + ("branch_sample_type", ctypes.c_uint64), + ("sample_regs_user", ctypes.c_uint64), + ("sample_stack_user", ctypes.c_uint32), + ("clockid", ctypes.c_int32), + ("sample_regs_intr", ctypes.c_uint64), + ("aux_watermark", ctypes.c_uint32), + ("sample_max_stack", ctypes.c_uint16), + ("__reserved2", ctypes.c_uint16), + ("aux_sample_size", ctypes.c_uint32), + ("__reserved3", ctypes.c_uint32), + ("sig_data", ctypes.c_uint64), + ("config3", ctypes.c_uint64), + ) + _anonymous_ = ( + "_sample_period_or_freq", + "_wakeup_events_or_watermark", + "_config1", + "_config2", + ) + + +PERF_FLAG_FD_NO_GROUP = 1 << 0 +PERF_FLAG_FD_OUTPUT = 1 << 1 +PERF_FLAG_PID_CGROUP = 1 << 2 +PERF_FLAG_FD_CLOEXEC = 1 << 3 + + +def perf_event_open(attr, pid, cpu, group_fd=-1, flags=PERF_FLAG_FD_CLOEXEC): + attr.size = ctypes.sizeof(perf_event_attr) + return _check_ctypes_syscall( + _syscall( + SYS["perf_event_open"], + ctypes.byref(attr), + ctypes.c_int(pid), + ctypes.c_int(cpu), + ctypes.c_int(group_fd), + ctypes.c_ulong(flags), + ) + ) + + _syscall = _c.syscall _syscall.restype = ctypes.c_long diff --git a/tests/linux_kernel/tools/test_fsrefs.py b/tests/linux_kernel/tools/test_fsrefs.py index f028545be..cb40c7eaf 100644 --- a/tests/linux_kernel/tools/test_fsrefs.py +++ b/tests/linux_kernel/tools/test_fsrefs.py @@ -1,6 +1,7 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later +import array import contextlib import ctypes import errno @@ -9,9 +10,11 @@ import os from pathlib import Path import re +import socket import subprocess import sys import tempfile +import unittest from drgn.helpers.linux.fs import fget from drgn.helpers.linux.pid import find_task @@ -28,6 +31,8 @@ losetup, mkswap, mount, + perf_event_attr, + perf_event_open, skip_unless_have_test_disk, swapoff, swapon, @@ -36,6 +41,8 @@ ) from tools.fsrefs import main +UPROBE_TYPE_PATH = Path("/sys/bus/event_source/devices/uprobe/type") + class TestFsRefs(LinuxKernelTestCase): def setUp(self): @@ -248,7 +255,7 @@ def setup_binfmt_misc_in_userns(path): unshare(CLONE_NEWUSER | CLONE_NEWNS) except OSError as e: if e.errno == errno.EINVAL: - return "kernel does not support user namespaces" + return "kernel does not support user namespaces (CONFIG_USER_NS)" else: raise Path("/proc/self/uid_map").write_text("0 0 1") @@ -266,7 +273,7 @@ def setup_binfmt_misc_in_userns(path): ) except OSError as e: if e.errno == errno.ENODEV: - return "kernel does not support binfmt_misc" + return "kernel does not support binfmt_misc (CONFIG_BINFMT_MISC)" elif e.errno == errno.EPERM: return "kernel does not support sandboxed binfmt_misc mounts" else: @@ -317,3 +324,103 @@ def test_swap_file(self): "swap file (struct swap_info_struct *)", self.run_and_capture("--check", "swap", "--inode", str(path)), ) + + def test_uprobe_event(self): + for mnt in iter_mounts(): + if mnt.fstype == "tracefs": + break + else: + self.skipTest("tracefs not mounted") + uprobe_events = mnt.mount_point / "uprobe_events" + if not uprobe_events.exists(): + self.skipTest( + "kernel does not support uprobe events (CONFIG_UPROBE_EVENTS)" + ) + + def uprobe_events_append(s): + # open(..., "a") tries lseek(..., SEEK_END), which fails with + # EINVAL. 
+ with open(os.open(uprobe_events, os.O_WRONLY | os.O_APPEND), "w") as f: + f.write(s) + + path = self._tmp / "file" + path.touch() + + probe_name = f"drgntest_{os.urandom(20).hex()}" + retprobe_name = f"drgntest_{os.urandom(20).hex()}" + with contextlib.ExitStack() as exit_stack: + uprobe_events_append(f"p:{probe_name} {path}:0\n") + exit_stack.callback(uprobe_events_append, f"-:{probe_name}\n") + uprobe_events_append(f"r:{retprobe_name} {path}:0\n") + exit_stack.callback(uprobe_events_append, f"-:{retprobe_name}\n") + + instance = Path(tempfile.mkdtemp(dir=mnt.mount_point / "instances")) + exit_stack.callback(instance.rmdir) + + (instance / "events/uprobes" / probe_name / "enable").write_text("1") + (instance / "events/uprobes" / retprobe_name / "enable").write_text("1") + + output = self.run_and_capture("--check", "uprobes", "--inode", str(path)) + self.assertIn(f"uprobe event p:uprobes/{probe_name} ", output) + self.assertIn(f"uprobe event r:uprobes/{retprobe_name} ", output) + + @unittest.skipUnless( + UPROBE_TYPE_PATH.exists(), "kernel does not support perf_uprobe" + ) + def test_perf_uprobe(self): + path = self._tmp / "file" + path.touch() + + attr = perf_event_attr() + attr.type = int(UPROBE_TYPE_PATH.read_text()) + ctypes_path = ctypes.c_char_p(os.fsencode(path)) + attr.uprobe_path = ctypes.cast(ctypes_path, ctypes.c_void_p).value + fd = perf_event_open(attr, -1, min(os.sched_getaffinity(0))) + try: + self.assertIn( + f"perf uprobe (owned by pid {os.getpid()}", + self.run_and_capture("--check", "uprobes", "--inode", str(path)), + ) + finally: + os.close(fd) + + @unittest.skipUnless( + UPROBE_TYPE_PATH.exists(), "kernel does not support perf_uprobe" + ) + def test_perf_uprobe_no_owner(self): + path = self._tmp / "file" + path.touch() + + sock1, sock2 = socket.socketpair() + try: + # Create a perf event in a process, send it over a Unix socket to + # keep it alive, then die. 
+ pid = os.fork() + if pid == 0: + try: + attr = perf_event_attr() + attr.type = int(UPROBE_TYPE_PATH.read_text()) + ctypes_path = ctypes.c_char_p(os.fsencode(path)) + attr.uprobe_path = ctypes.cast(ctypes_path, ctypes.c_void_p).value + fd = perf_event_open(attr, -1, min(os.sched_getaffinity(0))) + sock2.sendmsg( + [b"\0"], + [ + ( + socket.SOL_SOCKET, + socket.SCM_RIGHTS, + array.array("i", [fd]), + ) + ], + ) + finally: + os._exit(0) + + os.waitpid(pid, 0) + self.assertIn( + "perf uprobe (no owner)", + self.run_and_capture("--check", "uprobes", "--inode", str(path)), + ) + finally: + sock1.close() + sock2.close() diff --git a/tools/fsrefs.py b/tools/fsrefs.py index 749e1cfd3..e844c91c5 100755 --- a/tools/fsrefs.py +++ b/tools/fsrefs.py @@ -6,7 +6,8 @@ import typing from typing import Any, Callable, Optional, Sequence, Union -from drgn import FaultError, Object, Program, cast +from drgn import FaultError, Object, Program, cast, container_of +from drgn.helpers.linux.cpumask import for_each_possible_cpu from drgn.helpers.linux.fs import ( d_path, fget, @@ -16,10 +17,16 @@ mount_dst, ) from drgn.helpers.linux.idr import idr_for_each_entry -from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry +from drgn.helpers.linux.list import ( + hlist_for_each_entry, + list_empty, + list_for_each_entry, +) from drgn.helpers.linux.mm import for_each_vma +from drgn.helpers.linux.percpu import per_cpu_ptr from drgn.helpers.linux.pid import find_task, for_each_task from drgn.helpers.linux.plist import plist_for_each_entry +from drgn.helpers.linux.rbtree import rbtree_inorder_for_each_entry class warn_on_fault: @@ -287,6 +294,134 @@ def visit_swap_files(prog: Program, visitor: "Visitor") -> None: print(f"swap file {swap_info.format_(**format_args)} {match}") + +# call was moved from struct trace_probe to struct trace_event in Linux kernel +# commit 60d53e2c3b75 ("tracing/probe: Split trace_event related data from +# trace_probe") (in v5.4). 
+def _trace_probe_call(tp: Object) -> Object: + try: + event = tp.event + except AttributeError: + return tp.call + return event.call + + +def trace_probe_group_name(tp: Object) -> str: + return os.fsdecode(_trace_probe_call(tp).member_("class").system.string_()) + + +def trace_probe_name(tp: Object) -> str: + prog = tp.prog_ + call = _trace_probe_call(tp).read_() + + # TRACE_EVENT_FL_CUSTOM was added in Linux kernel commit 3a73333fb370 + # ("tracing: Add TRACE_CUSTOM_EVENT() macro") (in v5.18). + try: + TRACE_EVENT_FL_CUSTOM = prog["TRACE_EVENT_FL_CUSTOM"] + except KeyError: + pass + else: + if call.flags & TRACE_EVENT_FL_CUSTOM: + return os.fsdecode(call.name.string_()) + + if call.flags & prog["TRACE_EVENT_FL_TRACEPOINT"]: + tracepoint = call.tp.read_() + return os.fsdecode(tracepoint.name.string_()) if tracepoint else "" + else: + return os.fsdecode(call.name.string_()) + + +def visit_uprobes(prog: Program, visitor: "Visitor") -> None: + try: + uprobes_tree = prog["uprobes_tree"] + except KeyError: + # If uprobes_tree doesn't exist, then CONFIG_UPROBES=n. + return + try: + uprobe_dispatcher = prog["uprobe_dispatcher"] + except KeyError: + # uprobe_dispatcher only exists if CONFIG_UPROBE_EVENTS=y, which is + # theoretically separate from CONFIG_UPROBES, although as of Linux 6.8 + # they will always be the same. 
+ uprobe_dispatcher = None + with warn_on_fault("iterating uprobes"): + for uprobe in rbtree_inorder_for_each_entry( + "struct uprobe", uprobes_tree.address_of_(), "rb_node" + ): + try: + match = visitor.visit_inode(uprobe.inode) + except FaultError: + continue + if not match: + continue + found_consumer = False + with warn_on_fault("iterating uprobe consumers"): + consumer = uprobe.consumers.read_() + while consumer: + found_consumer = True + handler = consumer.handler.read_() + if handler == uprobe_dispatcher: + tu = container_of(consumer, "struct trace_uprobe", "consumer") + # uprobe events created through tracefs are in a list + # anchored on devent.list since Linux kernel commit + # 0597c49c69d5 ("tracing/uprobes: Use dyn_event + # framework for uprobe events") (in v5.0) and list + # before that. + try: + event_list = tu.devent.list + except AttributeError: + event_list = tu.list + if list_empty(event_list.address_of_()): + found_perf_event = False + with ignore_fault: + call = _trace_probe_call(tu.tp) + # uprobes created with perf_event_open have a + # struct perf_event in call.perf_events, which + # only exists if CONFIG_PERF_EVENTS=y. 
+ try: + perf_events = call.perf_events + except AttributeError: + pass + else: + for cpu in for_each_possible_cpu(prog): + for perf_event in hlist_for_each_entry( + "struct perf_event", + per_cpu_ptr(perf_events, cpu), + "hlist_entry", + ): + owner = perf_event.owner.read_() + if owner: + owner_pid = owner.pid.value_() + owner_comm = os.fsdecode( + owner.comm.string_() + ) + print( + f"perf uprobe (owned by pid {owner_pid} ({owner_comm})) {perf_event.format_(**format_args)} {match}" + ) + else: + print( + f"perf uprobe (no owner) {perf_event.format_(**format_args)} {match}" + ) + found_perf_event = True + if not found_perf_event: + print( + f"unknown trace uprobe {tu.format_(**format_args)} {match}" + ) + else: + c = "r" if tu.consumer.ret_handler else "p" + group_name = trace_probe_group_name(tu.tp) + event_name = trace_probe_name(tu.tp) + print( + f"uprobe event {c}:{group_name}/{event_name} {tu.format_(**format_args)} {match}" + ) + else: + print( + f"unknown uprobe consumer {consumer.format_(**format_args)} {match}" + ) + consumer = consumer.next.read_() + if not found_consumer: + print(f"unknown uprobe {uprobe.format_(**format_args)} {match}") + + def hexint(x: str) -> int: return int(x, 16) @@ -333,6 +468,7 @@ def main(prog: Program, argv: Sequence[str]) -> None: "mounts", "swap", "tasks", + "uprobes", ] check_group = parser.add_argument_group( title="check selection" @@ -402,6 +538,9 @@ def main(prog: Program, argv: Sequence[str]) -> None: if "swap" in enabled_checks: visit_swap_files(prog, visitor) + if "uprobes" in enabled_checks: + visit_uprobes(prog, visitor) + if __name__ == "__main__": prog: Program diff --git a/util.py b/util.py index d8173a263..ebaacf641 100644 --- a/util.py +++ b/util.py @@ -137,107 +137,170 @@ def __str__(self) -> str: if NORMALIZED_MACHINE_NAME == "x86_64": if sys.maxsize > 2**32: - SYS = {"bpf": 321, "kexec_file_load": 320, "rt_sigtimedwait": 128} + SYS = { + "bpf": 321, + "kexec_file_load": 320, + "perf_event_open": 298, + 
"rt_sigtimedwait": 128, + } else: # x32 - SYS = {"bpf": 321, "kexec_file_load": 320, "rt_sigtimedwait": 523} + SYS = { + "bpf": 321, + "kexec_file_load": 320, + "perf_event_open": 298, + "rt_sigtimedwait": 523, + } else: SYS = { "aarch64": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, - "alpha": {"bpf": 515, "rt_sigtimedwait": 355}, + "alpha": {"bpf": 515, "perf_event_open": 493, "rt_sigtimedwait": 355}, "arc": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, "arm": { "bpf": 386, "kexec_file_load": 401, + "perf_event_open": 364, "rt_sigtimedwait": 177, "rt_sigtimedwait_time64": 421, }, "csky": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, "hexagon": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, - "i386": {"bpf": 357, "rt_sigtimedwait": 177, "rt_sigtimedwait_time64": 421}, - "ia64": {"bpf": 317, "rt_sigtimedwait": 159}, + "i386": { + "bpf": 357, + "perf_event_open": 336, + "rt_sigtimedwait": 177, + "rt_sigtimedwait_time64": 421, + }, "loongarch": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, "loongarch64": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, - "m68k": {"bpf": 354, "rt_sigtimedwait": 177, "rt_sigtimedwait_time64": 421}, + "m68k": { + "bpf": 354, + "perf_event_open": 332, + "rt_sigtimedwait": 177, + "rt_sigtimedwait_time64": 421, + }, "microblaze": { "bpf": 387, + "perf_event_open": 366, "rt_sigtimedwait": 177, "rt_sigtimedwait_time64": 421, }, # TODO: mips is missing here because I don't know how to distinguish # between the o32 and n32 ABIs. 
- "mips64": {"bpf": 315, "rt_sigtimedwait": 126}, + "mips64": {"bpf": 315, "perf_event_open": 292, "rt_sigtimedwait": 126}, "nios2": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, "openrisc": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, "parisc": { "bpf": 341, "kexec_file_load": 355, + "perf_event_open": 318, "rt_sigtimedwait": 177, "rt_sigtimedwait_time64": 421, }, - "parisc64": {"bpf": 341, "kexec_file_load": 355, "rt_sigtimedwait": 177}, - "ppc": {"bpf": 361, "rt_sigtimedwait": 176, "rt_sigtimedwait_time64": 421}, - "ppc64": {"bpf": 361, "rt_sigtimedwait": 176}, + "parisc64": { + "bpf": 341, + "kexec_file_load": 355, + "perf_event_open": 318, + "rt_sigtimedwait": 177, + }, + "ppc": { + "bpf": 361, + "perf_event_open": 319, + "rt_sigtimedwait": 176, + "rt_sigtimedwait_time64": 421, + }, + "ppc64": {"bpf": 361, "perf_event_open": 319, "rt_sigtimedwait": 176}, "riscv32": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, "riscv64": { "bpf": 280, "kexec_file_load": 294, + "perf_event_open": 241, "rt_sigtimedwait": 137, "rt_sigtimedwait_time64": 421, }, "s390": { "bpf": 351, "kexec_file_load": 381, + "perf_event_open": 331, "rt_sigtimedwait": 177, "rt_sigtimedwait_time64": 421, }, - "s390x": {"bpf": 351, "kexec_file_load": 381, "rt_sigtimedwait": 177}, - "sh": {"bpf": 375, "rt_sigtimedwait": 177, "rt_sigtimedwait_time64": 421}, - "sparc": {"bpf": 349, "rt_sigtimedwait": 105, "rt_sigtimedwait_time64": 421}, - "sparc64": {"bpf": 349, "rt_sigtimedwait": 105}, - "xtensa": {"bpf": 340, "rt_sigtimedwait": 229, "rt_sigtimedwait_time64": 421}, + "s390x": { + "bpf": 351, + "kexec_file_load": 381, + "perf_event_open": 331, + "rt_sigtimedwait": 177, + }, + "sh": { + "bpf": 375, + "perf_event_open": 336, + "rt_sigtimedwait": 177, + "rt_sigtimedwait_time64": 
421, + }, + "sparc": { + "bpf": 349, + "perf_event_open": 327, + "rt_sigtimedwait": 105, + "rt_sigtimedwait_time64": 421, + }, + "sparc64": {"bpf": 349, "perf_event_open": 327, "rt_sigtimedwait": 105}, + "xtensa": { + "bpf": 340, + "perf_event_open": 327, + "rt_sigtimedwait": 229, + "rt_sigtimedwait_time64": 421, + }, }.get(NORMALIZED_MACHINE_NAME, {}) diff --git a/vmtest/vm.py b/vmtest/vm.py index c03a7b414..6ad10d62f 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -109,6 +109,9 @@ # Mount additional filesystems. mount -t binfmt_misc -o nosuid,nodev,noexec binfmt_misc /proc/sys/fs/binfmt_misc +# We currently only enable tracefs if we have uprobes, which AArch64 only +# supports since Linux 4.10. +mount -t tracefs -o nosuid,nodev,noexec tracefs /sys/kernel/tracing || true # Configure networking. cat << EOF > /etc/hosts