From df83bfb9839ca8fc4e0a37ca651c447017b284b5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 13 Dec 2024 16:38:11 -0800 Subject: [PATCH 1/4] libdrgn: work around vmlinux with build ID in broken ELF notes The module API will enforce that the build ID of vmlinux matches the build ID we find in the core dump or live system. Unfortunately, several kernel releases, most importantly 4.14 LTS and 4.19 LTS, cannot have their build ID parsed by dwelf_elf_gnu_build_id() thanks to the NT_GNU_PROPERTY_TYPE_0 alignment fiasco. Add our own drgn_elf_gnu_build_id() function with a workaround. Signed-off-by: Omar Sandoval --- libdrgn/Makefile.am | 2 + libdrgn/debug_info.c | 45 +++-------- libdrgn/debug_info.h | 10 --- libdrgn/dwarf_info.c | 1 + libdrgn/elf_file.c | 48 ------------ libdrgn/elf_file.h | 56 ------------- libdrgn/elf_notes.c | 174 +++++++++++++++++++++++++++++++++++++++++ libdrgn/elf_notes.h | 115 +++++++++++++++++++++++++++ libdrgn/linux_kernel.c | 30 ++----- libdrgn/program.c | 1 + libdrgn/stack_trace.c | 3 +- 11 files changed, 312 insertions(+), 173 deletions(-) create mode 100644 libdrgn/elf_notes.c create mode 100644 libdrgn/elf_notes.h diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 05646609c..5cffa80da 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -62,6 +62,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ dwarf_info.h \ elf_file.c \ elf_file.h \ + elf_notes.c \ + elf_notes.h \ elf_sections.h \ error.c \ error.h \ diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index ec590facf..85040eddd 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -20,6 +20,7 @@ #include "cleanup.h" #include "debug_info.h" #include "elf_file.h" +#include "elf_notes.h" #include "error.h" #include "linux_kernel.h" #include "openmp.h" @@ -465,7 +466,7 @@ drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, struct drgn_error *err; const void *build_id; - ssize_t build_id_len = dwelf_elf_gnu_build_id(elf, &build_id); + ssize_t build_id_len = drgn_elf_gnu_build_id(elf, &build_id); if (build_id_len < 0) { err = drgn_debug_info_report_error(load, path, NULL, drgn_error_libelf()); @@ -708,7 +709,7 @@ static bool build_id_matches(Elf *elf, const void *build_id, size_t build_id_len) { const void *elf_build_id; - ssize_t elf_build_id_len = dwelf_elf_gnu_build_id(elf, &elf_build_id); + ssize_t elf_build_id_len = drgn_elf_gnu_build_id(elf, &elf_build_id); if (elf_build_id_len < 0) return false; return (elf_build_id_len == build_id_len && @@ -924,32 +925,6 @@ static void read_phdr(const void *phdr_buf, size_t i, bool is_64_bit, } } -static const char *read_build_id(const void *buf, size_t buf_len, - unsigned int align, bool bswap, - size_t *len_ret) -{ - /* - * Build IDs are usually 16 or 20 bytes (MD5 or SHA-1, respectively), so - * these arbitrary limits are generous. - */ - static const uint32_t build_id_min_size = 2; - static const uint32_t build_id_max_size = 1024; - Elf32_Nhdr nhdr; - const char *name; - const void *desc; - while (next_elf_note(&buf, &buf_len, align, bswap, &nhdr, &name, &desc)) { - if (nhdr.n_namesz == sizeof("GNU") && - memcmp(name, "GNU", sizeof("GNU")) == 0 && - nhdr.n_type == NT_GNU_BUILD_ID && - nhdr.n_descsz >= build_id_min_size && - nhdr.n_descsz <= build_id_max_size) { - *len_ret = nhdr.n_descsz; - return desc; - } - } - return NULL; -} - struct core_get_phdr_arg { const void *phdr_buf; bool is_64_bit; @@ -1081,12 +1056,14 @@ userspace_core_identify_file(struct drgn_program *prog, return err; } } - ret->build_id = read_build_id(core->segment_buf, - phdr.p_filesz, - phdr.p_align == 8 ? 8 : 4, - arg.bswap, - &ret->build_id_len); - if (ret->build_id) + ret->build_id_len = + parse_gnu_build_id_from_notes(core->segment_buf, + phdr.p_filesz, + phdr.p_align == 8 + ? 8 : 4, + arg.bswap, + &ret->build_id); + if (ret->build_id_len) break; } } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index e4b363807..6d69a13dd 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -14,7 +14,6 @@ #include #include -#include #include #include "cfi.h" @@ -302,15 +301,6 @@ struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, uint64_t *start_ret, uint64_t *end_ret); -static inline Elf_Type note_header_type(uint64_t p_align) -{ -#if _ELFUTILS_PREREQ(0, 175) - if (p_align == 8) - return ELF_T_NHDR8; -#endif - return ELF_T_NHDR; -} - /** @} */ #endif /* DRGN_DEBUG_INFO_H */ diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index b896e0473..7725e781c 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/libdrgn/elf_file.c b/libdrgn/elf_file.c index 9c8cd5075..bf5952b6b 100644 --- a/libdrgn/elf_file.c +++ b/libdrgn/elf_file.c @@ -281,51 +281,3 @@ struct drgn_error *drgn_elf_file_section_buffer_error(struct binary_buffer *bb, return drgn_elf_file_section_error(buffer->file, buffer->scn, buffer->data, ptr, message); } - -bool next_elf_note(const void **p, size_t *size, unsigned int align, bool bswap, - Elf32_Nhdr *nhdr_ret, const char **name_ret, - const void **desc_ret) -{ - uint64_t align_mask = align - 1; - - if (*size < sizeof(*nhdr_ret)) - return false; - memcpy(nhdr_ret, *p, sizeof(*nhdr_ret)); - if (bswap) { - nhdr_ret->n_namesz = bswap_32(nhdr_ret->n_namesz); - nhdr_ret->n_descsz = bswap_32(nhdr_ret->n_descsz); - nhdr_ret->n_type = bswap_32(nhdr_ret->n_type); - } - - if (nhdr_ret->n_namesz > *size - sizeof(*nhdr_ret)) - return false; - uint64_t aligned_namesz = (nhdr_ret->n_namesz + align_mask) & ~align_mask; - if (nhdr_ret->n_descsz > 0 - && (aligned_namesz > *size - sizeof(*nhdr_ret) - || nhdr_ret->n_descsz > *size - sizeof(*nhdr_ret) - aligned_namesz)) - return false; - - *p = (const char *)*p + sizeof(*nhdr_ret); - *size -= sizeof(*nhdr_ret); - - *name_ret = *p; - if (aligned_namesz > *size) { - *p = (const char *)*p + *size; - *size = 0; - } else { - *p = (const char *)*p + aligned_namesz; - *size -= aligned_namesz; - } - - *desc_ret = *p; - uint64_t aligned_descsz = (nhdr_ret->n_descsz + align_mask) & ~align_mask; - if (aligned_descsz > *size) { - *p = (const char *)*p + *size; - *size = 0; - } else { - *p = (const char *)*p + aligned_descsz; - *size -= aligned_descsz; - } - - return true; -} diff --git a/libdrgn/elf_file.h b/libdrgn/elf_file.h index 3939aa1ea..ca2b6f3eb 100644 --- a/libdrgn/elf_file.h +++ b/libdrgn/elf_file.h @@ -165,62 +165,6 @@ drgn_elf_file_section_buffer_init_index(struct drgn_elf_file_section_buffer *buf file->scn_data[scn]); } -/** - * Parse the next ELF note out of a buffer. - * - * @note - * Alignment of ELF notes is a mess. The [System V - * gABI](http://www.sco.com/developers/gabi/latest/ch5.pheader.html#note_section) - * says that the note header and descriptor should be aligned to 4 bytes for - * 32-bit files and 8 bytes for 64-bit files. However, on Linux, 4-byte - * alignment is used for both 32-bit and 64-bit files. - * @note - * The only exception as of 2024 is `NT_GNU_PROPERTY_TYPE_0`, which is defined - * to follow the gABI alignment. See - * ["PT_NOTE alignment, NT_GNU_PROPERTY_TYPE_0, glibc and gold"](https://public-inbox.org/libc-alpha/13a92cb0-a993-f684-9a96-e02e4afb1bef@redhat.com/). - * But, note that the 12-byte note header plus the 4-byte `"GNU\0"` name is a - * multiple of 8 bytes, and the `NT_GNU_PROPERTY_TYPE_0` descriptor is defined - * to be a multiple of 4 bytes for 32-bit files and 8 bytes for 64-bit files. As - * a result, `NT_GNU_PROPERTY_TYPE_0` is never actually padded, and 4-byte vs. - * 8-byte alignment are equivalent for parsing purposes. - * @note - * According to the [gABI Linux - * Extensions](https://gitlab.com/x86-psABIs/Linux-ABI), consumers are now - * supposed to use the `p_align` of the `PT_NOTE` segment instead of assuming an - * alignment. However, the Linux kernel as of 6.0 generates core dumps with - * `PT_NOTE` segments with a `p_align` of 0 or 1 which are actually aligned to 4 - * bytes. So, when parsing notes from an ELF file, you need to use 8-byte - * alignment if `p_align` is 8 and 4-byte alignment otherwise. binutils and - * elfutils appear to do the same. - * @note - * Before Linux kernel commit f7ba52f302fd ("vmlinux.lds.h: Discard - * .note.gnu.property section") (in v6.4), the vmlinux linker script can create - * a `PT_NOTE` segment with a `p_align` of 8 where the entries other than - * `NT_GNU_PROPERTY_TYPE_0` are actually aligned to 4 bytes. - * @note - * Finally, there are cases where we don't know `p_align`. For example, - * `/sys/kernel/notes` contains the contents of the vmlinux `.notes` section, - * which (ignoring the aforementioned bug) we can assume has 4-byte alignment. - * As another example, `/sys/module/$module/notes/` contains a file for each - * note section. Since `NT_GNU_PROPERTY_TYPE_0` can be parsed assuming 4-byte - * alignment, we can again assume 4-byte alignment. This will work as long as - * any future note types requiring 8-byte alignment also happen to have an - * 8-byte aligned header+name and descriptor (but hopefully no one ever adds an - * 8-byte aligned note again). - * - * @param[in,out] p Current position. Initialize to the beginning of the buffer. - * @param[in,out] size Remaining size. Initialize to the size of the buffer. - * @param[in] align Note alignment. Usually `p_align == 8 ? 8 : 4` if the - * program header is available, otherwise 4. - * @param[in] bswap Whether the note header needs to be byte-swapped. - * @param[out] name_ret Returned note name. - * @param[out] desc_ret Returned note descriptor. - * @return @c true if a note was parsed, @c false if there are no more notes. - */ -bool next_elf_note(const void **p, size_t *size, unsigned int align, bool bswap, - Elf32_Nhdr *nhdr_ret, const char **name_ret, - const void **desc_ret); - /** @} */ #endif /* DRGN_ELF_FILE_H */ diff --git a/libdrgn/elf_notes.c b/libdrgn/elf_notes.c new file mode 100644 index 000000000..bcd36449d --- /dev/null +++ b/libdrgn/elf_notes.c @@ -0,0 +1,174 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include +#include + +#include "elf_notes.h" +#include "util.h" + +bool next_elf_note(const void **p, size_t *size, unsigned int align, bool bswap, + GElf_Nhdr *nhdr_ret, const char **name_ret, + const void **desc_ret) +{ + uint64_t align_mask = align - 1; + + if (*size < sizeof(*nhdr_ret)) + return false; + memcpy(nhdr_ret, *p, sizeof(*nhdr_ret)); + if (bswap) { + nhdr_ret->n_namesz = bswap_32(nhdr_ret->n_namesz); + nhdr_ret->n_descsz = bswap_32(nhdr_ret->n_descsz); + nhdr_ret->n_type = bswap_32(nhdr_ret->n_type); + } + + if (nhdr_ret->n_namesz > *size - sizeof(*nhdr_ret)) + return false; + uint64_t aligned_namesz = (nhdr_ret->n_namesz + align_mask) & ~align_mask; + if (nhdr_ret->n_descsz > 0 + && (aligned_namesz > *size - sizeof(*nhdr_ret) + || nhdr_ret->n_descsz > *size - sizeof(*nhdr_ret) - aligned_namesz)) + return false; + + *p = (const char *)*p + sizeof(*nhdr_ret); + *size -= sizeof(*nhdr_ret); + + *name_ret = *p; + if (aligned_namesz > *size) { + *p = (const char *)*p + *size; + *size = 0; + } else { + *p = (const char *)*p + aligned_namesz; + *size -= aligned_namesz; + } + + *desc_ret = *p; + uint64_t aligned_descsz = (nhdr_ret->n_descsz + align_mask) & ~align_mask; + if (aligned_descsz > *size) { + *p = (const char *)*p + *size; + *size = 0; + } else { + *p = (const char *)*p + aligned_descsz; + *size -= aligned_descsz; + } + + return true; +} + +size_t parse_gnu_build_id_from_notes(const void *buf, size_t size, + unsigned int align, bool bswap, + const void **ret) +{ + GElf_Nhdr nhdr; + const char *name; + const void *desc; + while (next_elf_note(&buf, &size, align, bswap, &nhdr, &name, &desc)) { + if (nhdr.n_namesz == sizeof("GNU") + && memcmp(name, "GNU", sizeof("GNU")) == 0 + && nhdr.n_type == NT_GNU_BUILD_ID + && nhdr.n_descsz > 0) { + *ret = desc; + return nhdr.n_descsz; + } + } + *ret = NULL; + return 0; +} + +#if _ELFUTILS_PREREQ(0, 175) +ssize_t drgn_elf_gnu_build_id(Elf *elf, const void **ret) +{ + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) + return -1; + bool bswap = + (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) != HOST_LITTLE_ENDIAN; + + Elf_Data *nhdr8_data = NULL; + size_t num_note_sections = 0; + + Elf_Scn *scn = elf_nextscn(elf, NULL); + if (scn) { + do { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr || shdr->sh_type != SHT_NOTE) + continue; + num_note_sections++; + + Elf_Data *data = elf_rawdata(scn, NULL); + if (!data) + continue; + + if (data->d_type == ELF_T_NHDR8) + nhdr8_data = data; + + unsigned int align = data->d_type == ELF_T_NHDR8 ? 8 : 4; + const void *build_id; + size_t size = parse_gnu_build_id_from_notes(data->d_buf, + data->d_size, + align, + bswap, + &build_id); + if (size) { + *ret = build_id; + return size; + } + } while ((scn = elf_nextscn(elf, scn))); + } else { + size_t phnum; + if (elf_getphdrnum(elf, &phnum)) + return -1; + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = + gelf_getphdr(elf, i, &phdr_mem); + if (!phdr || phdr->p_type != PT_NOTE) + continue; + num_note_sections++; + + Elf_Data *data = + elf_getdata_rawchunk(elf, phdr->p_offset, + phdr->p_filesz, + note_header_type(phdr->p_align)); + if (!data) + continue; + + if (data->d_type == ELF_T_NHDR8) + nhdr8_data = data; + + unsigned int align = data->d_type == ELF_T_NHDR8 ? 8 : 4; + const void *build_id; + size_t size = parse_gnu_build_id_from_notes(data->d_buf, + data->d_size, + align, + bswap, + &build_id); + if (size) { + *ret = build_id; + return size; + } + } + } + + // Before Linux kernel commit f7ba52f302fd ("vmlinux.lds.h: Discard + // .note.gnu.property section") (in v6.4), the vmlinux .notes section + // may specify an 8-byte alignment even though it is actually 4-byte + // aligned. This fix was backported to several stable and longterm + // kernels, but it never made it to the 4.14 and 4.19 longterm branches. + // So, we try to work around this bug here. + // + // If there is only one note section (or segment) and it specifies + // 8-byte alignment, then it might be affected by this bug, so we retry + // with 4-byte alignment. + // + // Whenever we drop Linux 4.14 and 4.19 support, we can probably drop + // this workaround and just use dwelf_elf_gnu_build_id(). + if (nhdr8_data && num_note_sections == 1) { + return parse_gnu_build_id_from_notes(nhdr8_data->d_buf, + nhdr8_data->d_size, 4, + bswap, ret); + } + + *ret = NULL; + return 0; +} +#endif diff --git a/libdrgn/elf_notes.h b/libdrgn/elf_notes.h new file mode 100644 index 000000000..3f198f6ee --- /dev/null +++ b/libdrgn/elf_notes.h @@ -0,0 +1,115 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +/** + * @file + * + * ELF note parsing. + */ + +#ifndef DRGN_ELF_NOTES_H +#define DRGN_ELF_NOTES_H + +#include +#include +#include + +/** + * Parse the next ELF note out of a buffer. + * + * @note + * Alignment of ELF notes is a mess. The [System V + * gABI](http://www.sco.com/developers/gabi/latest/ch5.pheader.html#note_section) + * says that the note header and descriptor should be aligned to 4 bytes for + * 32-bit files and 8 bytes for 64-bit files. However, on Linux, 4-byte + * alignment is used for both 32-bit and 64-bit files. + * @note + * The only exception as of 2024 is `NT_GNU_PROPERTY_TYPE_0`, which is defined + * to follow the gABI alignment. See + * ["PT_NOTE alignment, NT_GNU_PROPERTY_TYPE_0, glibc and gold"](https://public-inbox.org/libc-alpha/13a92cb0-a993-f684-9a96-e02e4afb1bef@redhat.com/). + * But, note that the 12-byte note header plus the 4-byte `"GNU\0"` name is a + * multiple of 8 bytes, and the `NT_GNU_PROPERTY_TYPE_0` descriptor is defined + * to be a multiple of 4 bytes for 32-bit files and 8 bytes for 64-bit files. As + * a result, `NT_GNU_PROPERTY_TYPE_0` is never actually padded, and 4-byte vs. + * 8-byte alignment are equivalent for parsing purposes. + * @note + * According to the [gABI Linux + * Extensions](https://gitlab.com/x86-psABIs/Linux-ABI), consumers are now + * supposed to use the `p_align` of the `PT_NOTE` segment instead of assuming an + * alignment. However, the Linux kernel as of 6.0 generates core dumps with + * `PT_NOTE` segments with a `p_align` of 0 or 1 which are actually aligned to 4 + * bytes. So, when parsing notes from an ELF file, you need to use 8-byte + * alignment if `p_align` is 8 and 4-byte alignment otherwise. binutils and + * elfutils appear to do the same. + * @note + * Before Linux kernel commit f7ba52f302fd ("vmlinux.lds.h: Discard + * .note.gnu.property section") (in v6.4), the vmlinux linker script can create + * a `PT_NOTE` segment with a `p_align` of 8 where the entries other than + * `NT_GNU_PROPERTY_TYPE_0` are actually aligned to 4 bytes. + * @note + * Finally, there are cases where we don't know `p_align`. For example, + * `/sys/kernel/notes` contains the contents of the vmlinux `.notes` section, + * which (ignoring the aforementioned bug) we can assume has 4-byte alignment. + * As another example, `/sys/module/$module/notes/` contains a file for each + * note section. Since `NT_GNU_PROPERTY_TYPE_0` can be parsed assuming 4-byte + * alignment, we can again assume 4-byte alignment. This will work as long as + * any future note types requiring 8-byte alignment also happen to have an + * 8-byte aligned header+name and descriptor (but hopefully no one ever adds an + * 8-byte aligned note again). + * + * @param[in,out] p Current position. Initialize to the beginning of the buffer. + * @param[in,out] size Remaining size. Initialize to the size of the buffer. + * @param[in] align Note alignment. Usually `p_align == 8 ? 8 : 4` if the + * program header is available, otherwise 4. + * @param[in] bswap Whether the note header needs to be byte-swapped. + * @param[out] name_ret Returned note name. + * @param[out] desc_ret Returned note descriptor. + * @return @c true if a note was parsed, @c false if there are no more notes. + */ +bool next_elf_note(const void **p, size_t *size, unsigned int align, bool bswap, + GElf_Nhdr *nhdr_ret, const char **name_ret, + const void **desc_ret); + + +/** + * Parse a GNU build ID from a buffer containing note data. + * + * @param[in] buf Buffer containing note data. + * @param[in] size Size of @p buf in bytes. + * @param[in] align Note alignment. See @ref next_elf_note(). + * @param[in] bswap Whether the note header needs to be byte-swapped. + * @param[out] ret Returned build ID, or @c NULL if not found. + * @return Size of returned build ID in bytes, or @c NULL if not found. + */ +size_t parse_gnu_build_id_from_notes(const void *buf, size_t size, + unsigned int align, bool bswap, + const void **ret); + +#if _ELFUTILS_PREREQ(0, 175) +/** + * Find an ELF file's GNU build ID, working around `vmlinux` files with broken + * note alignment when possible. + * + * @param[out] ret Returned build ID, or @c NULL if not found. + * @return Size of returned build ID in bytes, or @c NULL if not found. + */ +ssize_t drgn_elf_gnu_build_id(Elf *elf, const void **ret); +#else +#include + +static inline ssize_t drgn_elf_gnu_build_id(Elf *elf, const void **ret) +{ + return dwelf_elf_gnu_build_id(elf, ret); +} +#endif + +static inline Elf_Type note_header_type(uint64_t p_align) +{ +#if _ELFUTILS_PREREQ(0, 175) + if (p_align == 8) + return ELF_T_NHDR8; +#endif + return ELF_T_NHDR; +} + +#endif /* DRGN_ELF_NOTES_H */ diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 1960af701..94bc2dc70 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -21,6 +21,7 @@ #include "debug_info.h" #include "drgn_internal.h" #include "elf_file.h" +#include "elf_notes.h" #include "error.h" #include "hash_table.h" #include "helpers.h" @@ -603,25 +604,6 @@ kernel_module_iterator_next(struct kernel_module_iterator *it) return NULL; } -static size_t parse_gnu_build_id_from_note(const void *note, size_t note_size, - bool bswap, const void **ret) -{ - Elf32_Nhdr nhdr; - const char *name; - const void *desc; - while (next_elf_note(¬e, ¬e_size, 4, bswap, &nhdr, &name, &desc)) { - if (nhdr.n_namesz == sizeof("GNU") && - memcmp(name, "GNU", sizeof("GNU")) == 0 && - nhdr.n_type == NT_GNU_BUILD_ID && - nhdr.n_descsz > 0) { - *ret = desc; - return nhdr.n_descsz; - } - } - *ret = NULL; - return 0; -} - static struct drgn_error * kernel_module_iterator_gnu_build_id_live(struct kernel_module_iterator *it, const void **build_id_ret, @@ -676,8 +658,8 @@ kernel_module_iterator_gnu_build_id_live(struct kernel_module_iterator *it, close(fd); *build_id_len_ret = - parse_gnu_build_id_from_note(it->build_id_buf, r, false, - build_id_ret); + parse_gnu_build_id_from_notes(it->build_id_buf, r, 4, + false, build_id_ret); if (*build_id_len_ret) { err = NULL; goto out; @@ -769,8 +751,8 @@ kernel_module_iterator_gnu_build_id(struct kernel_module_iterator *it, return err; *build_id_len_ret = - parse_gnu_build_id_from_note(it->build_id_buf, size, - bswap, build_id_ret); + parse_gnu_build_id_from_notes(it->build_id_buf, size, 4, + bswap, build_id_ret); if (*build_id_len_ret) return NULL; } @@ -1578,7 +1560,7 @@ report_kernel_modules(struct drgn_debug_info_load_state *load, struct kernel_module_file *kmod = &kmods[i]; ssize_t build_id_len = - dwelf_elf_gnu_build_id(kmod->elf, &kmod->gnu_build_id); + drgn_elf_gnu_build_id(kmod->elf, &kmod->gnu_build_id); if (build_id_len < 0) { err = drgn_debug_info_report_error(load, kmod->path, NULL, diff --git a/libdrgn/program.c b/libdrgn/program.c index 7d2f08efb..ab7783196 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -21,6 +21,7 @@ #include "cleanup.h" #include "debug_info.h" +#include "elf_notes.h" #include "error.h" #include "helpers.h" #include "io.h" diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 3650d8d71..c1fe3c595 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -14,6 +14,7 @@ #include "dwarf_constants.h" #include "dwarf_info.h" #include "elf_file.h" +#include "elf_notes.h" #include "error.h" #include "helpers.h" #include "minmax.h" @@ -650,7 +651,7 @@ drgn_get_initial_registers_from_kernel_core_dump(struct drgn_program *prog, const void *p = drgn_object_buffer(&tmp); size_t size = drgn_object_size(&tmp); bool bswap = drgn_platform_bswap(&prog->platform); - Elf32_Nhdr nhdr; + GElf_Nhdr nhdr; const char *name; const void *desc; while (next_elf_note(&p, &size, 4, bswap, &nhdr, &name, &desc) From 120428f81500c582416fe28cd846327c7a0b418b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Dec 2024 11:38:51 -0800 Subject: [PATCH 2/4] drgn.helpers.linux.slab: harden against cycles in freelist Breno and Leandro debugged an issue where drgn was stuck in identify_address() in our production crash monitoring system. They found that _slub_get_freelist() was in an infinite loop because the freelist contained a cycle. This indicates some sort of corruption or race. We generally assume that data structures are valid when parsing them, but this case is particularly risky because we use identify_address() liberally. Luckily, we're already building a set of freelist pointers, so we can check for a cycle relatively cheaply. Breno and Leandro suggested breaking from the loop when a cycle is detected, but that can cause silently undiagnosed corruptions and bogus results from slab_cache_for_each_allocated_object() and slab_object_info(). Instead, let's raise an exception. slab_cache_for_each_allocated_object() has to fail hard, but slab_object_info() can catch the exception and indicate the corruption in its return value. Closes #437. Reported-by: Breno Leitao Reported-by: Leandro Silva Signed-off-by: Omar Sandoval --- drgn/helpers/common/memory.py | 7 +++- drgn/helpers/linux/slab.py | 71 ++++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/drgn/helpers/common/memory.py b/drgn/helpers/common/memory.py index 95b9019ff..d73d5aa26 100644 --- a/drgn/helpers/common/memory.py +++ b/drgn/helpers/common/memory.py @@ -113,7 +113,12 @@ def _identify_kernel_address( cache_name = escape_ascii_string( slab_info.slab_cache.name.string_(), escape_backslash=True ) - maybe_free = "" if slab_info.allocated else "free " + if slab_info.allocated: + maybe_free = "" + elif slab_info.allocated is None: + maybe_free = "corrupted " + else: + maybe_free = "free " return f"{maybe_free}slab object: {cache_name}+{hex(addr - slab_info.address)}" else: return _identify_kernel_vmap(prog, addr, cache) diff --git a/drgn/helpers/linux/slab.py b/drgn/helpers/linux/slab.py index f1b56dc9f..1f51030a2 100644 --- a/drgn/helpers/linux/slab.py +++ b/drgn/helpers/linux/slab.py @@ -30,6 +30,7 @@ container_of, sizeof, ) +from drgn.helpers import ValidationError from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.common.prog import takes_program_or_default from drgn.helpers.linux.cpumask import for_each_online_cpu @@ -205,6 +206,19 @@ def print_slab_caches(prog: Program) -> None: print(f"{name} ({s.type_.type_name()})0x{s.value_():x}") +class SlabCorruptionError(ValidationError): + """ + Error raised when a corruption is encountered in a slab allocator data + structure. + """ + + +class SlabFreelistCycleError(SlabCorruptionError): + """ + Error raised when a cycle is encountered in a slab allocator freelist. + """ + + # Between SLUB, SLAB, their respective configuration options, and the # differences between kernel versions, there is a lot of state that we need to # keep track of to inspect the slab allocator. It isn't pretty, but this class @@ -214,6 +228,7 @@ class _SlabCacheHelper: def __init__(self, slab_cache: Object) -> None: self._prog = slab_cache.prog_ self._slab_cache = slab_cache.read_() + self._freelist_error: Optional[Exception] = None def _page_objects( self, page: Object, slab: Object, pointer_type: Type @@ -221,6 +236,9 @@ def _page_objects( raise NotImplementedError() def for_each_allocated_object(self, type: Union[str, Type]) -> Iterator[Object]: + if self._freelist_error: + raise self._freelist_error + pointer_type = self._prog.pointer_type(self._prog.type(type)) slab_type = _get_slab_type(self._prog) # Get the underlying implementation directly to avoid overhead on each @@ -307,9 +325,17 @@ def _try_hardened_freelist_dereference(ptr_addr: int) -> int: self._freelist_dereference = _try_hardened_freelist_dereference - def _slub_get_freelist(freelist: Object, freelist_set: Set[int]) -> None: + def _slub_get_freelist( + freelist_name: Callable[[], str], freelist: Object, freelist_set: Set[int] + ) -> None: ptr = freelist.value_() while ptr: + if ptr in freelist_set: + raise SlabFreelistCycleError( + f"{fsdecode(slab_cache.name.string_())} {freelist_name()} " + "freelist contains cycle; " + "may be corrupted or in the middle of update" + ) freelist_set.add(ptr) ptr = self._freelist_dereference(ptr + freelist_offset) @@ -325,11 +351,16 @@ def _slub_get_freelist(freelist: Object, freelist_set: Set[int]) -> None: # slab for a CPU is `struct slab *slab`. Before that, it is `struct # page *page`. cpu_slab_attr = "slab" if hasattr(cpu_slab, "slab") else "page" - for cpu in for_each_online_cpu(self._prog): - this_cpu_slab = per_cpu_ptr(cpu_slab, cpu) - slab = getattr(this_cpu_slab, cpu_slab_attr).read_() - if slab and slab.slab_cache == slab_cache: - _slub_get_freelist(this_cpu_slab.freelist, cpu_freelists) + try: + for cpu in for_each_online_cpu(self._prog): + this_cpu_slab = per_cpu_ptr(cpu_slab, cpu) + slab = getattr(this_cpu_slab, cpu_slab_attr).read_() + if slab and slab.slab_cache == slab_cache: + _slub_get_freelist( + lambda: f"cpu {cpu}", this_cpu_slab.freelist, cpu_freelists + ) + except (SlabCorruptionError, FaultError) as e: + self._freelist_error = e self._slub_get_freelist = _slub_get_freelist self._cpu_freelists = cpu_freelists @@ -338,7 +369,7 @@ def _page_objects( self, page: Object, slab: Object, pointer_type: Type ) -> Iterator[Object]: freelist: Set[int] = set() - self._slub_get_freelist(slab.freelist, freelist) + self._slub_get_freelist(lambda: f"slab {hex(slab)}", slab.freelist, freelist) addr = page_to_virt(page).value_() + self._red_left_pad end = addr + self._slab_cache_size * slab.objects while addr < end: @@ -353,11 +384,22 @@ def object_info(self, page: Object, slab: Object, addr: int) -> "SlabObjectInfo" + (addr - first_addr) // self._slab_cache_size * self._slab_cache_size ) if address in self._cpu_freelists: - allocated = False + allocated: Optional[bool] = False else: freelist: Set[int] = set() - self._slub_get_freelist(slab.freelist, freelist) - allocated = address not in freelist + try: + self._slub_get_freelist( + lambda: f"slab {hex(slab)}", slab.freelist, freelist + ) + except (SlabCorruptionError, FaultError): + allocated = False if address in freelist else None + else: + if address in freelist: + allocated = False + elif self._freelist_error: + allocated = None + else: + allocated = True return SlabObjectInfo(self._slab_cache, slab, address, allocated) @@ -536,11 +578,14 @@ class SlabObjectInfo: address: int """Address of the slab object.""" - allocated: bool - """``True`` if the object is allocated, ``False`` if it is free.""" + allocated: Optional[bool] + """ + ``True`` if the object is allocated, ``False`` if it is free, or ``None`` + if not known because the slab cache is corrupted. + """ def __init__( - self, slab_cache: Object, slab: Object, address: int, allocated: bool + self, slab_cache: Object, slab: Object, address: int, allocated: Optional[bool] ) -> None: self.slab_cache = slab_cache self.slab = slab From 40065816a0f4c4b95eb57908f8cebbdc225d724d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Dec 2024 13:48:45 -0800 Subject: [PATCH 3/4] drgn.internal.repl: also catch AttributeError in enhanced REPL attempt I didn't see this in practice, it's mostly out of paranoia. In particular, if _pyrepl.readline._setup() goes away, this will catch it. Signed-off-by: Omar Sandoval --- drgn/internal/repl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drgn/internal/repl.py b/drgn/internal/repl.py index df050e0df..45dabab4a 100644 --- a/drgn/internal/repl.py +++ b/drgn/internal/repl.py @@ -39,7 +39,7 @@ def interact(local: Dict[str, Any], banner: str) -> None: print(banner, file=sys.stderr) run_multiline_interactive_console(console) -except (ModuleNotFoundError, ImportError): +except (ModuleNotFoundError, ImportError, AttributeError): import code import readline From ecebeca4f94a0dd081e6b3efb21a614be710cafc Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Dec 2024 14:27:54 -0800 Subject: [PATCH 4/4] drgn.internal.repl: honor PYTHON_BASIC_REPL instead of our own environment variable Apparently the official Python interpreter has its own environment variable for disabling the new REPL, so let's just reuse that instead of defining our own. Signed-off-by: Omar Sandoval --- docs/advanced_usage.rst | 15 ++++++++------- drgn/internal/repl.py | 7 ++++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index f51f28dc9..84c44eb84 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -114,19 +114,20 @@ Some of drgn's behavior can be modified through environment variables: Whether drgn should use libkdumpfile for ELF vmcores (0 or 1). The default is 0. This functionality will be removed in the future. -``DRGN_USE_PYREPL`` - Whether drgn should attempt to use the improved REPL (pyrepl) from Python - 3.13. This provides colored output and multiline editing, among other - features. The default is 1. Unfortunately, Python has no public API to use - these features, so drgn must rely on internal implementation details. Set - this to 0 to disable this feature. - ``DRGN_USE_SYS_MODULE`` Whether drgn should use ``/sys/module`` to find information about loaded kernel modules for the running kernel instead of getting them from the core dump (0 or 1). The default is 1. This environment variable is mainly intended for testing and may be ignored in the future. +``PYTHON_BASIC_REPL`` + If non-empty, don't try to use the `new interactive REPL + `_ + added in Python 3.13. drgn makes use of the new REPL through internal + implementation details since there is `not yet + `_ a public API for it. If + it breaks, this may be used as an escape hatch. + .. _kernel-special-objects: Linux Kernel Special Objects diff --git a/drgn/internal/repl.py b/drgn/internal/repl.py index 45dabab4a..4fb69964a 100644 --- a/drgn/internal/repl.py +++ b/drgn/internal/repl.py @@ -15,9 +15,10 @@ # in the "code" module. We'd like to give the best experience possible, so we'll # detect _pyrepl and try to use it where possible. try: - # Since this mucks with internals, add a knob that can be used to disable it - # and use the traditional REPL. - if os.environ.get("DRGN_USE_PYREPL") in ("0", "n", "N", "false", "False"): + # The official Python interpreter honors this environment variable to + # disable the new REPL. We do the same, which also gives users an escape + # hatch if any of the internals we're messing with change. + if os.environ.get("PYTHON_BASIC_REPL"): raise ModuleNotFoundError() # Unfortunately, the typeshed library behind mypy explicitly removed type