From 0832aecfc109d880fbd6fe6069098d49779db8b8 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 18 Feb 2025 14:50:44 +0100 Subject: [PATCH 1/8] lxd/main_checkfeature: add bpf_token kernel feature detection logic Signed-off-by: Alexander Mikhalitsyn --- lxd/api_1.0.go | 1 + lxd/daemon.go | 7 +++++++ lxd/main_checkfeature.go | 35 +++++++++++++++++++++++++++++++++++ lxd/sys/os.go | 1 + 4 files changed, 44 insertions(+) diff --git a/lxd/api_1.0.go b/lxd/api_1.0.go index 3b2088609daa..afab98620dd9 100644 --- a/lxd/api_1.0.go +++ b/lxd/api_1.0.go @@ -329,6 +329,7 @@ func api10Get(d *Daemon, r *http.Request) response.Response { } env.KernelFeatures = map[string]string{ + "bpf_token": fmt.Sprint(s.OS.BPFToken), "netnsid_getifaddrs": fmt.Sprint(s.OS.NetnsGetifaddrs), "uevent_injection": fmt.Sprint(s.OS.UeventInjection), "unpriv_binfmt": fmt.Sprint(s.OS.UnprivBinfmt), diff --git a/lxd/daemon.go b/lxd/daemon.go index a508bf3d8186..dd3f6c843164 100644 --- a/lxd/daemon.go +++ b/lxd/daemon.go @@ -1235,6 +1235,13 @@ func (d *Daemon) init() error { logger.Info(" - unprivileged binfmt_misc: no") } + d.os.BPFToken = canUseBPFToken() + if d.os.BPFToken { + logger.Info(" - BPF Token: yes") + } else { + logger.Info(" - BPF Token: no") + } + /* * During daemon startup we're the only thread that touches VFS3Fscaps * so we don't need to bother with atomic.StoreInt32() when touching diff --git a/lxd/main_checkfeature.go b/lxd/main_checkfeature.go index 70b20f49942f..e97cc76d8095 100644 --- a/lxd/main_checkfeature.go +++ b/lxd/main_checkfeature.go @@ -42,6 +42,7 @@ __ro_after_init bool pidfd_aware = false; __ro_after_init bool pidfd_setns_aware = false; __ro_after_init bool uevent_aware = false; __ro_after_init bool binfmt_aware = false; +__ro_after_init bool bpftoken_aware = false; __ro_after_init int seccomp_notify_aware = 0; __ro_after_init char errbuf[4096]; @@ -619,6 +620,35 @@ static void is_binfmt_aware(void) binfmt_aware = true; } +static void is_bpftoken_aware(void) 
+{ + __do_close int fs_fd = -EBADF; + int ret; + + fs_fd = lxd_fsopen("bpf", FSOPEN_CLOEXEC); + if (fs_fd < 0) { + (void)sprintf(errbuf, "%s", "fsopen() failed on bpffs"); + return; + } + + // Try to set an invalid "delegate_cmds" option value and ensure that it fails. + // This is important to check, because bpffs ignores unknown options on the kernel side. + ret = lxd_fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_cmds", "MUSTFAIL", 0); + if (ret == 0) { + (void)sprintf(errbuf, "%s", "fsconfig succeed to set delegate_cmds, but must fail"); + return; + } + + // Now let's check that a valid value works too. Just in case. + ret = lxd_fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_cmds", "any", 0); + if (ret < 0) { + (void)sprintf(errbuf, "%s - fsconfig failed to set delegate_cmds", strerror(errno)); + return; + } + + bpftoken_aware = true; +} + void checkfeature(void) { __do_close int hostnetns_fd = -EBADF, newnetns_fd = -EBADF, pidfd = -EBADF; @@ -638,6 +668,7 @@ void checkfeature(void) (void)sprintf(errbuf, "%s", "Failed to attach to host network namespace"); is_binfmt_aware(); + is_bpftoken_aware(); } static bool is_empty_string(char *s) @@ -734,3 +765,7 @@ func canUseCoreScheduling() bool { func canUseBinfmt() bool { return bool(C.binfmt_aware) } + +func canUseBPFToken() bool { + return bool(C.bpftoken_aware) +} diff --git a/lxd/sys/os.go b/lxd/sys/os.go index 9a6299d9fbc4..1e46f7bd2cfc 100644 --- a/lxd/sys/os.go +++ b/lxd/sys/os.go @@ -81,6 +81,7 @@ type OS struct { CGInfo cgroup.Info // Kernel features + BPFToken bool // BPFToken indicates support for BPF token delegation mechanism. CloseRange bool // CloseRange indicates support for the close_range syscall. ContainerCoreScheduling bool // ContainerCoreScheduling indicates LXC and kernel support for core scheduling. CoreScheduling bool // CoreScheduling indicates support for core scheduling syscalls. 
From 188276ba01ef92fa955631fa2c2f78b0031c9af5 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 17 Feb 2025 16:38:28 +0100 Subject: [PATCH 2/8] lxd/main_forkmount: bpffs mount with delegation support Signed-off-by: Alexander Mikhalitsyn --- lxd/main_forkmount.go | 203 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/lxd/main_forkmount.go b/lxd/main_forkmount.go index 1c30c9f9654a..3c8f8a75c2de 100644 --- a/lxd/main_forkmount.go +++ b/lxd/main_forkmount.go @@ -23,8 +23,10 @@ package main #include "lxd.h" #include "memory_utils.h" #include "mount_utils.h" +#include "process_utils.h" #include "syscall_numbers.h" #include "syscall_wrappers.h" +#include "../shared/netutils/unixfd.h" #define VERSION_AT_LEAST(major, minor, micro) \ ((LXC_DEVEL == 1) || (!(major > LXC_VERSION_MAJOR || \ @@ -550,6 +552,191 @@ static void do_lxc_forkumount(void) #endif } +static void do_mount_bpffs(int pidfd, int ns_fd) +{ + __do_close int fs_fd = -EBADF, mnt_fd = -EBADF; + int ret; + char *mountpoint; + char *delegate_cmds, *delegate_maps, *delegate_progs, *delegate_attachs; + int sk_fds[2] = {-EBADF, -EBADF}; + pid_t child_pid = -1; + struct unix_fds fds = {}; + char buf[50]; + char *bpffs_fd_ready = "BPFFSFDREADY"; + char *bpffs_mnt_ready = "BPFFSMNTREADY"; + + mountpoint = advance_arg(true); + delegate_cmds = advance_arg(true); + delegate_maps = advance_arg(true); + delegate_progs = advance_arg(true); + delegate_attachs = advance_arg(true); + + ret = socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, sk_fds); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to create anonymous unix socket pair\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + child_pid = fork(); + if (child_pid < 0) { + fprintf(stderr, + "%s - Failed to fork()\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + if (child_pid == 0) { + __do_close int mountpoint_fd = -EBADF; + + // close parent's socket + close(sk_fds[0]); + + // 1. 
Go into container's user & mount namespace + + attach_userns_fd(ns_fd); + + if (!change_namespaces(pidfd, ns_fd, CLONE_NEWNS)) + die("Failed setns to container mount namespace"); + + // 2. Create bpf filesystem file_context fd (tied to users) + + fs_fd = lxd_fsopen("bpf", FSOPEN_CLOEXEC); + if (fs_fd < 0) + die("fsopen: bpf"); + + // 3. Send bpf filesystem file_context fd to a parent process + + ret = lxc_abstract_unix_send_fds(sk_fds[1], &fs_fd, 1, bpffs_fd_ready, sizeof(bpffs_fd_ready)); + if (ret < 0) + die("lxc_abstract_unix_send_fds(bpffs_fd_ready) failed"); + + close_prot_errno_disarm(fs_fd); + + // 8. Get the detached mount of bpf filesystem from the parent process + + fds.fd_count_max = 1; + fds.flags = UNIX_FDS_ACCEPT_EXACT; + ret = lxc_abstract_unix_recv_fds(sk_fds[1], &fds, buf, sizeof(buf)); + if (ret < 0 || fds.fd_count_ret != 1 || strncmp(buf, bpffs_mnt_ready, sizeof(bpffs_mnt_ready))) + die("lxc_abstract_unix_recv_fds(bpffs_mnt_ready) failed"); + mnt_fd = fds.fd[0]; + + mountpoint_fd = make_dest_open(mnt_fd, mountpoint); + if (mountpoint_fd < 0) + die("Failed to create destination mount point"); + + // 9. Move the detached mount of bpf filesystem to a right place in the container's mount namespace + + ret = lxd_move_mount(mnt_fd, "", mountpoint_fd, "", + MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH); + if (ret) + die("Failed to move detached mount to target from %d to %s", mnt_fd, mountpoint); + + exit(EXIT_SUCCESS); + } + + // close child's socket + close(sk_fds[1]); + + // 4. Get bpf filesystem file_context fd from a child process + + fds.fd_count_max = 1; + fds.flags = UNIX_FDS_ACCEPT_EXACT; + ret = lxc_abstract_unix_recv_fds(sk_fds[0], &fds, buf, sizeof(buf)); + if (ret < 0 || fds.fd_count_ret != 1 || strncmp(buf, bpffs_fd_ready, sizeof(bpffs_fd_ready))) { + fprintf(stderr, + "%s - lxc_abstract_unix_recv_fds(bpffs_fd_ready) failed\n", + strerror(errno)); + goto err_process; + } + + fs_fd = fds.fd[0]; + + // 5. 
Configure bpf filesystem file_context fd to set BPF token delegation properties + + ret = lxd_fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_cmds", delegate_cmds, 0); + if (ret < 0) { + fprintf(stderr, + "%s - fsconfig failed to set delegate_cmds=%s\n", + strerror(errno), + delegate_cmds); + goto err_process; + } + + ret = lxd_fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_maps", delegate_maps, 0); + if (ret < 0) { + fprintf(stderr, + "%s - fsconfig failed to set delegate_maps=%s\n", + strerror(errno), + delegate_maps); + goto err_process; + } + + ret = lxd_fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_progs", delegate_progs, 0); + if (ret < 0) { + fprintf(stderr, + "%s - fsconfig failed to set delegate_progs=%s\n", + strerror(errno), + delegate_progs); + goto err_process; + } + + ret = lxd_fsconfig(fs_fd, FSCONFIG_SET_STRING, "delegate_attachs", delegate_attachs, 0); + if (ret < 0) { + fprintf(stderr, + "%s - fsconfig failed to set delegate_attachs=%s\n", + strerror(errno), + delegate_attachs); + goto err_process; + } + + ret = lxd_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + if (ret < 0) { + fprintf(stderr, + "%s - fsconfig(FSCONFIG_CMD_CREATE) failed\n", + strerror(errno)); + goto err_process; + } + + // 6. Make a detached mount of bpf filesystem from a file_context fd + + mnt_fd = lxd_fsmount(fs_fd, FSMOUNT_CLOEXEC, 0); + if (mnt_fd < 0) { + fprintf(stderr, + "%s - fsmount failed\n", + strerror(errno)); + goto err_process; + } + + // 7. Send the detached mount of bpf filesystem to the child process + + ret = lxc_abstract_unix_send_fds(sk_fds[0], &mnt_fd, 1, bpffs_mnt_ready, sizeof(bpffs_mnt_ready)); + if (ret < 0) { + fprintf(stderr, + "%s - lxc_abstract_unix_send_fds(bpffs_mnt_ready) failed\n", + strerror(errno)); + goto err_process; + } + + // 10. 
End + + ret = wait_for_pid(child_pid); + if (ret) + die("wait_for_pid"); + + exit(EXIT_SUCCESS); + +err_process: + if (child_pid > 0) { + kill(child_pid, SIGKILL); + wait_for_pid(child_pid); + } + + exit(EXIT_FAILURE); +} + void forkmount(void) { char *command = NULL, *cur = NULL; @@ -623,6 +810,22 @@ void forkmount(void) do_lxd_forkumount(pidfd, ns_fd); } else if (strcmp(command, "lxc-umount") == 0) { do_lxc_forkumount(); + } else if (strcmp(command, "bpffs") == 0) { + // Get the pid + cur = advance_arg(false); + if (cur == NULL || (strcmp(cur, "--help") == 0 || strcmp(cur, "--version") == 0 || strcmp(cur, "-h") == 0)) + return; + + pid = atoi(cur); + if (pid <= 0) + _exit(EXIT_FAILURE); + + pidfd = atoi(advance_arg(true)); + ns_fd = pidfd_nsfd(pidfd, pid); + if (ns_fd < 0) + _exit(EXIT_FAILURE); + + do_mount_bpffs(pidfd, ns_fd); } } */ From 7bbd0f5127e358a5072dd984a1ff1312e3b9283f Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 17 Feb 2025 12:17:06 +0100 Subject: [PATCH 3/8] lxd/api_internal: add start-host instance hook Signed-off-by: Alexander Mikhalitsyn --- lxd-user/callhook/callhook.go | 9 ++++++++ lxd/api_internal.go | 36 ++++++++++++++++++++++++++++++ lxd/instance/instance_interface.go | 3 +++ 3 files changed, 48 insertions(+) diff --git a/lxd-user/callhook/callhook.go b/lxd-user/callhook/callhook.go index 005b7d3aabf6..7fc25420e721 100644 --- a/lxd-user/callhook/callhook.go +++ b/lxd-user/callhook/callhook.go @@ -73,6 +73,15 @@ func HandleContainerHook(lxdPath string, projectName string, instanceRef string, u := api.NewURL().Path("internal", "containers", instanceRef, "on"+hook) u.WithQuery("target", target) + if hook == "starthost" { + lxcPID := os.Getenv("LXC_PID") + if lxcPID == "" { + return errors.New("starthost hook requires LXC_PID env variable set") + } + + u.WithQuery("lxc_pid", lxcPID) + } + if projectName != "" { u.WithQuery("project", projectName) } diff --git a/lxd/api_internal.go b/lxd/api_internal.go index 
4d4c6b2fa9c9..acfcab2a556d 100644 --- a/lxd/api_internal.go +++ b/lxd/api_internal.go @@ -50,6 +50,7 @@ var apiInternal = []APIEndpoint{ internalClusterRebalanceCmd, internalClusterHealCmd, internalContainerOnStartCmd, + internalContainerOnStartHostCmd, internalContainerOnStopCmd, internalContainerOnStopNSCmd, internalGarbageCollectorCmd, @@ -82,6 +83,12 @@ var internalContainerOnStartCmd = APIEndpoint{ Get: APIEndpointAction{Handler: internalContainerOnStart, AccessHandler: allowPermission(entity.TypeServer, auth.EntitlementCanEdit)}, } +var internalContainerOnStartHostCmd = APIEndpoint{ + Path: "containers/{instanceRef}/onstarthost", + + Get: APIEndpointAction{Handler: internalContainerOnStartHost, AccessHandler: allowPermission(entity.TypeServer, auth.EntitlementCanEdit)}, +} + var internalContainerOnStopNSCmd = APIEndpoint{ Path: "containers/{instanceRef}/onstopns", @@ -341,6 +348,35 @@ func internalContainerOnStart(d *Daemon, r *http.Request) response.Response { return response.EmptySyncResponse } +func internalContainerOnStartHost(d *Daemon, r *http.Request) response.Response { + s := d.State() + + inst, err := internalContainerHookLoadFromReference(s, r) + if err != nil { + logger.Error("The start-host hook failed to load", logger.Ctx{"err": err}) + return response.SmartError(err) + } + + lxcPID := request.QueryParam(r, "lxc_pid") + if lxcPID == "" { + err := fmt.Errorf("No lxc_pid GET parameter was provided") + logger.Error("The start-host hook failed", logger.Ctx{"instance": inst.Name(), "err": err}) + return response.BadRequest(err) + } + + args := map[string]string{ + "LXC_PID": lxcPID, + } + + err = inst.OnHook(instance.HookStartHost, args) + if err != nil { + logger.Error("The start-host hook failed", logger.Ctx{"instance": inst.Name(), "err": err}) + return response.SmartError(err) + } + + return response.EmptySyncResponse +} + func internalContainerOnStopNS(d *Daemon, r *http.Request) response.Response { s := d.State() diff --git 
a/lxd/instance/instance_interface.go b/lxd/instance/instance_interface.go index 63658fe4f15d..c0290520d4a4 100644 --- a/lxd/instance/instance_interface.go +++ b/lxd/instance/instance_interface.go @@ -28,6 +28,9 @@ import ( // HookStart hook used when instance has started. const HookStart = "onstart" +// HookStartHost hook used when instance is fully ready to be started. +const HookStartHost = "onstarthost" + // HookStopNS hook used when instance has stopped but before namespaces have been destroyed. const HookStopNS = "onstopns" From 253082b24ce27460af9d1c2f4b4fd1b1fd236f79 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 17 Feb 2025 13:51:11 +0100 Subject: [PATCH 4/8] lxd/instance/lxc: add security.delegate_bpf support Signed-off-by: Alexander Mikhalitsyn --- lxd/instance/drivers/driver_lxc.go | 101 +++++++++++++++++++++++++++++ lxd/project/limits/permissions.go | 4 ++ 2 files changed, 105 insertions(+) diff --git a/lxd/instance/drivers/driver_lxc.go b/lxd/instance/drivers/driver_lxc.go index 93a85624e8d6..368889786105 100644 --- a/lxd/instance/drivers/driver_lxc.go +++ b/lxd/instance/drivers/driver_lxc.go @@ -1071,6 +1071,13 @@ func (d *lxc) initLXC(config bool) (*liblxc.Container, error) { } } + if shared.IsTrue(d.expandedConfig["security.delegate_bpf"]) { + err = lxcSetConfigItem(cc, "lxc.hook.start-host", fmt.Sprintf("%s callhook %s %s %s starthost", d.state.OS.ExecPath, shared.VarPath(""), strconv.Quote(d.Project().Name), strconv.Quote(d.Name()))) + if err != nil { + return nil, err + } + } + // Memory limits if d.state.OS.CGInfo.Supports(cgroup.Memory, cg) { memory := d.expandedConfig["limits.memory"] @@ -2464,6 +2471,8 @@ func (d *lxc) OnHook(hookName string, args map[string]string) error { switch hookName { case instance.HookStart: return d.onStart(args) + case instance.HookStartHost: + return d.onStartHost(args) case instance.HookStopNS: return d.onStopNS(args) case instance.HookStop: @@ -2522,6 +2531,94 @@ func (d *lxc) onStart(_ 
map[string]string) error { return nil } +// mountBpfFs mounts bpffs inside the container. +func (d *lxc) mountBpfFs(pid int, bpffsParams map[string]string) error { + if !d.state.OS.BPFToken { + return fmt.Errorf("BPF Token mechanism is not supported by kernel running.") + } + + pidFdNr, pidFd := seccomp.MakePidFd(pid, d.state) + if pidFdNr >= 0 { + defer func() { _ = pidFd.Close() }() + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + d.logger.Debug("bpffs mount helper is being called", logger.Ctx{"pid": pid, "bpffsParams": bpffsParams}) + stdout, err := shared.RunCommandInheritFds( + ctx, + []*os.File{pidFd}, + d.state.OS.ExecPath, + "forkmount", + "bpffs", + "--", + fmt.Sprint(pid), + fmt.Sprint(pidFdNr), + bpffsParams["mountpoint"], + bpffsParams["delegate_cmds"], + bpffsParams["delegate_maps"], + bpffsParams["delegate_progs"], + bpffsParams["delegate_attachs"]) + if err != nil { + d.logger.Error("bpffs mount helper has failed", logger.Ctx{"err": err, "stdout": stdout}) + return err + } + + d.logger.Debug("bpffs mount helper has finished without error", logger.Ctx{"stdout": stdout}) + + return nil +} + +// onStartHost implements the LXC start-host hook. 
+func (d *lxc) onStartHost(args map[string]string) error { + if shared.IsFalseOrEmpty(d.expandedConfig["security.delegate_bpf"]) { + return nil + } + + // Get the init PID + pidStr, ok := args["LXC_PID"] + if !ok { + return fmt.Errorf("No LXC_PID parameter was provided to start-host hook") + } + + pid, err := strconv.Atoi(pidStr) + if err != nil { + return fmt.Errorf("Invalid LXC_PID parameter was provided to start-host hook %q: %w", pidStr, err) + } + + bpffsParams := map[string]string{ + "delegate_cmds": "any", + "delegate_maps": "any", + "delegate_progs": "any", + "delegate_attachs": "any", + "mountpoint": "/sys/fs/bpf", + } + + if d.expandedConfig["security.delegate_bpf.cmds"] != "" { + bpffsParams["delegate_cmds"] = d.expandedConfig["security.delegate_bpf.cmds"] + } + + if d.expandedConfig["security.delegate_bpf.maps"] != "" { + bpffsParams["delegate_maps"] = d.expandedConfig["security.delegate_bpf.maps"] + } + + if d.expandedConfig["security.delegate_bpf.progs"] != "" { + bpffsParams["delegate_progs"] = d.expandedConfig["security.delegate_bpf.progs"] + } + + if d.expandedConfig["security.delegate_bpf.attachs"] != "" { + bpffsParams["delegate_attachs"] = d.expandedConfig["security.delegate_bpf.attachs"] + } + + err = d.mountBpfFs(pid, bpffsParams) + if err != nil { + return err + } + + return nil +} + // validateStartup checks any constraints that would prevent start up from succeeding under normal circumstances. func (d *lxc) validateStartup(statusCode api.StatusCode) error { err := d.common.validateStartup(statusCode) @@ -2539,6 +2636,10 @@ func (d *lxc) validateStartup(statusCode api.StatusCode) error { return fmt.Errorf("Instance is protected from being started") } + if shared.IsTrue(d.expandedConfig["security.delegate_bpf"]) && !d.state.OS.BPFToken { + return fmt.Errorf("BPF Token mechanism is not supported by your kernel. 
Linux kernel 6.9+ is required to start this instance, or security.delegate_bpf option must be disabled") + } + return nil } diff --git a/lxd/project/limits/permissions.go b/lxd/project/limits/permissions.go index 675c59b44ab9..01d0445ce66f 100644 --- a/lxd/project/limits/permissions.go +++ b/lxd/project/limits/permissions.go @@ -866,6 +866,10 @@ func isContainerLowLevelOptionForbidden(key string) bool { return true } + if strings.HasPrefix(key, "security.delegate_bpf") { + return true + } + if shared.ValueInSlice(key, []string{ "boot.host_shutdown_timeout", "linux.kernel_modules", From db9f6da21e80126110f875f40076c3534a0df003 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 17 Feb 2025 13:53:38 +0100 Subject: [PATCH 5/8] lxd/instance/instancetype/instance: add security.delegate_bpf* config options Signed-off-by: Alexander Mikhalitsyn --- lxd/instance/instancetype/instance.go | 50 ++++++++++++++++++++++++++ shared/validate/validate_cgo.go | 51 +++++++++++++++++++++++++++ shared/validate/validate_no_cgo.go | 14 ++++++++ 3 files changed, 115 insertions(+) create mode 100644 shared/validate/validate_cgo.go create mode 100644 shared/validate/validate_no_cgo.go diff --git a/lxd/instance/instancetype/instance.go b/lxd/instance/instancetype/instance.go index fa43b8dbdadb..add90780e1f1 100644 --- a/lxd/instance/instancetype/instance.go +++ b/lxd/instance/instancetype/instance.go @@ -911,6 +911,56 @@ var InstanceConfigKeysContainer = map[string]func(value string) error{ // shortdesc: Whether to handle the `sysinfo` system call "security.syscalls.intercept.sysinfo": validate.Optional(validate.IsBool), + // lxdmeta:generate(entities=instance; group=security; key=security.delegate_bpf) + // + // --- + // type: bool + // defaultdesc: `false` + // liveupdate: no + // condition: unprivileged container + // shortdesc: Whether to enable eBPF delegation using BPF Token mechanism + "security.delegate_bpf": validate.Optional(validate.IsBool), + + // 
lxdmeta:generate(entities=instance; group=security; key=security.delegate_bpf.cmds) + // + // --- + // type: string + // defaultdesc: `any` + // liveupdate: no + // condition: unprivileged container + // shortdesc: Which eBPF commands to allow with delegation mechanism + "security.delegate_bpf.cmds": validate.Optional(validate.IsBpfDelegateOption("cmds")), + + // lxdmeta:generate(entities=instance; group=security; key=security.delegate_bpf.maps) + // + // --- + // type: string + // defaultdesc: `any` + // liveupdate: no + // condition: unprivileged container + // shortdesc: Which eBPF maps to allow with delegation mechanism + "security.delegate_bpf.maps": validate.Optional(validate.IsBpfDelegateOption("maps")), + + // lxdmeta:generate(entities=instance; group=security; key=security.delegate_bpf.progs) + // + // --- + // type: string + // defaultdesc: `any` + // liveupdate: no + // condition: unprivileged container + // shortdesc: Which eBPF program types to allow with delegation mechanism + "security.delegate_bpf.progs": validate.Optional(validate.IsBpfDelegateOption("progs")), + + // lxdmeta:generate(entities=instance; group=security; key=security.delegate_bpf.attachs) + // + // --- + // type: string + // defaultdesc: `any` + // liveupdate: no + // condition: unprivileged container + // shortdesc: Which eBPF attach types to allow with delegation mechanism + "security.delegate_bpf.attachs": validate.Optional(validate.IsBpfDelegateOption("attachs")), + // lxdmeta:generate(entities=instance; group=volatile; key=volatile.last_state.idmap) // The UID/GID map that has been applied to the container's underlying storage. 
// This is usually set for containers created on older kernels that don't diff --git a/shared/validate/validate_cgo.go b/shared/validate/validate_cgo.go new file mode 100644 index 000000000000..ce340d59a472 --- /dev/null +++ b/shared/validate/validate_cgo.go @@ -0,0 +1,51 @@ +//go:build linux && cgo + +package validate + +/* +#include "../../lxd/include/config.h" +#include "../../lxd/include/memory_utils.h" +#include "../../lxd/include/mount_utils.h" + +static int is_bpf_delegate_option_value_valid(char *option, char *value) +{ + __do_close int fs_fd = -EBADF; + int ret; + + fs_fd = lxd_fsopen("bpf", FSOPEN_CLOEXEC); + if (fs_fd < 0) { + return 2; + } + + ret = lxd_fsconfig(fs_fd, FSCONFIG_SET_STRING, option, value, 0); + if (ret < 0) { + return 1; + } + + return 0; +} +*/ +import "C" + +import ( + "fmt" + "unsafe" +) + +// IsBpfDelegateOption validates a BPF Token delegation option. +func IsBpfDelegateOption(delegateOption string) func(value string) error { + return func(value string) error { + cdelegateOption := C.CString(fmt.Sprintf("delegate_%s", delegateOption)) + defer C.free(unsafe.Pointer(cdelegateOption)) + + cvalue := C.CString(value) + defer C.free(unsafe.Pointer(cvalue)) + + r := C.is_bpf_delegate_option_value_valid(cdelegateOption, cvalue) + if r != 0 { + return fmt.Errorf("Invalid %s option value: %s", delegateOption, value) + } + + return nil + } +} diff --git a/shared/validate/validate_no_cgo.go b/shared/validate/validate_no_cgo.go new file mode 100644 index 000000000000..69fda7e08132 --- /dev/null +++ b/shared/validate/validate_no_cgo.go @@ -0,0 +1,14 @@ +//go:build !(linux && cgo) + +package validate + +import ( + "fmt" +) + +// IsBpfDelegateOption validates a BPF Token delegation option. 
+func IsBpfDelegateOption(delegateOption string) func(value string) error { + return func(value string) error { + return fmt.Errorf("This should never be called.") + } +} From 77eb3fa6d559dce75e0db8806f369a582771c113 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 17 Feb 2025 13:51:58 +0100 Subject: [PATCH 6/8] api: add bpf_delegation API extension Signed-off-by: Alexander Mikhalitsyn --- doc/api-extensions.md | 4 ++++ shared/version/api.go | 1 + 2 files changed, 5 insertions(+) diff --git a/doc/api-extensions.md b/doc/api-extensions.md index 510d2b0871fd..3e8b7662269a 100644 --- a/doc/api-extensions.md +++ b/doc/api-extensions.md @@ -2618,3 +2618,7 @@ Note that the `openid` and `email` scopes are always required. ## `project_default_network_and_storage` Adds flags --network and --storage. The --network flag adds a network device connected to the specified network to the default profile. The --storage flag adds a root disk device using the specified storage pool to the default profile. + +## `bpf_delegation` + +Adds new `security.delegate_bpf.*` group of options in order to support eBPF delegation using BPF Token mechanism. diff --git a/shared/version/api.go b/shared/version/api.go index 24ce86f62292..0077d22df9a1 100644 --- a/shared/version/api.go +++ b/shared/version/api.go @@ -439,6 +439,7 @@ var APIExtensions = []string{ "cloud_init_ssh_keys", "oidc_scopes", "project_default_network_and_storage", + "bpf_delegation", } // APIExtensionsCount returns the number of available API extensions. 
From ea4a5d1390824115dac6121a6b755cee6198e9b3 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 17 Feb 2025 13:22:15 +0100 Subject: [PATCH 7/8] doc: Update metadata Signed-off-by: Alexander Mikhalitsyn --- doc/metadata.txt | 45 +++++++++++++++++++++++++++++ lxd/metadata/configuration.json | 50 +++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/doc/metadata.txt b/doc/metadata.txt index ef5f8f6a7c6c..50d603125bb8 100644 --- a/doc/metadata.txt +++ b/doc/metadata.txt @@ -2148,6 +2148,51 @@ If left empty, no limit is set. When enabling this option, set {config:option}`instance-security:security.secureboot` to `false`. ``` +```{config:option} security.delegate_bpf instance-security +:condition: "unprivileged container" +:defaultdesc: "`false`" +:liveupdate: "no" +:shortdesc: "Whether to enable eBPF delegation using BPF Token mechanism" +:type: "bool" + +``` + +```{config:option} security.delegate_bpf.attachs instance-security +:condition: "unprivileged container" +:defaultdesc: "`any`" +:liveupdate: "no" +:shortdesc: "Which eBPF attach types to allow with delegation mechanism" +:type: "string" + +``` + +```{config:option} security.delegate_bpf.cmds instance-security +:condition: "unprivileged container" +:defaultdesc: "`any`" +:liveupdate: "no" +:shortdesc: "Which eBPF commands to allow with delegation mechanism" +:type: "string" + +``` + +```{config:option} security.delegate_bpf.maps instance-security +:condition: "unprivileged container" +:defaultdesc: "`any`" +:liveupdate: "no" +:shortdesc: "Which eBPF maps to allow with delegation mechanism" +:type: "string" + +``` + +```{config:option} security.delegate_bpf.progs instance-security +:condition: "unprivileged container" +:defaultdesc: "`any`" +:liveupdate: "no" +:shortdesc: "Which eBPF program types to allow with delegation mechanism" +:type: "string" + +``` + ```{config:option} security.devlxd instance-security :defaultdesc: "`true`" :liveupdate: "no" diff --git 
a/lxd/metadata/configuration.json b/lxd/metadata/configuration.json index 4b0babc55eb7..8bbb5d2d65c9 100644 --- a/lxd/metadata/configuration.json +++ b/lxd/metadata/configuration.json @@ -2425,6 +2425,56 @@ "type": "bool" } }, + { + "security.delegate_bpf": { + "condition": "unprivileged container", + "defaultdesc": "`false`", + "liveupdate": "no", + "longdesc": "", + "shortdesc": "Whether to enable eBPF delegation using BPF Token mechanism", + "type": "bool" + } + }, + { + "security.delegate_bpf.attachs": { + "condition": "unprivileged container", + "defaultdesc": "`any`", + "liveupdate": "no", + "longdesc": "", + "shortdesc": "Which eBPF attach types to allow with delegation mechanism", + "type": "string" + } + }, + { + "security.delegate_bpf.cmds": { + "condition": "unprivileged container", + "defaultdesc": "`any`", + "liveupdate": "no", + "longdesc": "", + "shortdesc": "Which eBPF commands to allow with delegation mechanism", + "type": "string" + } + }, + { + "security.delegate_bpf.maps": { + "condition": "unprivileged container", + "defaultdesc": "`any`", + "liveupdate": "no", + "longdesc": "", + "shortdesc": "Which eBPF maps to allow with delegation mechanism", + "type": "string" + } + }, + { + "security.delegate_bpf.progs": { + "condition": "unprivileged container", + "defaultdesc": "`any`", + "liveupdate": "no", + "longdesc": "", + "shortdesc": "Which eBPF program types to allow with delegation mechanism", + "type": "string" + } + }, { "security.devlxd": { "defaultdesc": "`true`", From d495777e499e9e96832517bdcc6a5053e650ce73 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 18 Feb 2025 18:18:48 +0100 Subject: [PATCH 8/8] lxd/main_forkmount: Fix linter error Signed-off-by: Alexander Mikhalitsyn --- lxd/main_forkmount.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lxd/main_forkmount.go b/lxd/main_forkmount.go index 3c8f8a75c2de..e3c6f8e24fd8 100644 --- a/lxd/main_forkmount.go +++ b/lxd/main_forkmount.go @@ -836,14 
+836,14 @@ import ( "github.com/spf13/cobra" - // Used by cgo - _ "github.com/canonical/lxd/lxd/include" + _ "github.com/canonical/lxd/lxd/include" // Used by cgo ) type cmdForkmount struct { global *cmdGlobal } +// Command returns a cobra.Command for use with (*cobra.Command).AddCommand. func (c *cmdForkmount) Command() *cobra.Command { // Main subcommand cmd := &cobra.Command{} @@ -889,6 +889,7 @@ func (c *cmdForkmount) Command() *cobra.Command { return cmd } +// Run executes the forkmount command. func (c *cmdForkmount) Run(cmd *cobra.Command, args []string) error { return fmt.Errorf("This command should have been intercepted in cgo") }