Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

work on bare-metal SNP support in node-installer #728

Merged
merged 9 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@ go 1.22.0

toolchain go1.22.5

replace github.com/edgelesssys/contrast/node-installer => ./node-installer
replace (
github.com/edgelesssys/contrast/node-installer => ./node-installer
// The upstream package has some stepping issues with Genoa:
// https://github.com/google/go-sev-guest/issues/115
// https://github.com/google/go-sev-guest/issues/103
github.com/google/go-sev-guest => github.com/edgelesssys/go-sev-guest v0.0.0-20240705062330-4bd4b2483aa0
)

require (
filippo.io/keygen v0.0.0-20230306160926-5201437acf8e
Expand Down Expand Up @@ -64,7 +70,6 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/pborman/uuid v1.2.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
Expand Down
7 changes: 2 additions & 5 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/edgelesssys/go-sev-guest v0.0.0-20240705062330-4bd4b2483aa0 h1:d9uufoq2x0o81qC4GOau1NaAbZ/KkiVnw8mpMSEqS80=
github.com/edgelesssys/go-sev-guest v0.0.0-20240705062330-4bd4b2483aa0/go.mod h1:tqyacT8AaJv6ZWLtsFK5HKJ+d6b4L5iuAmRMWCkIupA=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
Expand Down Expand Up @@ -41,16 +43,13 @@ github.com/google/go-github/v62 v62.0.0 h1:/6mGCaRywZz9MuHyw9gD1CwsbmBX8GWsbFkwM
github.com/google/go-github/v62 v62.0.0/go.mod h1:EMxeUqGJq2xRu9DYBMwel/mr7kZrzUOfQmmpYrZn2a4=
github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
github.com/google/go-sev-guest v0.11.1 h1:gnww4U8fHV5DCPz4gykr1s8SEX1fFNcxCBy+vvXN24k=
github.com/google/go-sev-guest v0.11.1/go.mod h1:qBOfb+JmgsUI3aUyzQoGC13Kpp9zwLeWvuyXmA9q77w=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/logger v1.1.1 h1:+6Z2geNxc9G+4D4oDO9njjjn2d0wN5d7uOo0vOIW1NQ=
github.com/google/logger v1.1.1/go.mod h1:BkeJZ+1FhQ+/d087r4dzojEg1u2ZX+ZqG1jTUrLM+zQ=
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec=
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
Expand Down Expand Up @@ -96,8 +95,6 @@ github.com/onsi/ginkgo/v2 v2.15.0 h1:79HwNRBAZHOEwrczrgSOPy+eFTTlIGELKy5as+ClttY
github.com/onsi/ginkgo/v2 v2.15.0/go.mod h1:HlxMHtYF57y6Dpf+mc5529KKmSq9h2FpCF+/ZkwUxKM=
github.com/onsi/gomega v1.31.0 h1:54UJxxj6cPInHS3a35wm6BK/F9nHYueZ1NVujHDrnXE=
github.com/onsi/gomega v1.31.0/go.mod h1:DW9aCi7U6Yi40wNVAvT6kzFnEVEI5n3DloYBiKiT6zk=
github.com/pborman/uuid v1.2.1 h1:+ZZIw58t/ozdjRaXh/3awHfmWRbzYxJoAdNJxe/3pvw=
github.com/pborman/uuid v1.2.1/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand Down
11 changes: 7 additions & 4 deletions internal/kuberesource/parts.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ import (
)

// ContrastRuntimeClass creates a new RuntimeClassConfig.
func ContrastRuntimeClass() *RuntimeClassConfig {
func ContrastRuntimeClass(platform platforms.Platform) *RuntimeClassConfig {
r := RuntimeClass(runtimeHandler).
WithHandler(runtimeHandler).
WithLabels(map[string]string{"addonmanager.kubernetes.io/mode": "Reconcile"}).
WithOverhead(Overhead(corev1.ResourceList{"memory": resource.MustParse("1152Mi")})).
WithScheduling(Scheduling(map[string]string{"kubernetes.azure.com/kata-cc-isolation": "true"}))
WithOverhead(Overhead(corev1.ResourceList{"memory": resource.MustParse("1152Mi")}))

if platform == platforms.AKSCloudHypervisorSNP {
r.WithScheduling(Scheduling(map[string]string{"kubernetes.azure.com/kata-cc-isolation": "true"}))
}

return &RuntimeClassConfig{r}
}
Expand All @@ -40,7 +43,7 @@ func NodeInstaller(namespace string, platform platforms.Platform) (*NodeInstalle
switch platform {
case platforms.AKSCloudHypervisorSNP:
nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-microsoft:latest"
case platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
case platforms.K3sQEMUTDX, platforms.K3sQEMUSNP, platforms.RKE2QEMUTDX:
nodeInstallerImageURL = "ghcr.io/edgelesssys/contrast/node-installer-kata:latest"
default:
return nil, fmt.Errorf("unsupported platform %q", platform)
Expand Down
2 changes: 1 addition & 1 deletion internal/kuberesource/sets.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func CoordinatorBundle() []any {
func Runtime(platform platforms.Platform) ([]any, error) {
ns := ""

runtimeClass := ContrastRuntimeClass().RuntimeClassApplyConfiguration
runtimeClass := ContrastRuntimeClass(platform).RuntimeClassApplyConfiguration
nodeInstaller, err := NodeInstaller(ns, platform)
if err != nil {
return nil, fmt.Errorf("creating node installer: %w", err)
Expand Down
61 changes: 61 additions & 0 deletions node-installer/internal/constants/configuration-qemu-snp.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Minimized list, inactive options removed.
# upstream source: https://github.com/kata-containers/kata-containers/blob/dac07239f5c7e3deefbdefa8d02bbc427487042e/src/runtime/config/configuration-qemu-snp.toml.in
[hypervisor.qemu]
path = "/usr/bin/qemu-system-x86_64"
kernel = "/opt/kata/share/kata-containers/vmlinuz-confidential.container"
initrd = "/opt/kata/share/kata-containers/kata-containers-initrd-confidential.img"
machine_type = "q35"
rootfs_type="erofs"
confidential_guest = true
sev_snp_guest = true
snp_certs_path = "/opt/snp/cert_chain.cert"
enable_annotations = ["enable_iommu", "virtio_fs_extra_args", "kernel_params", "default_vcpus", "default_memory"]
valid_hypervisor_paths = ["/usr/bin/qemu-system-x86_64"]
kernel_params = ""
firmware = "/usr/share/ovmf/OVMF.fd"
firmware_volume = ""
machine_accelerators=""
cpu_features="pmu=off"
default_vcpus = 1
default_maxvcpus = 0
default_bridges = 1
default_memory = 2048
default_maxmemory = 0
disable_block_device_use = false
shared_fs = "none"
virtio_fs_daemon = "/opt/kata/libexec/virtiofsd"
valid_virtio_fs_daemon_paths = ["/opt/kata/libexec/virtiofsd"]
virtio_fs_cache_size = 0
virtio_fs_queue_size = 1024
virtio_fs_extra_args = ["--thread-pool-size=1", "--announce-submounts"]
virtio_fs_cache = "auto"
block_device_driver = "virtio-scsi"
block_device_aio = "io_uring"
enable_iothreads = false
enable_vhost_user_store = false
vhost_user_store_path = "/var/run/kata-containers/vhost-user"
valid_vhost_user_store_paths = ["/var/run/kata-containers/vhost-user"]
vhost_user_reconnect_timeout_sec = 0
file_mem_backend = ""
valid_file_mem_backends = [""]
pflashes = []
disable_image_nvdimm = true
valid_entropy_sources = ["/dev/urandom","/dev/random",""]
disable_selinux=false
disable_guest_selinux=true

[agent.kata]
kernel_modules=[]
dial_timeout = 90

[runtime]
internetworking_model="tcfilter"
disable_guest_seccomp=true
sandbox_cgroup_only=false
static_sandbox_resource_mgmt=true
sandbox_bind_mounts=[]
vfio_mode="guest-kernel"
disable_guest_empty_dir=false
experimental=[]
create_container_timeout = 60
dan_conf = "/run/kata-containers/dans"
31 changes: 31 additions & 0 deletions node-installer/internal/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ var (
//go:embed configuration-qemu-tdx.toml
kataBareMetalQEMUTDXBaseConfig string

// kataBareMetalQEMUSNPBaseConfig is the configuration file for the Kata runtime on bare-metal SNP
// with QEMU.
//
//go:embed configuration-qemu-snp.toml
kataBareMetalQEMUSNPBaseConfig string

// containerdBaseConfig is the base configuration file for containerd
//
//go:embed containerd-config.toml
Expand Down Expand Up @@ -66,6 +72,27 @@ func KataRuntimeConfig(baseDir string, platform platforms.Platform, debug bool)
config.Runtime["enable_debug"] = true
}
return &config, nil
case platforms.K3sQEMUSNP:
if err := toml.Unmarshal([]byte(kataBareMetalQEMUSNPBaseConfig), &config); err != nil {
return nil, fmt.Errorf("failed to unmarshal kata runtime configuration: %w", err)
}
config.Hypervisor["qemu"]["path"] = filepath.Join(baseDir, "bin", "qemu-system-x86_64")
config.Hypervisor["qemu"]["firmware"] = filepath.Join(baseDir, "share", "OVMF.fd")
config.Hypervisor["qemu"]["image"] = filepath.Join(baseDir, "share", "kata-containers.img")
config.Hypervisor["qemu"]["kernel"] = filepath.Join(baseDir, "share", "kata-kernel")
delete(config.Hypervisor["qemu"], "initrd")
config.Hypervisor["qemu"]["block_device_aio"] = "threads"
config.Hypervisor["qemu"]["shared_fs"] = "virtio-9p"
config.Hypervisor["qemu"]["valid_hypervisor_paths"] = []string{filepath.Join(baseDir, "bin", "qemu-system-x86_64")}
config.Hypervisor["qemu"]["rootfs_type"] = "erofs"
if debug {
config.Hypervisor["qemu"]["enable_debug"] = true
config.Hypervisor["qemu"]["kernel_params"] = " agent.log=debug initcall_debug"
config.Agent["kata"]["enable_debug"] = true
config.Agent["kata"]["debug_console_enabled"] = true
config.Runtime["enable_debug"] = true
}
return &config, nil
default:
return nil, fmt.Errorf("unsupported platform: %s", platform)
}
Expand Down Expand Up @@ -99,6 +126,10 @@ func ContainerdRuntimeConfigFragment(baseDir, snapshotter string, platform platf
cfg.Options = map[string]any{
"ConfigPath": filepath.Join(baseDir, "etc", "configuration-qemu-tdx.toml"),
}
case platforms.K3sQEMUSNP:
cfg.Options = map[string]any{
"ConfigPath": filepath.Join(baseDir, "etc", "configuration-qemu-snp.toml"),
}
default:
return nil, fmt.Errorf("unsupported platform: %s", platform)
}
Expand Down
83 changes: 75 additions & 8 deletions node-installer/node-installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
package main

import (
"bufio"
"bytes"
"context"
"encoding/json"
"flag"
Expand Down Expand Up @@ -69,7 +71,7 @@ func run(ctx context.Context, fetcher assetFetcher, platform platforms.Platform,
if err := os.MkdirAll(binDir, os.ModePerm); err != nil {
return fmt.Errorf("creating runtime bin directory: %w", err)
}
if err := os.MkdirAll(filepath.Join(hostMount, runtimeBase, "share"), os.ModePerm); err != nil {
if err := os.MkdirAll(filepath.Join(hostMount, runtimeBase, "share/qemu"), os.ModePerm); err != nil {
return fmt.Errorf("creating runtime share directory: %w", err)
}
if err := os.MkdirAll(filepath.Join(hostMount, runtimeBase, "etc"), os.ModePerm); err != nil {
Expand Down Expand Up @@ -114,6 +116,9 @@ func run(ctx context.Context, fetcher assetFetcher, platform platforms.Platform,
case platforms.K3sQEMUTDX:
kataConfigPath = filepath.Join(kataConfigPath, "configuration-qemu-tdx.toml")
containerdConfigPath = filepath.Join(hostMount, "var", "lib", "rancher", "k3s", "agent", "etc", "containerd", "config.toml")
case platforms.K3sQEMUSNP:
kataConfigPath = filepath.Join(kataConfigPath, "configuration-qemu-snp.toml")
containerdConfigPath = filepath.Join(hostMount, "var", "lib", "rancher", "k3s", "agent", "etc", "containerd", "config.toml.tmpl")
case platforms.RKE2QEMUTDX:
kataConfigPath = filepath.Join(kataConfigPath, "configuration-qemu-tdx.toml")
containerdConfigPath = filepath.Join(hostMount, "var", "lib", "rancher", "rke2", "agent", "etc", "containerd", "config.toml")
Expand All @@ -131,7 +136,7 @@ func run(ctx context.Context, fetcher assetFetcher, platform platforms.Platform,
if err := patchContainerdConfig(config.RuntimeHandlerName, runtimeBase, containerdConfigPath, platform); err != nil {
return fmt.Errorf("patching containerd configuration: %w", err)
}
case platforms.K3sQEMUTDX, platforms.RKE2QEMUTDX:
case platforms.K3sQEMUTDX, platforms.K3sQEMUSNP, platforms.RKE2QEMUTDX:
// K3s or RKE2: We need to extend the configuration template, which, in its un-templated form, is non-TOML.
// Therefore just write the TOML configuration fragment ourselves and append it to the template file.
// This assumes that the user does not yet have a runtime with the same name configured himself,
Expand All @@ -151,7 +156,7 @@ func run(ctx context.Context, fetcher assetFetcher, platform platforms.Platform,
switch platform {
case platforms.AKSCloudHypervisorSNP:
return restartHostContainerd(containerdConfigPath, "containerd")
case platforms.K3sQEMUTDX:
case platforms.K3sQEMUTDX, platforms.K3sQEMUSNP:
if hostServiceExists("k3s") {
return restartHostContainerd(containerdConfigPath, "k3s")
} else if hostServiceExists("k3s-agent") {
Expand Down Expand Up @@ -241,26 +246,88 @@ func patchContainerdConfigTemplate(runtimeName, basePath, configTemplatePath str
return fmt.Errorf("reading containerd config template: %w", err)
}

// Don't add the runtime section if it already exists.
runtimeSection := fmt.Sprintf("[plugins.'io.containerd.grpc.v1.cri'.containerd.runtimes.%s]", runtimeName)
if bytes.Contains(existingConfig, []byte(runtimeSection)) {
fmt.Printf("Runtime section %q already exists\n", runtimeSection)
return nil
}

// PluginFragment contains just the `Plugins` property used to configure containerd.
type PluginFragment struct {
// Plugins provides plugin specific configuration for the initialization of a plugin
Plugins map[string]any `toml:"plugins"`
}

// Extend a scratchpad config with the new plugin configuration. (including the new contrast-cc runtime)
var newConfigFragment config.ContainerdConfig
var newConfigFragment PluginFragment
runtimes := ensureMapPath(&newConfigFragment.Plugins, constants.CRIFQDN, "containerd", "runtimes")
containerdRuntimeConfig, err := constants.ContainerdRuntimeConfigFragment(basePath, "no-snapshotter", platform)
if err != nil {
return fmt.Errorf("generating containerd runtime config: %w", err)
}
runtimes[runtimeName] = containerdRuntimeConfig

// We purposely don't marshal the full config, as we only want to append the plugin section.
rawNewPluginConfig, err := toml.Marshal(newConfigFragment.Plugins)
rawNewPluginConfig, err := toml.Marshal(newConfigFragment)
if err != nil {
return fmt.Errorf("marshaling containerd runtime config: %w", err)
}

// First append a newline to the existing config template, so that if it ends without a newline,
// the new config fragment doesn't end up on the template's last line..
newRawConfig := append(existingConfig, []byte("\n")...)
// ..then append the new config fragment
newRawConfig = append(newRawConfig, rawNewPluginConfig...)

// The marshalled config is shaped like a tree with the important bits at
// the leaves and lots of empty parent nodes (empty except for the link to
// their one child):
// A > B > C > D > E=foo
// | > F > G=bar
// | > H=baz
// Nodes that don't contain any keys, but only links to children, are
// marshalled as empty sections:
// ```toml
// [A] # <- empty section
// [A.B] # <- empty section
// [A.B.C] # <- empty section
// [A.B.C.D] # <- non-empty section
// E = "foo"
// [A.B.C.D.F] # <- non-empty section
// G = "bar"
// H = "baz"
// ```
// We want to avoid appending empty sections (i.e. sections with only a
// section header, but no keys) to the file because there's a chance that
// they already exist in the template and creating duplicate sections is
// illegal.
// On the other hand, omitting intermediate empty sections is always legal
// in TOML, so there's no risk in omitting empty sections.
//
// We iterate over the marshalled config line by line. If we encounter a
// section header, we don't immediately append it to the file, but buffer
// it in `pendingHeaderSection`. If the next line is also a section header,
// we discard the value in `pendingHeaderSection` and fill it with the new
// section header. If we encounter a non-section header line, we flush the
// value of `pendingHeaderSection` to the file, before adding the
// non-section header line.
katexochen marked this conversation as resolved.
Show resolved Hide resolved
//
// TODO(freax13): One day, go-toml might add an option to omit empty
// sections: https://github.com/pelletier/go-toml/issues/957
var pendingHeaderSection []byte
scanner := bufio.NewScanner(bytes.NewReader(rawNewPluginConfig))
for scanner.Scan() {
// Is the line a section header?
if strings.HasPrefix(scanner.Text(), "[") {
pendingHeaderSection = scanner.Bytes()
continue
}
if len(pendingHeaderSection) != 0 {
newRawConfig = append(newRawConfig, pendingHeaderSection...)
newRawConfig = append(newRawConfig, []byte("\n")...)
pendingHeaderSection = []byte{}
}
newRawConfig = append(newRawConfig, scanner.Bytes()...)
newRawConfig = append(newRawConfig, []byte("\n")...)
}
katexochen marked this conversation as resolved.
Show resolved Hide resolved

return os.WriteFile(configTemplatePath, newRawConfig, os.ModePerm)
}
Expand Down
Loading