Skip to content

Commit

Permalink
Enable the use of CDI on OpenShift
Browse files Browse the repository at this point in the history
This commit configures the toolkit container and device-plugin appropriately
so that CDI can be used to provide GPU access to both management and application
containers on OpenShift.

On OpenShift, we cannot set 'nvidia' as the default runtime. Because of this, we have
decided to take a hybrid approach for enabling CDI. That is, we use the 'nvidia' runtime
configured in CDI mode for providing GPU access to management containers, and native CDI
support in CRI-O for providing GPU access to application containers.

Signed-off-by: Christopher Desiniotis <[email protected]>
  • Loading branch information
cdesiniotis committed Jun 5, 2024
1 parent 11cf3a5 commit 3e0f91d
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion controllers/object_controls.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ const (
MPSRootEnvName = "MPS_ROOT"
// DefaultMPSRoot is the default MPS root path on the host
DefaultMPSRoot = "/run/nvidia/mps"
// NvidiaRuntimeSetAsDefaultEnvName is the name of the toolkit container environment variable that controls whether the NVIDIA Container Runtime is configured as the default runtime
NvidiaRuntimeSetAsDefaultEnvName = "NVIDIA_RUNTIME_SET_AS_DEFAULT"
)

// ContainerProbe defines container probe types
Expand Down Expand Up @@ -1125,6 +1127,10 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n
if config.CDI.IsDefault() {
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCtrRuntimeModeEnvName, "cdi")
}
// do not set 'nvidia' as the default runtime on OpenShift
if n.openshift != "" {
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaRuntimeSetAsDefaultEnvName, "false")
}
}

// set install directory for the toolkit
Expand Down Expand Up @@ -1279,7 +1285,12 @@ func TransformDevicePlugin(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpe
if config.CDI.IsEnabled() {
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIEnabledEnvName, "true")
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), DeviceListStrategyEnvName, "envvar,cdi-annotations")
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIAnnotationPrefixEnvName, "nvidia.cdi.k8s.io/")
// on OpenShift, rely on the native CDI support in CRI-O, because the 'nvidia' runtime is not configured as the default runtime there
cdiAnnotationPrefix := "nvidia.cdi.k8s.io/"
if n.openshift != "" {
cdiAnnotationPrefix = "cdi.k8s.io/"
}
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), CDIAnnotationPrefixEnvName, cdiAnnotationPrefix)
if config.Toolkit.IsEnabled() {
setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), NvidiaCTKPathEnvName, filepath.Join(config.Toolkit.InstallDir, "toolkit/nvidia-ctk"))
}
Expand Down

0 comments on commit 3e0f91d

Please sign in to comment.