Skip to content

Commit

Permalink
Update registry initialization to read on-disk CDI spec
Browse files Browse the repository at this point in the history
Signed-off-by: Ivan Sim <[email protected]>
  • Loading branch information
ihcsim committed May 30, 2024
1 parent 4f0d3bd commit af72839
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 18 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ toolchain go1.22.2

require (
github.com/container-orchestrated-devices/container-device-interface v0.5.4
github.com/google/uuid v1.4.0
github.com/prometheus/client_golang v1.19.0
github.com/rs/zerolog v1.32.0
github.com/spf13/cobra v1.8.0
Expand Down Expand Up @@ -40,6 +39,7 @@ require (
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.4.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand Down
13 changes: 13 additions & 0 deletions pkg/drivers/gpu/kubelet/cdi/cdi.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cdi

import (
"errors"
"fmt"
"sync"

Expand Down Expand Up @@ -64,3 +65,15 @@ func DeleteCDISpec(claimUID string) error {
specName := cdiapi.GenerateTransientSpecName(cdiVendor, cdiClass, claimUID)
return registry.SpecDB().RemoveSpec(specName)
}

// Specs returns every CDI spec the registry's spec DB holds for cdiVendor,
// together with the errors the spec DB reports for those specs combined into
// a single error via errors.Join.
//
// The returned error is nil when no spec has any reported error. The spec
// slice is returned as-is even when the error is non-nil.
func Specs() ([]*cdiapi.Spec, error) {
	db := registry.SpecDB()
	vendorSpecs := db.GetVendorSpecs(cdiVendor)

	var joined error
	for i := range vendorSpecs {
		// The error accumulated so far is placed after this spec's own
		// errors; errors.Join drops it while it is still nil.
		batch := append(db.GetSpecErrors(vendorSpecs[i]), joined)
		joined = errors.Join(batch...)
	}

	return vendorSpecs, joined
}
45 changes: 28 additions & 17 deletions pkg/drivers/gpu/kubelet/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@ import (
"errors"
"fmt"

"github.com/google/uuid"
draclientset "github.com/ihcsim/k8s-dra/pkg/apis/clientset/versioned"
gpuv1alpha1 "github.com/ihcsim/k8s-dra/pkg/apis/gpu/v1alpha1"
cdi "github.com/ihcsim/k8s-dra/pkg/drivers/gpu/kubelet/cdi"
"github.com/ihcsim/k8s-dra/pkg/drivers/gpu/kubelet/cdi"
zlog "github.com/rs/zerolog"
resourcev1alpha2 "k8s.io/api/resource/v1alpha2"
apierrs "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -21,6 +20,11 @@ const AvailableGPUsCount = 4

var _ kubeletdrav1.NodeServer = &NodeServer{}

type gpuDevice struct {
*gpuv1alpha1.GPUDevice
cdiAnnotations map[string]string

Check failure on line 25 in pkg/drivers/gpu/kubelet/nodeserver.go

View workflow job for this annotation

GitHub Actions / prebuild

field `cdiAnnotations` is unused (unused)
}

// NodeServer provides the API implementation of the node server.
// see https://pkg.go.dev/k8s.io/kubelet/pkg/apis/dra/v1alpha3#NodeServer
type NodeServer struct {
Expand All @@ -42,10 +46,14 @@ func NewNodeServer(
nodeName string,
availableGPUs int,
log zlog.Logger) (*NodeServer, error) {
gpus := discoverGPUs(availableGPUs)
gpus, err := discoverGPUs(availableGPUs)
if err != nil {
return nil, err
}

var gpuDevices []*gpuv1alpha1.GPUDevice
for _, gpu := range gpus {
gpuDevices = append(gpuDevices, gpu.device)
gpuDevices = append(gpuDevices, gpu.GPUDevice)
}

if _, err := clientSets.GpuV1alpha1().NodeDevices(namespace).Get(ctx, nodeName, metav1.GetOptions{}); err != nil && apierrs.IsNotFound(err) {
Expand Down Expand Up @@ -214,20 +222,23 @@ func (n *NodeServer) NodeListAndWatchResources(req *kubeletdrav1.NodeListAndWatc
return s.Send(res)
}

type gpu struct {
device *gpuv1alpha1.GPUDevice
}
func discoverGPUs(maxAvailableGPU int) ([]*gpuDevice, error) {
specs, err := cdi.Specs()
if err != nil {
return nil, err
}

func discoverGPUs(maxAvailableGPU int) []*gpu {
gpus := make([]*gpu, maxAvailableGPU)
for i := 0; i < maxAvailableGPU; i++ {
gpus = append(gpus, &gpu{
device: &gpuv1alpha1.GPUDevice{
UUID: uuid.NewString(),
ProductName: "NVIDIA Tesla V100",
},
})
var gpuDevices []*gpuDevice
for _, spec := range specs {
for _, device := range spec.Devices {
gpuDevices = append(gpuDevices, &gpuDevice{
GPUDevice: &gpuv1alpha1.GPUDevice{
UUID: device.Name,
ProductName: device.ContainerEdits.Env[1],
},
})
}
}

return gpus
return gpuDevices, nil
}

0 comments on commit af72839

Please sign in to comment.