Skip to content

Commit

Permalink
move environment detection to pkg/gpu/cuda package
Browse files Browse the repository at this point in the history
  • Loading branch information
gjulianm committed Jan 8, 2025
1 parent aa84a91 commit 9fc9d6e
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 9 deletions.
16 changes: 7 additions & 9 deletions pkg/config/env/environment_containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@ import (
"strings"
"time"

"github.com/NVIDIA/go-nvml/pkg/nvml"

"github.com/DataDog/datadog-agent/pkg/config/model"
"github.com/DataDog/datadog-agent/pkg/gpu/cuda"
"github.com/DataDog/datadog-agent/pkg/util/log"
"github.com/DataDog/datadog-agent/pkg/util/system/socket"
)
Expand Down Expand Up @@ -248,14 +247,13 @@ func detectPodResources(features FeatureMap, cfg model.Reader) {
}

func detectNVML(features FeatureMap) {
// TODO: Add configuration option for NVML library path
nvmlLib := nvml.New()
ret := nvmlLib.Init()
if ret == nvml.SUCCESS || ret == nvml.ERROR_ALREADY_INITIALIZED {
features[NVML] = struct{}{}
} else {
log.Infof("Agent did not find NVML library for NVIDIA GPU detection: %v", nvml.ErrorString(ret))
if err := cuda.CheckNVMLAvailability(); err != nil {
log.Infof("Agent did not find NVML library for NVIDIA GPU detection: %v", err)
return
}

log.Infof("Agent found NVML library for NVIDIA GPU detection")
features[NVML] = struct{}{}
}

func getHostMountPrefixes() []string {
Expand Down
27 changes: 27 additions & 0 deletions pkg/gpu/cuda/feature_detect_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

//go:build linux

package cuda

import (
"fmt"

"github.com/NVIDIA/go-nvml/pkg/nvml"
)

// CheckNVMLAvailability probes for a usable NVML library by creating a new
// NVML handle and initializing it. It returns nil when initialization reports
// success or reports that NVML was already initialized; any other return code
// is converted to an error carrying NVML's description of that code.
func CheckNVMLAvailability() error {
	// TODO: Add configuration option for NVML library path
	switch ret := nvml.New().Init(); ret {
	case nvml.SUCCESS, nvml.ERROR_ALREADY_INITIALIZED:
		// Both codes mean the library is loaded and usable.
		return nil
	default:
		return fmt.Errorf("failed to initialize NVML library: %s", nvml.ErrorString(ret))
	}
}
14 changes: 14 additions & 0 deletions pkg/gpu/cuda/feature_detect_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

//go:build !linux

package cuda

import "errors"

// CheckNVMLAvailability is the stub used on non-Linux builds. It performs no
// detection and always returns a non-nil error stating that NVML is not
// available on this platform.
func CheckNVMLAvailability() error {
	const reason = "NVML is not available on this platform"
	return errors.New(reason)
}

0 comments on commit 9fc9d6e

Please sign in to comment.