Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Intel NPU utilization #985

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/btop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,8 @@ void clean_quit(int sig) {
#ifdef GPU_SUPPORT
Gpu::Nvml::shutdown();
Gpu::Rsmi::shutdown();
Gpu::Intel::shutdown();
Gpu::IntelNPU::shutdown();
#endif

Config::write();
Expand Down
12 changes: 6 additions & 6 deletions src/btop_draw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,7 @@ namespace Cpu {
for (unsigned long i = 0; i < gpus.size(); ++i) {
if (gpu_auto and v_contains(Gpu::shown_panels, i))
continue;
out += Mv::to(b_y + ++cy, b_x + 1) + Theme::c("main_fg") + Fx::b + "GPU";
out += Mv::to(b_y + ++cy, b_x + 1) + Theme::c("main_fg") + Fx::b + gpus[i].get_device_type();
if (gpus.size() > 1) out += rjust(to_string(i), 1 + (gpus.size() > 9));
if (gpus[i].supported_functions.gpu_utilization) {
out += ' ';
Expand Down Expand Up @@ -1006,7 +1006,7 @@ namespace Gpu {
if (not single_graph)
out += Mv::to(y + graph_up_height + 1, x + 1) + graph_lower(safeVal(gpu.gpu_percent, "gpu-totals"s), (data_same or redraw[index]));

out += Mv::to(b_y + 1, b_x + 1) + Theme::c("main_fg") + Fx::b + "GPU " + gpu_meter(safeVal(gpu.gpu_percent, "gpu-totals"s).back())
out += Mv::to(b_y + 1, b_x + 1) + Theme::c("main_fg") + Fx::b + gpu.get_device_type() + " " + gpu_meter(safeVal(gpu.gpu_percent, "gpu-totals"s).back())
+ Theme::g("cpu").at(clamp(safeVal(gpu.gpu_percent, "gpu-totals"s).back(), 0ll, 100ll)) + rjust(to_string(safeVal(gpu.gpu_percent, "gpu-totals"s).back()), 5) + Theme::c("main_fg") + '%';

//? Temperature graph, I assume the device supports utilization if it supports temperature
Expand Down Expand Up @@ -1066,11 +1066,11 @@ namespace Gpu {
} else {
out += Theme::c("main_fg") + Mv::r(1);
if (gpu.supported_functions.mem_total)
out += "VRAM total:" + rjust(floating_humanizer(gpu.mem_total), b_width/(1 + gpu.supported_functions.mem_clock)-14);
else out += "VRAM usage:" + rjust(floating_humanizer(gpu.mem_used), b_width/(1 + gpu.supported_functions.mem_clock)-14);
out += gpu.get_memory_type() + " total:" + rjust(floating_humanizer(gpu.mem_total), b_width/(1 + gpu.supported_functions.mem_clock)-14);
else out += gpu.get_memory_type() + " usage:" + rjust(floating_humanizer(gpu.mem_used, false, 0, false, false), b_width/(1 + gpu.supported_functions.mem_clock)-14);

if (gpu.supported_functions.mem_clock)
out += " VRAM clock:" + rjust(to_string(gpu.mem_clock_speed) + " Mhz", b_width/2-13);
out += " " + gpu.get_memory_type() + " clock:" + rjust(to_string(gpu.mem_clock_speed) + " Mhz", b_width/2-13);
}
}

Expand Down Expand Up @@ -2106,7 +2106,7 @@ namespace Draw {

height += (height+Cpu::height == Term::height-1);
x_vec[i] = 1; y_vec[i] = 1 + i*height + (not Config::getB("cpu_bottom"))*Cpu::shown*Cpu::height;
box[i] = createBox(x_vec[i], y_vec[i], width, height, Theme::c("cpu_box"), true, std::string("gpu") + (char)(shown_panels[i]+'0'), "", (shown_panels[i]+5)%10); // TODO gpu_box
box[i] = createBox(x_vec[i], y_vec[i], width, height, Theme::c("cpu_box"), true, std::string("xpu") + (char)(shown_panels[i]+'0'), "", (shown_panels[i]+5)%10); // TODO gpu_box

b_height_vec[i] = 2 + gpu_b_height_offsets[shown_panels[i]];
b_width = clamp(width/2, min_width, 64);
Expand Down
41 changes: 32 additions & 9 deletions src/btop_shared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ using std::deque;
using std::string;
using std::tuple;
using std::vector;
using std::string_view;

using namespace std::literals; // for operator""s

Expand Down Expand Up @@ -130,15 +131,16 @@ namespace Gpu {
//* Container for supported Gpu::*::collect() functions
//* Container for supported Gpu::*::collect() functions
struct gpu_info_supported {
	bool gpu_utilization = true,
		mem_utilization = true,
		gpu_clock = true,
		mem_clock = true,
		pwr_usage = true,
		pwr_state = true,
		temp_info = true,
		mem_total = true,
		mem_used = true,
		pcie_txrx = true,
		//? Identifies an NPU device (drawn as "NPU"/"npu" instead of "GPU"/"gpu").
		//? Defaults to false so backends that never set it (e.g. via designated
		//? initializers that omit the field) keep rendering as regular GPUs;
		//? only IntelNPU::collect<1>() sets it to true explicitly.
		is_npu_device = false;
};

//* Per-device container for GPU info
Expand Down Expand Up @@ -167,6 +169,19 @@ namespace Gpu {

gpu_info_supported supported_functions;

//? Lowercase device label ("npu" or "gpu") for UI box naming
string get_device_label() const {
	if (supported_functions.is_npu_device)
		return "npu";
	return "gpu";
}

//? Uppercase device type ("NPU" or "GPU") shown in the panel headers
string get_device_type() const {
	const char* type = supported_functions.is_npu_device ? "NPU" : "GPU";
	return type;
}

//? Memory label shown next to usage/clock readouts.
// TODO: This should be set per device - GPU may use RAM and discrete NPU may have VRAM.
string get_memory_type() const {
	if (supported_functions.is_npu_device)
		return "RAM";
	else
		return "VRAM";
}

// vector<proc_info> graphics_processes = {}; // TODO
// vector<proc_info> compute_processes = {};
};
Expand All @@ -178,6 +193,14 @@ namespace Gpu {
extern bool shutdown();
}

namespace Intel {
	//? Shuts down the Intel GPU collector; returns false if it was never initialized
	//? (implementation lives in the platform-specific collector source)
	extern bool shutdown();
}

namespace IntelNPU {
	//? Closes the NPU sysfs file handles opened by init(); returns false if not initialized
	extern bool shutdown();
}

//* Collect gpu stats and temperatures
auto collect(bool no_update = false) -> vector<gpu_info>&;

Expand Down
138 changes: 136 additions & 2 deletions src/linux/btop_collect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ tab-size = 4
#include <unistd.h>
#include <numeric>
#include <sys/statvfs.h>
#include <sys/types.h>
#include <netdb.h>
#include <ifaddrs.h>
#include <net/if.h>
Expand All @@ -34,6 +35,7 @@ tab-size = 4
#include <dlfcn.h>
#include <optional>
#include <system_error>
#include <unordered_map>
#include <utility>

#if defined(RSMI_STATIC)
#include <rocm_smi/rocm_smi.h>
Expand Down Expand Up @@ -68,6 +70,7 @@ using std::vector;
using std::future;
using std::async;
using std::pair;
using std::optional;


namespace fs = std::filesystem;
Expand Down Expand Up @@ -219,6 +222,19 @@ namespace Gpu {
uint32_t device_count = 0;
}

namespace IntelNPU {
	//? Sysfs stream for the cumulative NPU busy-time counter (npu_busy_time_us)
	std::ifstream npu_util_sysfs;
	//? Optional sysfs stream for NPU memory usage; absent on older intel_vpu drivers
	optional<std::ifstream> npu_memory_util_sysfs;
	//? {cumulative busy time in µs, wall-clock timestamp in ms} used for utilization deltas
	using NpuSample = pair<double, uint64_t>;
	NpuSample initial_sample;

	bool initialized = false;
	//? Declarations match the definitions below (the old `read_util_value` name matched none)
	NpuSample npu_read_util_value();
	ssize_t npu_read_memory_usage();
	bool init();
	bool shutdown();
	template <bool is_init> bool collect(gpu_info* gpus_slice);
	uint32_t device_count = 0;
}

//? Intel data collection
namespace Intel {
Expand Down Expand Up @@ -298,6 +314,7 @@ namespace Shared {
Gpu::Nvml::init();
Gpu::Rsmi::init();
Gpu::Intel::init();
Gpu::IntelNPU::init();
if (not Gpu::gpu_names.empty()) {
for (auto const& [key, _] : Gpu::gpus[0].gpu_percent)
Cpu::available_fields.push_back(key);
Expand Down Expand Up @@ -1126,7 +1143,7 @@ namespace Gpu {
result = nvmlDeviceGetHandleByIndex(i, devices.data() + i);
if (result != NVML_SUCCESS) {
Logger::warning(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
gpus[i].supported_functions = {false, false, false, false, false, false, false, false};
gpus[i].supported_functions = {false, false, false, false, false, false, false, false, false, false};
continue;
}

Expand Down Expand Up @@ -1677,7 +1694,8 @@ namespace Gpu {
.temp_info = false,
.mem_total = false,
.mem_used = false,
.pcie_txrx = false
.pcie_txrx = false,
.is_npu_device = false,
};

gpus_slice->pwr_max_usage = 10'000; //? 10W
Expand Down Expand Up @@ -1709,6 +1727,121 @@ namespace Gpu {
return true;
}
}
namespace IntelNPU {
	//? Intel NPU data collection via the intel_vpu driver's sysfs interface

	//? Locates the NPU sysfs files, opens them, registers one pseudo-GPU entry
	//? and takes the initial utilization sample. Returns false if no device is found.
	bool init() {
		if (initialized) return false;

		bool busy_file_found = false;
		bool memory_util_found = false;

		//? The Intel NPU kernel driver is called 'intel_vpu'. If it isn't loaded this
		//? sysfs directory doesn't exist, so use the non-throwing iterator overload
		//? to avoid a filesystem_error escaping from startup.
		std::error_code ec;
		for (auto const& driver_dir_entry : fs::directory_iterator(fs::path("/sys/bus/pci/drivers/intel_vpu/"), ec)) {
			if (not driver_dir_entry.is_directory(ec))
				continue;
			for (auto const& dir_entry : fs::directory_iterator(driver_dir_entry, ec)) {
				if (not dir_entry.is_regular_file(ec))
					continue;

				if (dir_entry.path().filename() == "npu_busy_time_us") {
					npu_util_sysfs = ifstream(dir_entry.path());
					if (!npu_util_sysfs) {
						Logger::info("NPU: Failed opening npu_busy_time_us sysfs file for NPU utilization");
						return false;
					}
					busy_file_found = true;
				}
				else if (dir_entry.path().filename() == "npu_memory_utilization") {
					npu_memory_util_sysfs = ifstream(dir_entry.path());
					if (!npu_memory_util_sysfs) {
						Logger::info("NPU: Failed opening npu_memory_utilization sysfs file for NPU memory usage");
						return false;
					}
					memory_util_found = true;
				}

				if (busy_file_found and memory_util_found)
					break;
			}
			//? Only a single NPU device is supported; stop scanning once one is found.
			if (busy_file_found)
				break;
		}

		if (not busy_file_found) //? memory_util_found is optional
			return false;

		Logger::info("Found Intel NPU device");

		if (not npu_memory_util_sysfs.has_value())
			Logger::info("NPU memory utilization function is not present. Use newer driver.");

		device_count = 1;
		gpus.resize(gpus.size() + device_count);
		//? gpus.size() already includes the new device; adding device_count again
		//? would leave gpu_names one element larger than gpus
		gpu_names.resize(gpus.size());

		//? The NPU entry is appended after all other backends' devices
		gpu_names[Nvml::device_count + Rsmi::device_count + Intel::device_count] = "Intel NPU";

		initialized = true;
		IntelNPU::collect<1>(gpus.data() + Intel::device_count + Nvml::device_count + Rsmi::device_count);

		return true;
	}

	//? Returns {cumulative busy time in µs, current wall-clock time in ms}
	NpuSample npu_read_util_value() {
		ssize_t val = 0;
		npu_util_sysfs.clear(); //? clear eof/fail state left by the previous read
		npu_util_sysfs.seekg(0);
		npu_util_sysfs >> val;
		return {static_cast<double>(val), Tools::time_ms()};
	}

	//? Returns the current NPU memory usage, or 0 if the sysfs file is unavailable
	ssize_t npu_read_memory_usage() {
		ssize_t val = 0;
		if (npu_memory_util_sysfs.has_value()) {
			npu_memory_util_sysfs->clear();
			npu_memory_util_sysfs->seekg(0);
			*npu_memory_util_sysfs >> val;
		}
		return val;
	}

	//? Closes the sysfs streams and resets state so init() can run again cleanly
	bool shutdown() {
		if (!initialized) return false;
		npu_util_sysfs.close();
		if (npu_memory_util_sysfs.has_value()) {
			npu_memory_util_sysfs->close();
			npu_memory_util_sysfs.reset();
		}
		initialized = false;
		return true;
	}

	//? Pushes the current NPU utilization percent and memory usage into gpus_slice.
	//? is_init additionally fills in supported_functions and seeds the first sample.
	template <bool is_init> bool collect(gpu_info* gpus_slice) {
		if (!initialized) return false;

		if constexpr(is_init) {
			IntelNPU::initial_sample = IntelNPU::npu_read_util_value();

			gpus_slice->supported_functions = {
				.gpu_utilization = true,
				.mem_utilization = false,
				.gpu_clock = false,
				.mem_clock = false,
				.pwr_usage = false,
				.pwr_state = false,
				.temp_info = false,
				.mem_total = false,
				.mem_used = npu_memory_util_sysfs.has_value(),
				.pcie_txrx = false,
				.is_npu_device = true
			};
		}

		NpuSample curr_sample = npu_read_util_value();
		const ssize_t npu_mem_usage = npu_read_memory_usage();
		const double value_delta = curr_sample.first - initial_sample.first;
		const double time_delta = curr_sample.second - initial_sample.second;

		//? Busy time is in µs, elapsed time in ms: Δbusy / (Δt * 1000) * 100 == Δbusy / Δt / 10.
		//? Guard against a zero elapsed time (e.g. the sample taken right after init).
		const long long utilization = time_delta > 0.0 ? (long long)round(value_delta / time_delta / 10.0) : 0ll;
		gpus_slice->gpu_percent.at("gpu-totals").push_back(utilization);
		gpus_slice->mem_used = npu_mem_usage;
		IntelNPU::initial_sample = curr_sample;
		return true;
	}
}

//? Collect data from GPU-specific libraries
auto collect(bool no_update) -> vector<gpu_info>& {
Expand All @@ -1720,6 +1853,7 @@ namespace Gpu {
Nvml::collect<0>(gpus.data()); // raw pointer to vector data, size == Nvml::device_count
Rsmi::collect<0>(gpus.data() + Nvml::device_count); // size = Rsmi::device_count
Intel::collect<0>(gpus.data() + Nvml::device_count + Rsmi::device_count); // size = Intel::device_count
IntelNPU::collect<0>(gpus.data() + Intel::device_count + Nvml::device_count + Rsmi::device_count); // size = IntelNPU::device_count

//* Calculate average usage
long long avg = 0;
Expand Down
Loading