Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Intel NPU utilization #985

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/btop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,8 @@ void clean_quit(int sig) {
#ifdef GPU_SUPPORT
Gpu::Nvml::shutdown();
Gpu::Rsmi::shutdown();
Gpu::Intel::shutdown();
Gpu::IntelNPU::shutdown();
#endif

Config::write();
Expand Down
12 changes: 6 additions & 6 deletions src/btop_draw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,7 @@ namespace Cpu {
for (unsigned long i = 0; i < gpus.size(); ++i) {
if (gpu_auto and v_contains(Gpu::shown_panels, i))
continue;
out += Mv::to(b_y + ++cy, b_x + 1) + Theme::c("main_fg") + Fx::b + "GPU";
out += Mv::to(b_y + ++cy, b_x + 1) + Theme::c("main_fg") + Fx::b + gpus[i].get_device_type();
if (gpus.size() > 1) out += rjust(to_string(i), 1 + (gpus.size() > 9));
if (gpus[i].supported_functions.gpu_utilization) {
out += ' ';
Expand Down Expand Up @@ -1006,7 +1006,7 @@ namespace Gpu {
if (not single_graph)
out += Mv::to(y + graph_up_height + 1, x + 1) + graph_lower(safeVal(gpu.gpu_percent, "gpu-totals"s), (data_same or redraw[index]));

out += Mv::to(b_y + 1, b_x + 1) + Theme::c("main_fg") + Fx::b + "GPU " + gpu_meter(safeVal(gpu.gpu_percent, "gpu-totals"s).back())
out += Mv::to(b_y + 1, b_x + 1) + Theme::c("main_fg") + Fx::b + gpu.get_device_type() + " " + gpu_meter(safeVal(gpu.gpu_percent, "gpu-totals"s).back())
+ Theme::g("cpu").at(clamp(safeVal(gpu.gpu_percent, "gpu-totals"s).back(), 0ll, 100ll)) + rjust(to_string(safeVal(gpu.gpu_percent, "gpu-totals"s).back()), 5) + Theme::c("main_fg") + '%';

//? Temperature graph, I assume the device supports utilization if it supports temperature
Expand Down Expand Up @@ -1066,11 +1066,11 @@ namespace Gpu {
} else {
out += Theme::c("main_fg") + Mv::r(1);
if (gpu.supported_functions.mem_total)
out += "VRAM total:" + rjust(floating_humanizer(gpu.mem_total), b_width/(1 + gpu.supported_functions.mem_clock)-14);
else out += "VRAM usage:" + rjust(floating_humanizer(gpu.mem_used), b_width/(1 + gpu.supported_functions.mem_clock)-14);
out += gpu.get_memory_type() + " total:" + rjust(floating_humanizer(gpu.mem_total), b_width/(1 + gpu.supported_functions.mem_clock)-14);
else out += gpu.get_memory_type() + " usage:" + rjust(floating_humanizer(gpu.mem_used, false, 0, false, false), b_width/(1 + gpu.supported_functions.mem_clock)-14);

if (gpu.supported_functions.mem_clock)
out += " VRAM clock:" + rjust(to_string(gpu.mem_clock_speed) + " Mhz", b_width/2-13);
out += " " + gpu.get_memory_type() + " clock:" + rjust(to_string(gpu.mem_clock_speed) + " Mhz", b_width/2-13);
}
}

Expand Down Expand Up @@ -2106,7 +2106,7 @@ namespace Draw {

height += (height+Cpu::height == Term::height-1);
x_vec[i] = 1; y_vec[i] = 1 + i*height + (not Config::getB("cpu_bottom"))*Cpu::shown*Cpu::height;
box[i] = createBox(x_vec[i], y_vec[i], width, height, Theme::c("cpu_box"), true, std::string("gpu") + (char)(shown_panels[i]+'0'), "", (shown_panels[i]+5)%10); // TODO gpu_box
box[i] = createBox(x_vec[i], y_vec[i], width, height, Theme::c("cpu_box"), true, std::string("xpu") + (char)(shown_panels[i]+'0'), "", (shown_panels[i]+5)%10); // TODO gpu_box

b_height_vec[i] = 2 + gpu_b_height_offsets[shown_panels[i]];
b_width = clamp(width/2, min_width, 64);
Expand Down
41 changes: 32 additions & 9 deletions src/btop_shared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ using std::deque;
using std::string;
using std::tuple;
using std::vector;
using std::string_view;

using namespace std::literals; // for operator""s

Expand Down Expand Up @@ -130,15 +131,16 @@ namespace Gpu {
//* Container for supported Gpu::*::collect() functions
//* Container for supported Gpu::*::collect() functions
struct gpu_info_supported {
	bool gpu_utilization = true,
		mem_utilization = true,
		gpu_clock = true,
		mem_clock = true,
		pwr_usage = true,
		pwr_state = true,
		temp_info = true,
		mem_total = true,
		mem_used = true,
		pcie_txrx = true,
		//? Identifies an NPU device (drawn as "NPU"/"npu" instead of "GPU"/"gpu").
		//? Defaults to false so backends that never set it (e.g. via designated
		//? initializers that omit the field) keep rendering as regular GPUs;
		//? only IntelNPU::collect<1>() sets it to true explicitly.
		is_npu_device = false;
};

//* Per-device container for GPU info
Expand Down Expand Up @@ -167,6 +169,19 @@ namespace Gpu {

gpu_info_supported supported_functions;

//? Lowercase device label ("npu" or "gpu") for UI box naming
string get_device_label() const {
	if (supported_functions.is_npu_device)
		return "npu";
	return "gpu";
}

//? Uppercase device type ("NPU" or "GPU") shown in the panel headers
string get_device_type() const {
	const char* type = supported_functions.is_npu_device ? "NPU" : "GPU";
	return type;
}

//? Memory label shown next to usage/clock readouts.
// TODO: This should be set per device - GPU may use RAM and discrete NPU may have VRAM.
string get_memory_type() const {
	if (supported_functions.is_npu_device)
		return "RAM";
	else
		return "VRAM";
}

// vector<proc_info> graphics_processes = {}; // TODO
// vector<proc_info> compute_processes = {};
};
Expand All @@ -178,6 +193,14 @@ namespace Gpu {
extern bool shutdown();
}

namespace Intel {
	//? Shuts down the Intel GPU collector; returns false if it was never initialized
	//? (implementation lives in the platform-specific collector source)
	extern bool shutdown();
}

namespace IntelNPU {
	//? Closes the NPU sysfs file handles opened by init(); returns false if not initialized
	extern bool shutdown();
}

//* Collect gpu stats and temperatures
auto collect(bool no_update = false) -> vector<gpu_info>&;

Expand Down
138 changes: 136 additions & 2 deletions src/linux/btop_collect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ tab-size = 4
#include <unistd.h>
#include <numeric>
#include <sys/statvfs.h>
#include <sys/types.h>
#include <netdb.h>
#include <ifaddrs.h>
#include <net/if.h>
Expand All @@ -34,6 +35,7 @@ tab-size = 4
#include <dlfcn.h>
#include <optional>
#include <system_error>
#include <unordered_map>
#include <utility>

#if defined(RSMI_STATIC)
#include <rocm_smi/rocm_smi.h>
Expand Down Expand Up @@ -68,6 +70,7 @@ using std::vector;
using std::future;
using std::async;
using std::pair;
using std::optional;


namespace fs = std::filesystem;
Expand Down Expand Up @@ -219,6 +222,19 @@ namespace Gpu {
uint32_t device_count = 0;
}

namespace IntelNPU {
	//? Sysfs stream for the cumulative NPU busy-time counter (npu_busy_time_us)
	std::ifstream npu_util_sysfs;
	//? Optional sysfs stream for NPU memory usage; absent on older intel_vpu drivers
	optional<std::ifstream> npu_memory_util_sysfs;
	//? {cumulative busy time in µs, wall-clock timestamp in ms} used for utilization deltas
	using NpuSample = pair<double, uint64_t>;
	NpuSample initial_sample;

	bool initialized = false;
	//? Declarations match the definitions below (the old `read_util_value` name matched none)
	NpuSample npu_read_util_value();
	ssize_t npu_read_memory_usage();
	bool init();
	bool shutdown();
	template <bool is_init> bool collect(gpu_info* gpus_slice);
	uint32_t device_count = 0;
}

//? Intel data collection
namespace Intel {
Expand Down Expand Up @@ -298,6 +314,7 @@ namespace Shared {
Gpu::Nvml::init();
Gpu::Rsmi::init();
Gpu::Intel::init();
Gpu::IntelNPU::init();
if (not Gpu::gpu_names.empty()) {
for (auto const& [key, _] : Gpu::gpus[0].gpu_percent)
Cpu::available_fields.push_back(key);
Expand Down Expand Up @@ -1126,7 +1143,7 @@ namespace Gpu {
result = nvmlDeviceGetHandleByIndex(i, devices.data() + i);
if (result != NVML_SUCCESS) {
Logger::warning(std::string("NVML: Failed to get device handle: ") + nvmlErrorString(result));
gpus[i].supported_functions = {false, false, false, false, false, false, false, false};
gpus[i].supported_functions = {false, false, false, false, false, false, false, false, false, false};
continue;
}

Expand Down Expand Up @@ -1677,7 +1694,8 @@ namespace Gpu {
.temp_info = false,
.mem_total = false,
.mem_used = false,
.pcie_txrx = false
.pcie_txrx = false,
.is_npu_device = false,
};

gpus_slice->pwr_max_usage = 10'000; //? 10W
Expand Down Expand Up @@ -1709,6 +1727,121 @@ namespace Gpu {
return true;
}
}
namespace IntelNPU {
	//? Intel NPU data collection via the intel_vpu driver's sysfs interface

	//? Locates the NPU sysfs files, opens them, registers one pseudo-GPU entry
	//? and takes the initial utilization sample. Returns false if no device is found.
	bool init() {
		if (initialized) return false;

		bool busy_file_found = false;
		bool memory_util_found = false;

		//? The Intel NPU kernel driver is called 'intel_vpu'. If it isn't loaded this
		//? sysfs directory doesn't exist, so use the non-throwing iterator overload
		//? to avoid a filesystem_error escaping from startup.
		std::error_code ec;
		for (auto const& driver_dir_entry : fs::directory_iterator(fs::path("/sys/bus/pci/drivers/intel_vpu/"), ec)) {
			if (not driver_dir_entry.is_directory(ec))
				continue;
			for (auto const& dir_entry : fs::directory_iterator(driver_dir_entry, ec)) {
				if (not dir_entry.is_regular_file(ec))
					continue;

				if (dir_entry.path().filename() == "npu_busy_time_us") {
					npu_util_sysfs = ifstream(dir_entry.path());
					if (!npu_util_sysfs) {
						Logger::info("NPU: Failed opening npu_busy_time_us sysfs file for NPU utilization");
						return false;
					}
					busy_file_found = true;
				}
				else if (dir_entry.path().filename() == "npu_memory_utilization") {
					npu_memory_util_sysfs = ifstream(dir_entry.path());
					if (!npu_memory_util_sysfs) {
						Logger::info("NPU: Failed opening npu_memory_utilization sysfs file for NPU memory usage");
						return false;
					}
					memory_util_found = true;
				}

				if (busy_file_found and memory_util_found)
					break;
			}
			//? Only a single NPU device is supported; stop scanning once one is found.
			if (busy_file_found)
				break;
		}

		if (not busy_file_found) //? memory_util_found is optional
			return false;

		Logger::info("Found Intel NPU device");

		if (not npu_memory_util_sysfs.has_value())
			Logger::info("NPU memory utilization function is not present. Use newer driver.");

		device_count = 1;
		gpus.resize(gpus.size() + device_count);
		//? gpus.size() already includes the new device; adding device_count again
		//? would leave gpu_names one element larger than gpus
		gpu_names.resize(gpus.size());

		//? The NPU entry is appended after all other backends' devices
		gpu_names[Nvml::device_count + Rsmi::device_count + Intel::device_count] = "Intel NPU";

		initialized = true;
		IntelNPU::collect<1>(gpus.data() + Intel::device_count + Nvml::device_count + Rsmi::device_count);

		return true;
	}

	//? Returns {cumulative busy time in µs, current wall-clock time in ms}
	NpuSample npu_read_util_value() {
		ssize_t val = 0;
		npu_util_sysfs.clear(); //? clear eof/fail state left by the previous read
		npu_util_sysfs.seekg(0);
		npu_util_sysfs >> val;
		return {static_cast<double>(val), Tools::time_ms()};
	}

	//? Returns the current NPU memory usage, or 0 if the sysfs file is unavailable
	ssize_t npu_read_memory_usage() {
		ssize_t val = 0;
		if (npu_memory_util_sysfs.has_value()) {
			npu_memory_util_sysfs->clear();
			npu_memory_util_sysfs->seekg(0);
			*npu_memory_util_sysfs >> val;
		}
		return val;
	}

	//? Closes the sysfs streams and resets state so init() can run again cleanly
	bool shutdown() {
		if (!initialized) return false;
		npu_util_sysfs.close();
		if (npu_memory_util_sysfs.has_value()) {
			npu_memory_util_sysfs->close();
			npu_memory_util_sysfs.reset();
		}
		initialized = false;
		return true;
	}

	//? Pushes the current NPU utilization percent and memory usage into gpus_slice.
	//? is_init additionally fills in supported_functions and seeds the first sample.
	template <bool is_init> bool collect(gpu_info* gpus_slice) {
		if (!initialized) return false;

		if constexpr(is_init) {
			IntelNPU::initial_sample = IntelNPU::npu_read_util_value();

			gpus_slice->supported_functions = {
				.gpu_utilization = true,
				.mem_utilization = false,
				.gpu_clock = false,
				.mem_clock = false,
				.pwr_usage = false,
				.pwr_state = false,
				.temp_info = false,
				.mem_total = false,
				.mem_used = npu_memory_util_sysfs.has_value(),
				.pcie_txrx = false,
				.is_npu_device = true
			};
		}

		NpuSample curr_sample = npu_read_util_value();
		const ssize_t npu_mem_usage = npu_read_memory_usage();
		const double value_delta = curr_sample.first - initial_sample.first;
		const double time_delta = curr_sample.second - initial_sample.second;

		//? Busy time is in µs, elapsed time in ms: Δbusy / (Δt * 1000) * 100 == Δbusy / Δt / 10.
		//? Guard against a zero elapsed time (e.g. the sample taken right after init).
		const long long utilization = time_delta > 0.0 ? (long long)round(value_delta / time_delta / 10.0) : 0ll;
		gpus_slice->gpu_percent.at("gpu-totals").push_back(utilization);
		gpus_slice->mem_used = npu_mem_usage;
		IntelNPU::initial_sample = curr_sample;
		return true;
	}
}

//? Collect data from GPU-specific libraries
auto collect(bool no_update) -> vector<gpu_info>& {
Expand All @@ -1720,6 +1853,7 @@ namespace Gpu {
Nvml::collect<0>(gpus.data()); // raw pointer to vector data, size == Nvml::device_count
Rsmi::collect<0>(gpus.data() + Nvml::device_count); // size = Rsmi::device_count
Intel::collect<0>(gpus.data() + Nvml::device_count + Rsmi::device_count); // size = Intel::device_count
IntelNPU::collect<0>(gpus.data() + Intel::device_count + Nvml::device_count + Rsmi::device_count); // size = IntelNPU::device_count

//* Calculate average usage
long long avg = 0;
Expand Down
Loading