From 7618926b257d2fc7c4d55ada11cbac70c4232f00 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 6 Aug 2024 22:02:16 +0200 Subject: [PATCH 01/69] Add architecture query to GPU NVIDIA hardware sampler. --- .../gpu_nvidia/nvml_samples.hpp | 7 ++- .../gpu_nvidia/hardware_sampler.cpp | 58 +++++++++++++++++++ .../gpu_nvidia/nvml_samples.cpp | 11 +++- 3 files changed, 72 insertions(+), 4 deletions(-) diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index 29b1d5a..89bc57c 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -41,9 +41,10 @@ class nvml_general_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device - HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode) // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the number of CUDA cores + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the architecture name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode) // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the number of CUDA cores HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state) // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_gpu) // the GPU compute utilization in percent diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 8e4729f..0ea5b8c 100644 --- 
a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -89,6 +89,64 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { // retrieve initial general information { // fixed information -> only retrieved once + nvmlDeviceArchitecture_t device_arch{}; + if (nvmlDeviceGetArchitecture(device, &device_arch) == NVML_SUCCESS) { + switch (device_arch) { +#if defined(NVML_DEVICE_ARCH_KEPLER) + case NVML_DEVICE_ARCH_KEPLER: + general_samples_.architecture_ = "Kepler"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_MAXWELL) + case NVML_DEVICE_ARCH_MAXWELL: + general_samples_.architecture_ = "Maxwell"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_PASCAL) + case NVML_DEVICE_ARCH_PASCAL: + general_samples_.architecture_ = "Pascal"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_VOLTA) + case NVML_DEVICE_ARCH_VOLTA: + general_samples_.architecture_ = "Volta"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_TURING) + case NVML_DEVICE_ARCH_TURING: + general_samples_.architecture_ = "Turing"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_AMPERE) + case NVML_DEVICE_ARCH_AMPERE: + general_samples_.architecture_ = "Ampere"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_ADA) + case NVML_DEVICE_ARCH_ADA: + general_samples_.architecture_ = "Ada"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_HOPPER) + case NVML_DEVICE_ARCH_HOPPER: + general_samples_.architecture_ = "Hopper"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_BLACKWELL) + case NVML_DEVICE_ARCH_BLACKWELL: + general_samples_.architecture_ = "Blackwell"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_T23X) + case NVML_DEVICE_ARCH_T23X: + general_samples_.architecture_ = "Orin"; + break; +#endif + default: + break; + } + } + std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0'); if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) { general_samples_.name_ = name.substr(0, name.find_first_of('\0')); diff --git 
a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 76ffe47..878877f 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -24,6 +24,13 @@ namespace hws { std::string nvml_general_samples::generate_yaml_string() const { std::string str{ "general:\n" }; + // device architecture + if (this->architecture_.has_value()) { + str += std::format(" architecture:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->architecture_.value()); + } // device name if (this->name_.has_value()) { str += std::format(" name:\n" @@ -76,12 +83,14 @@ std::string nvml_general_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) { - return out << std::format("name [string]: {}\n" + return out << std::format("architecture [string]: {}\n" + "name [string]: {}\n" "persistence_mode [bool]: {}\n" "num_cores [int]: {}\n" "performance_state [int]: [{}]\n" "utilization_gpu [%]: [{}]\n" "utilization_mem [%]: [{}]", + detail::value_or_default(samples.get_architecture()), detail::value_or_default(samples.get_name()), detail::value_or_default(samples.get_persistence_mode()), detail::value_or_default(samples.get_num_cores()), From 8618468b3d16d31a558cce7a3eb4e28eacaaff87 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 6 Aug 2024 22:47:43 +0200 Subject: [PATCH 02/69] Add endianness queries for all GPUs. 
--- .../hardware_sampling/gpu_amd/rocm_smi_samples.hpp | 3 ++- .../gpu_intel/level_zero_samples.hpp | 1 + include/hardware_sampling/gpu_nvidia/nvml_samples.hpp | 1 + src/hardware_sampling/gpu_amd/hardware_sampler.cpp | 3 +++ src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp | 11 ++++++++++- src/hardware_sampling/gpu_intel/hardware_sampler.cpp | 3 +++ .../gpu_intel/level_zero_samples.cpp | 11 ++++++++++- src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp | 3 +++ src/hardware_sampling/gpu_nvidia/nvml_samples.cpp | 9 +++++++++ 9 files changed, 42 insertions(+), 3 deletions(-) diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index 8f5d120..0b97bd0 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -42,7 +42,8 @@ class rocm_smi_general_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level) // the performance level: one of rsmi_dev_perf_level_t HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_gpu) // the GPU compute utilization in percent diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp index 7d0f713..94d7d3a 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp @@ -43,6 +43,7 @@ class level_zero_general_samples { */ [[nodiscard]] std::string generate_yaml_string() const; + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) 
HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the model name of the device HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode) // the enabled standby mode (power saving or never) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu) // the number of threads per EU unit diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index 89bc57c..3de4053 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -42,6 +42,7 @@ class nvml_general_samples { [[nodiscard]] std::string generate_yaml_string() const; HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the architecture name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode) // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the number of CUDA cores diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 3e20ba8..4586c8d 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -84,6 +84,9 @@ void gpu_amd_hardware_sampler::sampling_loop() { // retrieve initial general information { // fixed information -> only retrieved once + // the byte order is given by AMD directly + general_samples_.byte_order_ = "Little Endian"; + std::string name(static_cast(1024), '\0'); if (rsmi_dev_name_get(device_id_, name.data(), name.size()) == RSMI_STATUS_SUCCESS) { general_samples_.name_ = name.substr(0, name.find_first_of('\0')); diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp 
b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index 52a1ae8..de9f77e 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -24,6 +24,13 @@ namespace hws { std::string rocm_smi_general_samples::generate_yaml_string() const { std::string str{ "general:\n" }; + // device byte order + if (this->byte_order_.has_value()) { + str += std::format(" byte_order:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->byte_order_.value()); + } // device name if (this->name_.has_value()) { str += std::format(" name:\n" @@ -61,10 +68,12 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) { - return out << std::format("name [string]: {}\n" + return out << std::format("byte_order [string]: {}\n" + "name [string]: {}\n" "performance_level [int]: [{}]\n" "utilization_gpu [%]: [{}]\n" "utilization_mem [%]: [{}]", + detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_name()), detail::join(detail::value_or_default(samples.get_performance_level()), ", "), detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "), diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp index 7bfa1c6..1583bf0 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp @@ -88,6 +88,9 @@ void gpu_intel_hardware_sampler::sampling_loop() { // retrieve initial general information { + // the byte order is given by Intel directly + general_samples_.byte_order_ = "Little Endian"; + ze_device_properties_t ze_device_prop{}; if (zeDeviceGetProperties(device, &ze_device_prop) == ZE_RESULT_SUCCESS) { general_samples_.num_threads_per_eu_ = ze_device_prop.numThreadsPerEU; diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp 
b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp index ea564e2..096a6ba 100644 --- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp +++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp @@ -51,6 +51,13 @@ void append_map_values(std::string &str, const std::string_view entry_name, cons std::string level_zero_general_samples::generate_yaml_string() const { std::string str{ "general:\n" }; + // device byte order + if (this->byte_order_.has_value()) { + str += std::format(" byte_order:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->byte_order_.value()); + } // the model name if (this->name_.has_value()) { str += std::format(" model_name:\n" @@ -87,10 +94,12 @@ std::string level_zero_general_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) { - return out << std::format("name [string]: {}\n" + return out << std::format("byte_order [string]: {}\n" + "name [string]: {}\n" "standby_mode [string]: {}\n" "num_threads_per_eu [int]: {}\n" "eu_simd_width [int]: {}", + detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_name()), detail::value_or_default(samples.get_standby_mode()), detail::value_or_default(samples.get_num_threads_per_eu()), diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 0ea5b8c..003b34b 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -147,6 +147,9 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { } } + // the byte order is given by the NVIDIA CUDA guide + general_samples_.byte_order_ = "Little Endian"; + std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0'); if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) { general_samples_.name_ = name.substr(0, name.find_first_of('\0')); diff --git 
a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 878877f..e492b07 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -31,6 +31,13 @@ std::string nvml_general_samples::generate_yaml_string() const { " values: \"{}\"\n", this->architecture_.value()); } + // device byte order + if (this->byte_order_.has_value()) { + str += std::format(" byte_order:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->byte_order_.value()); + } // device name if (this->name_.has_value()) { str += std::format(" name:\n" @@ -84,6 +91,7 @@ std::string nvml_general_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) { return out << std::format("architecture [string]: {}\n" + "byte_order [string]: {}\n" "name [string]: {}\n" "persistence_mode [bool]: {}\n" "num_cores [int]: {}\n" @@ -91,6 +99,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) "utilization_gpu [%]: [{}]\n" "utilization_mem [%]: [{}]", detail::value_or_default(samples.get_architecture()), + detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_name()), detail::value_or_default(samples.get_persistence_mode()), detail::value_or_default(samples.get_num_cores()), From 8ed1bbbb5193d73b09c8fc0a9bbd2ae1366fe68c Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 6 Aug 2024 23:10:06 +0200 Subject: [PATCH 03/69] Add vendor_id queries for all GPUs. 
--- include/hardware_sampling/cpu/cpu_samples.hpp | 2 +- .../hardware_sampling/gpu_amd/rocm_smi_samples.hpp | 1 + .../gpu_intel/level_zero_samples.hpp | 1 + include/hardware_sampling/gpu_nvidia/nvml_samples.hpp | 1 + src/hardware_sampling/gpu_amd/hardware_sampler.cpp | 5 +++++ src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp | 9 +++++++++ src/hardware_sampling/gpu_intel/hardware_sampler.cpp | 1 + .../gpu_intel/level_zero_samples.cpp | 11 ++++++++++- src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp | 3 +++ src/hardware_sampling/gpu_nvidia/nvml_samples.cpp | 9 +++++++++ 10 files changed, 41 insertions(+), 2 deletions(-) diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index da08f84..aa4fa0c 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -49,7 +49,7 @@ class cpu_general_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, cores_per_socket) // the number of physical cores per socket HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_sockets) // the number of sockets HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, numa_nodes) // the number of NUMA nodes - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID (e.g. 
GenuineIntel) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID (e.g., GenuineIntel) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the CPU HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, flags) // potential CPU flags (e.g., sse4_1, avx, avx, etc) diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index 0b97bd0..7f1211a 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -43,6 +43,7 @@ class rocm_smi_general_samples { [[nodiscard]] std::string generate_yaml_string() const; HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level) // the performance level: one of rsmi_dev_perf_level_t diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp index 94d7d3a..1510199 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp @@ -44,6 +44,7 @@ class level_zero_general_samples { [[nodiscard]] std::string generate_yaml_string() const; HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the model name of the device HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode) // the enabled standby mode (power saving or never) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu) // the number of threads per EU unit diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp 
b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index 3de4053..189c66c 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -43,6 +43,7 @@ class nvml_general_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the architecture name of the device HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode) // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the number of CUDA cores diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 4586c8d..af2e98f 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -87,6 +87,11 @@ void gpu_amd_hardware_sampler::sampling_loop() { // the byte order is given by AMD directly general_samples_.byte_order_ = "Little Endian"; + std::string vendor_id(static_cast(1024), '\0'); + if (rsmi_dev_vendor_name_get(device_id_, vendor_id.data(), vendor_id.size()) == RSMI_STATUS_SUCCESS) { + general_samples_.vendor_id_ = vendor_id.substr(0, vendor_id.find_first_of('\0')); + } + std::string name(static_cast(1024), '\0'); if (rsmi_dev_name_get(device_id_, name.data(), name.size()) == RSMI_STATUS_SUCCESS) { general_samples_.name_ = name.substr(0, name.find_first_of('\0')); diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index de9f77e..27b16b4 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -31,6 +31,13 @@ std::string 
rocm_smi_general_samples::generate_yaml_string() const { " values: \"{}\"\n", this->byte_order_.value()); } + // the vendor specific ID + if (this->vendor_id_.has_value()) { + str += std::format(" vendor_id:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->vendor_id_.value()); + } // device name if (this->name_.has_value()) { str += std::format(" name:\n" @@ -69,11 +76,13 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) { return out << std::format("byte_order [string]: {}\n" + "vendor_id [string]: {}\n" "name [string]: {}\n" "performance_level [int]: [{}]\n" "utilization_gpu [%]: [{}]\n" "utilization_mem [%]: [{}]", detail::value_or_default(samples.get_byte_order()), + detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), detail::join(detail::value_or_default(samples.get_performance_level()), ", "), detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "), diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp index 1583bf0..3fd9a1e 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp @@ -93,6 +93,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { ze_device_properties_t ze_device_prop{}; if (zeDeviceGetProperties(device, &ze_device_prop) == ZE_RESULT_SUCCESS) { + general_samples_.vendor_id_ = std::format("{:x}", ze_device_prop.vendorId); // TODO: PCI configuration ID to name? 
general_samples_.num_threads_per_eu_ = ze_device_prop.numThreadsPerEU; general_samples_.eu_simd_width_ = ze_device_prop.physicalEUSimdWidth; } diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp index 096a6ba..e88c7ab 100644 --- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp +++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp @@ -58,7 +58,14 @@ std::string level_zero_general_samples::generate_yaml_string() const { " values: \"{}\"\n", this->byte_order_.value()); } - // the model name + // the vendor specific ID + if (this->vendor_id_.has_value()) { + str += std::format(" vendor_id:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->vendor_id_.value()); + } + // device name if (this->name_.has_value()) { str += std::format(" model_name:\n" " unit: \"string\"\n" @@ -95,11 +102,13 @@ std::string level_zero_general_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) { return out << std::format("byte_order [string]: {}\n" + "vendor_id [string]: {}\n" "name [string]: {}\n" "standby_mode [string]: {}\n" "num_threads_per_eu [int]: {}\n" "eu_simd_width [int]: {}", detail::value_or_default(samples.get_byte_order()), + detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), detail::value_or_default(samples.get_standby_mode()), detail::value_or_default(samples.get_num_threads_per_eu()), diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 003b34b..9ba8125 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -150,6 +150,9 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { // the byte order is given by the NVIDIA CUDA guide general_samples_.byte_order_ = "Little Endian"; + // the vendor ID is fixed for NVIDIA GPUs 
+ general_samples_.vendor_id_ = "NVIDIA"; + std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0'); if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) { general_samples_.name_ = name.substr(0, name.find_first_of('\0')); diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index e492b07..c8f17a8 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -38,6 +38,13 @@ std::string nvml_general_samples::generate_yaml_string() const { " values: \"{}\"\n", this->byte_order_.value()); } + // the vendor specific ID + if (this->vendor_id_.has_value()) { + str += std::format(" vendor_id:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->vendor_id_.value()); + } // device name if (this->name_.has_value()) { str += std::format(" name:\n" @@ -92,6 +99,7 @@ std::string nvml_general_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) { return out << std::format("architecture [string]: {}\n" "byte_order [string]: {}\n" + "vendor_id [string]: {}\n" "name [string]: {}\n" "persistence_mode [bool]: {}\n" "num_cores [int]: {}\n" @@ -100,6 +108,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) "utilization_mem [%]: [{}]", detail::value_or_default(samples.get_architecture()), detail::value_or_default(samples.get_byte_order()), + detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), detail::value_or_default(samples.get_persistence_mode()), detail::value_or_default(samples.get_num_cores()), From 9d0093d00a9b5443095ae23a20cafb9c9f2b7fdd Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 6 Aug 2024 23:10:31 +0200 Subject: [PATCH 04/69] Use same YAML entry for all samplers.
--- src/hardware_sampling/gpu_intel/level_zero_samples.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp index e88c7ab..70f1016 100644 --- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp +++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp @@ -67,7 +67,7 @@ std::string level_zero_general_samples::generate_yaml_string() const { } // device name if (this->name_.has_value()) { - str += std::format(" model_name:\n" + str += std::format(" name:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->name_.value()); From 62dfc334d5880a25739bbc69bf91023446215bc5 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 7 Aug 2024 20:12:11 +0200 Subject: [PATCH 05/69] Fix typo. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c1bea7f..272d1c0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -237,7 +237,7 @@ endif () #################################################################################################################### ## enable Python bindings ## #################################################################################################################### -option(HWS_ENABLE_PYTHON_BINDINGS "Build langauge bindings for Python." ON) +option(HWS_ENABLE_PYTHON_BINDINGS "Build language bindings for Python." ON) if (HWS_ENABLE_PYTHON_BINDINGS) add_subdirectory(bindings) From 473eae69d4c9657d0b349f2bef882d9732968ad4 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 7 Aug 2024 20:42:27 +0200 Subject: [PATCH 06/69] Rename utilization samples. 
--- include/hardware_sampling/cpu/cpu_samples.hpp | 12 +++--- .../gpu_amd/rocm_smi_samples.hpp | 6 +-- .../gpu_nvidia/nvml_samples.hpp | 6 +-- src/hardware_sampling/cpu/cpu_samples.cpp | 10 ++--- .../cpu/hardware_sampler.cpp | 8 ++-- .../gpu_amd/hardware_sampler.cpp | 20 +++++----- .../gpu_amd/rocm_smi_samples.cpp | 38 +++++++++--------- .../gpu_nvidia/hardware_sampler.cpp | 10 ++--- .../gpu_nvidia/nvml_samples.cpp | 40 +++++++++---------- 9 files changed, 75 insertions(+), 75 deletions(-) diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index aa4fa0c..b422ff3 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -53,12 +53,12 @@ class cpu_general_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the CPU HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, flags) // potential CPU flags (e.g., sse4_1, avx, avx, etc) - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, busy_percent) // the percent the CPU was busy doing work - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ipc) // the instructions-per-cycle count - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, irq) // the number of interrupts - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, smi) // the number of system management interrupts - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, poll) // the number of times the CPU was in the polling state - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, poll_percent) // the percent of the CPU was in the polling state + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, compute_utilization) // the percent the CPU was busy doing work + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ipc) // the instructions-per-cycle count + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, irq) // the number of interrupts + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, smi) // the number of system management interrupts + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, poll) // the number 
of times the CPU was in the polling state + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, poll_percent) // the percent of the CPU was in the polling state }; /** diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index 7f1211a..a301aef 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -46,9 +46,9 @@ class rocm_smi_general_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level) // the performance level: one of rsmi_dev_perf_level_t - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_gpu) // the GPU compute utilization in percent - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_mem) // the GPU memory utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization) // the GPU compute utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization) // the GPU memory utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level) // the performance level: one of rsmi_dev_perf_level_t }; /** diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index 189c66c..bc5a9d1 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -48,9 +48,9 @@ class nvml_general_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode) // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the number of CUDA cores - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state) // the performance state: 0 - 15 where 0 is the maximum 
performance and 15 the minimum performance - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_gpu) // the GPU compute utilization in percent - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_mem) // the GPU memory utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, compute_utilization) // the GPU compute utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, memory_utilization) // the GPU memory utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state) // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance }; /** diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index ef5a3b9..2bfb12b 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -99,12 +99,12 @@ std::string cpu_general_samples::generate_yaml_string() const { } // the percent the CPU was busy - if (this->busy_percent_.has_value()) { - str += std::format(" utilization:\n" + if (this->compute_utilization_.has_value()) { + str += std::format(" compute_utilization:\n" " turbostat_name: \"Busy%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->busy_percent_.value(), ", ")); + detail::join(this->compute_utilization_.value(), ", ")); } // the instructions per cycle count if (this->ipc_.has_value()) { @@ -164,7 +164,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) "vendor_id [string]: {}\n" "name [string]: {}\n" "flags [string]: [{}]\n" - "busy_percent [%]: [{}]\n" + "compute_utilization [%]: [{}]\n" "ipc [float]: [{}]\n" "irq [int]: [{}]\n" "smi [int]: [{}]\n" @@ -180,7 +180,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), detail::join(detail::value_or_default(samples.get_flags()), ", "), - 
detail::join(detail::value_or_default(samples.get_busy_percent()), ", "), + detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "), detail::join(detail::value_or_default(samples.get_ipc()), ", "), detail::join(detail::value_or_default(samples.get_irq()), ", "), detail::join(detail::value_or_default(samples.get_smi()), ", "), diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 6c967da..3101428 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -161,8 +161,8 @@ void cpu_hardware_sampler::sampling_loop() { using vector_type = decltype(clock_samples_.average_frequency_)::value_type; clock_samples_.average_frequency_ = vector_type{ detail::convert_to(values[i]) }; } else if (header[i] == "Busy%") { - using vector_type = decltype(general_samples_.busy_percent_)::value_type; - general_samples_.busy_percent_ = vector_type{ detail::convert_to(values[i]) }; + using vector_type = decltype(general_samples_.compute_utilization_)::value_type; + general_samples_.compute_utilization_ = vector_type{ detail::convert_to(values[i]) }; } else if (header[i] == "Bzy_MHz") { using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type; clock_samples_.average_non_idle_frequency_ = vector_type{ detail::convert_to(values[i]) }; @@ -306,8 +306,8 @@ void cpu_hardware_sampler::sampling_loop() { using vector_type = decltype(clock_samples_.average_frequency_)::value_type; clock_samples_.average_frequency_->push_back(detail::convert_to(values[i])); } else if (header[i] == "Busy%") { - using vector_type = decltype(general_samples_.busy_percent_)::value_type; - general_samples_.busy_percent_->push_back(detail::convert_to(values[i])); + using vector_type = decltype(general_samples_.compute_utilization_)::value_type; + general_samples_.compute_utilization_->push_back(detail::convert_to(values[i])); } else if (header[i] == 
"Bzy_MHz") { using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type; clock_samples_.average_non_idle_frequency_->push_back(detail::convert_to(values[i])); diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index af2e98f..15592af 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -103,14 +103,14 @@ void gpu_amd_hardware_sampler::sampling_loop() { general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ static_cast(pstate) }; } - decltype(general_samples_.utilization_gpu_)::value_type::value_type utilization_gpu{}; + decltype(general_samples_.compute_utilization_)::value_type::value_type utilization_gpu{}; if (rsmi_dev_busy_percent_get(device_id_, &utilization_gpu) == RSMI_STATUS_SUCCESS) { - general_samples_.utilization_gpu_ = decltype(general_samples_.utilization_gpu_)::value_type{ utilization_gpu }; + general_samples_.compute_utilization_ = decltype(general_samples_.compute_utilization_)::value_type{ utilization_gpu }; } - decltype(general_samples_.utilization_mem_)::value_type::value_type utilization_mem{}; + decltype(general_samples_.memory_utilization_)::value_type::value_type utilization_mem{}; if (rsmi_dev_memory_busy_percent_get(device_id_, &utilization_mem) == RSMI_STATUS_SUCCESS) { - general_samples_.utilization_mem_ = decltype(general_samples_.utilization_mem_)::value_type{ utilization_mem }; + general_samples_.memory_utilization_ = decltype(general_samples_.memory_utilization_)::value_type{ utilization_mem }; } } @@ -441,16 +441,16 @@ void gpu_amd_hardware_sampler::sampling_loop() { general_samples_.performance_level_->push_back(static_cast(pstate)); } - if (general_samples_.utilization_gpu_.has_value()) { - decltype(general_samples_.utilization_gpu_)::value_type::value_type value{}; + if (general_samples_.compute_utilization_.has_value()) { + 
decltype(general_samples_.compute_utilization_)::value_type::value_type value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value)); - general_samples_.utilization_gpu_->push_back(value); + general_samples_.compute_utilization_->push_back(value); } - if (general_samples_.utilization_mem_.has_value()) { - decltype(general_samples_.utilization_mem_)::value_type::value_type value{}; + if (general_samples_.memory_utilization_.has_value()) { + decltype(general_samples_.memory_utilization_)::value_type::value_type value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value)); - general_samples_.utilization_mem_->push_back(value); + general_samples_.memory_utilization_->push_back(value); } } diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index 27b16b4..0c43f95 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -46,26 +46,26 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { this->name_.value()); } - // performance state - if (this->performance_level_.has_value()) { - str += std::format(" performance_state:\n" - " unit: \"int - see rsmi_dev_perf_level_t\"\n" - " values: [{}]\n", - detail::join(this->performance_level_.value(), ", ")); - } // device compute utilization - if (this->utilization_gpu_.has_value()) { - str += std::format(" utilization_gpu:\n" + if (this->compute_utilization_.has_value()) { + str += std::format(" compute_utilization:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->utilization_gpu_.value(), ", ")); + detail::join(this->compute_utilization_.value(), ", ")); } // device memory utilization - if (this->utilization_mem_.has_value()) { - str += std::format(" utilization_mem:\n" + if (this->memory_utilization_.has_value()) { + str += std::format(" memory_utilization:\n" " unit: \"percentage\"\n" " values: [{}]\n", - 
detail::join(this->utilization_mem_.value(), ", ")); + detail::join(this->memory_utilization_.value(), ", ")); + } + // performance state + if (this->performance_level_.has_value()) { + str += std::format(" performance_state:\n" + " unit: \"int - see rsmi_dev_perf_level_t\"\n" + " values: [{}]\n", + detail::join(this->performance_level_.value(), ", ")); } // remove last newline @@ -78,15 +78,15 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samp return out << std::format("byte_order [string]: {}\n" "vendor_id [string]: {}\n" "name [string]: {}\n" - "performance_level [int]: [{}]\n" - "utilization_gpu [%]: [{}]\n" - "utilization_mem [%]: [{}]", + "compute_utilization [%]: [{}]\n" + "memory_utilization [%]: [{}]\n" + "performance_level [int]: [{}]", detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), - detail::join(detail::value_or_default(samples.get_performance_level()), ", "), - detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_utilization_mem()), ", ")); + detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "), + detail::join(detail::value_or_default(samples.get_memory_utilization()), ", "), + detail::join(detail::value_or_default(samples.get_performance_level()), ", ")); } //*************************************************************************************************************************************// diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 9ba8125..6590402 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -176,8 +176,8 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { nvmlUtilization_t util{}; if (nvmlDeviceGetUtilizationRates(device, &util) == 
NVML_SUCCESS) { - general_samples_.utilization_gpu_ = decltype(general_samples_.utilization_gpu_)::value_type{ util.gpu }; - general_samples_.utilization_mem_ = decltype(general_samples_.utilization_gpu_)::value_type{ util.memory }; + general_samples_.compute_utilization_ = decltype(general_samples_.compute_utilization_)::value_type{ util.gpu }; + general_samples_.memory_utilization_ = decltype(general_samples_.memory_utilization_)::value_type{ util.memory }; } } @@ -380,11 +380,11 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { general_samples_.performance_state_->push_back(static_cast(pstate)); } - if (general_samples_.utilization_gpu_.has_value() && general_samples_.utilization_mem_.has_value()) { + if (general_samples_.compute_utilization_.has_value() && general_samples_.memory_utilization_.has_value()) { nvmlUtilization_t util{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util)); - general_samples_.utilization_gpu_->push_back(util.gpu); - general_samples_.utilization_mem_->push_back(util.memory); + general_samples_.compute_utilization_->push_back(util.gpu); + general_samples_.memory_utilization_->push_back(util.memory); } } diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index c8f17a8..24329ad 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -67,27 +67,27 @@ std::string nvml_general_samples::generate_yaml_string() const { this->num_cores_.value()); } - // performance state - if (this->performance_state_.has_value()) { - str += std::format(" performance_state:\n" - " unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n" - " values: [{}]\n", - detail::join(this->performance_state_.value(), ", ")); - } // device compute utilization - if (this->utilization_gpu_.has_value()) { - str += std::format(" utilization_gpu:\n" + if (this->compute_utilization_.has_value()) { + str += 
std::format(" compute_utilization:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->utilization_gpu_.value(), ", ")); + detail::join(this->compute_utilization_.value(), ", ")); } - // device compute utilization - if (this->utilization_mem_.has_value()) { - str += std::format(" utilization_mem:\n" + // device memory utilization + if (this->memory_utilization_.has_value()) { + str += std::format(" memory_utilization:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->utilization_mem_.value(), ", ")); + detail::join(this->memory_utilization_.value(), ", ")); + } + // performance state + if (this->performance_state_.has_value()) { + str += std::format(" performance_state:\n" + " unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n" + " values: [{}]\n", + detail::join(this->performance_state_.value(), ", ")); } // remove last newline @@ -103,18 +103,18 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) "name [string]: {}\n" "persistence_mode [bool]: {}\n" "num_cores [int]: {}\n" - "performance_state [int]: [{}]\n" - "utilization_gpu [%]: [{}]\n" - "utilization_mem [%]: [{}]", + "compute_utilization [%]: [{}]\n" + "memory_utilization [%]: [{}]\n" + "performance_state [int]: [{}]", detail::value_or_default(samples.get_architecture()), detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), detail::value_or_default(samples.get_persistence_mode()), detail::value_or_default(samples.get_num_cores()), - detail::join(detail::value_or_default(samples.get_performance_state()), ", "), - detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_utilization_mem()), ", ")); + detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "), + detail::join(detail::value_or_default(samples.get_memory_utilization()), 
", "), + detail::join(detail::value_or_default(samples.get_performance_state()), ", ")); } //*************************************************************************************************************************************// From 11f98317a2cd63367a074c944a9c453335bb19ba Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 7 Aug 2024 21:19:05 +0200 Subject: [PATCH 07/69] Fix usage of wrong variable type. --- src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 6590402..43af02a 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -271,7 +271,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { // queried samples -> retrieved every iteration if available nvmlPstates_t pstate{}; if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) { - power_samples_.power_state_ = decltype(general_samples_.performance_state_)::value_type{ static_cast(pstate) }; + power_samples_.power_state_ = decltype(power_samples_.power_state_)::value_type{ static_cast(pstate) }; } decltype(power_samples_.power_usage_)::value_type::value_type power_usage{}; From e7cc2b234c8599ac729299576dc225b72677bc4a Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Wed, 7 Aug 2024 21:19:33 +0200 Subject: [PATCH 08/69] Rename performance_state to performance_level. 
--- include/hardware_sampling/gpu_nvidia/nvml_samples.hpp | 2 +- src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp | 6 +++--- src/hardware_sampling/gpu_nvidia/nvml_samples.cpp | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index bc5a9d1..3c973f4 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -50,7 +50,7 @@ class nvml_general_samples { HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, compute_utilization) // the GPU compute utilization in percent HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, memory_utilization) // the GPU memory utilization in percent - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state) // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level) // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance }; /** diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 43af02a..c971725 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -171,7 +171,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { // queried samples -> retrieved every iteration if available nvmlPstates_t pstate{}; if (nvmlDeviceGetPerformanceState(device, &pstate) == NVML_SUCCESS) { - general_samples_.performance_state_ = decltype(general_samples_.performance_state_)::value_type{ static_cast(pstate) }; + general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ static_cast(pstate) }; } nvmlUtilization_t util{}; @@ -374,10 +374,10 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { // retrieve general samples { - if 
(general_samples_.performance_state_.has_value()) { + if (general_samples_.performance_level_.has_value()) { nvmlPstates_t pstate{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate)); - general_samples_.performance_state_->push_back(static_cast(pstate)); + general_samples_.performance_level_->push_back(static_cast(pstate)); } if (general_samples_.compute_utilization_.has_value() && general_samples_.memory_utilization_.has_value()) { diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 24329ad..64b9a05 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -83,11 +83,11 @@ std::string nvml_general_samples::generate_yaml_string() const { detail::join(this->memory_utilization_.value(), ", ")); } // performance state - if (this->performance_state_.has_value()) { - str += std::format(" performance_state:\n" + if (this->performance_level_.has_value()) { + str += std::format(" performance_level:\n" " unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n" " values: [{}]\n", - detail::join(this->performance_state_.value(), ", ")); + detail::join(this->performance_level_.value(), ", ")); } // remove last newline @@ -105,7 +105,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) "num_cores [int]: {}\n" "compute_utilization [%]: [{}]\n" "memory_utilization [%]: [{}]\n" - "performance_state [int]: [{}]", + "performance_level [int]: [{}]", detail::value_or_default(samples.get_architecture()), detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_vendor_id()), @@ -114,7 +114,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) detail::value_or_default(samples.get_num_cores()), detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "), 
detail::join(detail::value_or_default(samples.get_memory_utilization()), ", "), - detail::join(detail::value_or_default(samples.get_performance_state()), ", ")); + detail::join(detail::value_or_default(samples.get_performance_level()), ", ")); } //*************************************************************************************************************************************// From e5f73374cad885a999c12d038f922c90ad7fbe2e Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 16 Aug 2024 12:25:27 +0200 Subject: [PATCH 09/69] Add samples to README (including TODOs). --- README.md | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/README.md b/README.md index 39c2c3d..97a1b11 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,185 @@ export CPLUS_INCLUDE_PATH=${CMAKE_INSTALL_PREFIX}/include:${CPLUS_INCLUDE_PATH} export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ``` +## Available samples + +### General samples + +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:--------------------|:-----:|:-----------:|:---------:|:------------:| +| architecture | str | str | ? | ? | +| byte_order | str | str (fix) | str (fix) | str (fix) | +| num_threads | int | | | | +| threads_per_core | int | | | | +| cores_per_socket | int | | | | +| num_sockets | int | | | | +| numa_nodes | int | | | | +| vendor_id | str | str (fix) | str | str (PCIe ID | +| name | str | str | str | str | +| flags | str | | | | +| compute_utilization | % | % | % | ? | +| memory_utilization | - | % | % | ? 
| +| ipc | float | - | - | - | +| irq | int | - | - | - | +| smi | int | - | - | - | +| poll | int | - | - | - | +| poll_percent | % | - | - | - | +| performance_level | | int | int | | +| standby_mode | | | | str | +| num_threads_per_eu | | | | int | +| eu_simd_width | | | | int | +| persistence_mode | | bool | | | +| num_cores | | int | | | + +### clock-related samples + +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:---------------------------|:----:|:-----------:|:--------:|:----------:| +| frequency_boost | bool | | | | +| min_cpu_frequency | MHz | | | | +| max_cpu_frequency | MHz | | | | +| average_frequency | MHz | | | | +| average_non_idle_frequency | MHz | | | | +| time_stamp_counter | MHz | | | | +| clock_socket_min | | | Hz | | +| clock_socket_max | | | Hz | | +| clock_memory_min | | | Hz | | +| clock_memory_max | | | Hz | | +| clock_gpu_min | | MHz | Hz | MHz | +| clock_gpu_max | | MHz | Hz | MHz | +| clock_socket | | | Hz | | +| clock_memory | | | Hz | | +| clock_gpu | | MHz | Hz | MHz | +| overdrive_level | | | % | | +| memory_overdrive_level | | | % | | +| available_clocks_gpu | | | | MHz | +| clock_mem_min | | MHz | | MHz | +| clock_mem_max | | MHz | | MHz | +| available_clocks_mem | | | | MHz | +| tdp_frequency_limit_gpu | | | | MHz | +| throttle_reason_gpu | | | | bitmask | +| tdp_frequency_limit_mem | | | | MHz | + +### power-related samples + +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:----------------------------|:----:|:-----------:|:--------:|:----------:| +| package_power | W | | | | +| core_watt | W | | | | +| dram_watt | W | | | | +| package_rapl_throttling | % | | | | +| dram_rapl_throttling | % | | | | +| power_management_limit | | mW | muW | | +| power_enforced_limit | | mW | muW | | +| power_measurement_type | | | str | | +| available_power_profiles | | | str | | +| power_usage | | mW | muW | | +| power_total_energy_consumed | | J | muJ | J | +| power_profile | | | str | | +| energy_threshold_enabled | | | 
bool | | +| energy_threshold | | | J | | +| power_management_mode | | bool | | | +| power_state | | int | | | + +### memory-related samples + +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:----------------------------|:----:|:-----------:|:--------:|:----------:| +| cache_size_L1d | str | | | | +| cache_size_L1i | str | | | | +| cache_size_L2 | str | | | | +| cache_size_L3 | str | | | | +| memory_total | B | B | B | | +| swap_memory_total | B | | | | +| memory_free | B | B | B | | +| memory_used | B | B | B | | +| swap_memory_free | B | | | | +| swap_memory_used | B | | | | +| visible_memory_total | | | B | | +| min_num_pcie_lanes | | | int | | +| max_num_pcie_lanes | | | int | | +| pcie_bandwidth | | MBPS | T/s | MBPS | +| num_pcie_lanes | | | int | | +| memory_total_{} | | | | B | +| allocatable_memory_total_{} | | | | B | +| pcie_max_bandwidth | | MBPS | | BPS | +| max_pcie_link_width | | | | int | +| max_pcie_link_generation | | int | | int | +| memory_bus_width_{} | | | | Bit | +| memory_num_channels_{} | | | | int | +| memory_location_{} | | | | str | +| memory_free_{} | | | | B | +| memory_used_{} | | | | B | +| pcie_link_width | | int | | int | +| pcie_link_generation | | int | | int | +| memory_bus_width | | Bit | | | + +### temperature-related samples + +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-------------------------|:----:|:-----------:|:--------:|:----------:| +| per_core_temperature | °C | | | | +| core_throttle_percentage | % | | | | +| per_package_temperature | °C | | | | +| num_fans | | int | int | | +| max_fan_speed | | | int | | +| temperature_gpu_min | | | m°C | | +| temperature_gpu_max | | °C | m°C | | +| temperature_hotspot_min | | | m°C | | +| temperature_hotspot_max | | | m°C | | +| temperature_memory_min | | | m°C | | +| temperature_memory_max | | | m°C | | +| temperature_hbm_0_min | | | m°C | | +| temperature_hbm_0_max | | | m°C | | +| temperature_hbm_1_min | | | m°C | MBPS | +| temperature_hbm_1_max | | | 
m°C | | +| temperature_hbm_2_min | | | m°C | B | +| temperature_hbm_2_max | | | m°C | B | +| temperature_hbm_3_min | | | m°C | BPS | +| temperature_hbm_3_max | | | m°C | int | +| fan_speed | | % | % | int | +| temperature_gpu | | °C | m°C | Bit | +| temperature_hotspot | | | m°C | int | +| temperature_memory | | | m°C | str | +| temperature_hbm_0 | | | m°C | B | +| temperature_hbm_1 | | | m°C | B | +| temperature_hbm_2 | | | m°C | int | +| temperature_hbm_3 | | | m°C | int | +| temperature_{}_max | | | | | +| temperature_psu | | | | | +| temperature_{} | | | | | +| min_fan_speed | | % | | | +| max_fan_speed | | % | | | +| temperature_mem_max | | °C | | | + +### gfx-related (iGPU) samples + +| sample | CPUs | +|:---------------------------|:----:| +| graphics_render_state | % | +| graphics_frequency | MHz | +| average_graphics_frequency | MHz | +| gpu_state_c0 | % | +| cpu_works_for_gpu | % | +| graphics_power | W | + +### "idle states"-related samples + +| sample | CPUs | +|:---------------------------------|:----:| +| all_cpus_state_c0 | % | +| any_cpu_state_c0 | % | +| lower_power_idle_state | % | +| system_lower_power_idle_state | % | +| package_lower_power_idle_state | % | +| cpu_idle_state_{}_percentage | % | +| package_idle_state_{}_percentage | % | +| package_idle_state_{}_percentage | % | +| idle_state_{}_percentage | % | +| idle_state_{} | int | + + + ## Example Python usage ```python From 1221588e8e2b29a312639be25093f61308608432 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 16 Aug 2024 13:42:44 +0200 Subject: [PATCH 10/69] Add architecture function for AMD GPUs --- .clang-format | 2 +- CMakeLists.txt | 3 ++- README.md | 16 ++++++++-------- .../gpu_amd/rocm_smi_samples.hpp | 7 ++++--- include/hardware_sampling/gpu_amd/utility.hpp | 10 ++++++++++ .../gpu_amd/hardware_sampler.cpp | 9 ++++++++- .../gpu_amd/rocm_smi_samples.cpp | 11 ++++++++++- 7 files changed, 43 insertions(+), 15 deletions(-) diff --git a/.clang-format b/.clang-format index 
9fc54fe..5d6a911 100644 --- a/.clang-format +++ b/.clang-format @@ -79,7 +79,7 @@ IncludeBlocks: Regroup IncludeCategories: - Regex: '^"hardware_sampling/' Priority: 1 - - Regex: '^"(pybind|nvml|rocm_smi|level_zero|subprocess)' + - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess)' Priority: 2 - Regex: '^.*' Priority: 3 diff --git a/CMakeLists.txt b/CMakeLists.txt index 272d1c0..e32478a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,7 +190,8 @@ endif () ## try finding ROCm SMI find_package(rocm_smi QUIET) if (rocm_smi_FOUND) - target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE -lrocm_smi64) + find_package(HIP REQUIRED) + target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE -lrocm_smi64 hip::host) target_include_directories(${HWS_LIBRARY_NAME} PRIVATE ${ROCM_SMI_INCLUDE_DIR}) message(STATUS "Enable sampling of AMD GPU information using ROCm SMI.") diff --git a/README.md b/README.md index 97a1b11..8305ba4 100644 --- a/README.md +++ b/README.md @@ -67,29 +67,29 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} | sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | |:--------------------|:-----:|:-----------:|:---------:|:------------:| -| architecture | str | str | ? | ? | +| name | str | str | str | str | +| vendor_id | str | str (fix) | str | str (PCIe ID) | +| architecture | str | str | str | ? | | byte_order | str | str (fix) | str (fix) | str (fix) | +| compute_utilization | % | % | % | ? | +| memory_utilization | - | % | % | ? | +| performance_level | | int | int | | | num_threads | int | | | | | threads_per_core | int | | | | | cores_per_socket | int | | | | | num_sockets | int | | | | | numa_nodes | int | | | | -| vendor_id | str | str (fix) | str | str (PCIe ID | -| name | str | str | str | str | | flags | str | | | | -| compute_utilization | % | % | % | ? | -| memory_utilization | - | % | % | ?
| | ipc | float | - | - | - | | irq | int | - | - | - | | smi | int | - | - | - | | poll | int | - | - | - | | poll_percent | % | - | - | - | -| performance_level | | int | int | | +| persistence_mode | | bool | | | +| num_cores | | int | | | | standby_mode | | | | str | | num_threads_per_eu | | | | int | | eu_simd_width | | | | int | -| persistence_mode | | bool | | | -| num_cores | | int | | | ### clock-related samples diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index a301aef..b89402d 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -42,9 +42,10 @@ class rocm_smi_general_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the architecture name of the device HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization) // the GPU compute utilization in percent HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization) // the GPU memory utilization in percent diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp index 5d039c7..d96387a 100644 --- a/include/hardware_sampling/gpu_amd/utility.hpp +++ b/include/hardware_sampling/gpu_amd/utility.hpp @@ -38,8 +38,18 @@ namespace hws { } \ } \ } + + #define HWS_HIP_ERROR_CHECK(hip_func) \ + { \ + const hipError_t
errc = hip_func; \ + if (errc != hipSuccess) { \ + throw std::runtime_error{ std::format("Error in HIP function call \"{}\": {}", #hip_func, hipGetErrorString(errc)) }; \ + } \ + } + #else #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) rocm_smi_func; + #define HWS_HIP_ERROR_CHECK(hip_func) hip_func; #endif } // namespace hws diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index a30da35..083b083 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -12,7 +12,8 @@ #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler #include "hardware_sampling/utility.hpp" // hws::detail::{time_points_to_epoch, join} -#include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions +#include "hip/hip_runtime_api.h" // HIP runtime functions +#include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions #include // std::chrono::{steady_clock, duration_cast, milliseconds} #include // std::size_t @@ -87,6 +88,12 @@ void gpu_amd_hardware_sampler::sampling_loop() { // the byte order is given by AMD directly general_samples_.byte_order_ = "Little Endian"; + hipDeviceProp_t prop{}; + if (hipGetDeviceProperties(&prop, device_id_) == hipSuccess) { + std::string architecture{ prop.gcnArchName }; + general_samples_.architecture_ = architecture.substr(0, architecture.find_first_of('\0')); + } + std::string vendor_id(static_cast(1024), '\0'); if (rsmi_dev_vendor_name_get(device_id_, vendor_id.data(), vendor_id.size()) == RSMI_STATUS_SUCCESS) { general_samples_.vendor_id_ = vendor_id.substr(0, vendor_id.find_first_of('\0')); diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index 0c43f95..0bc1041 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -24,6 +24,13 @@ namespace hws { std::string
rocm_smi_general_samples::generate_yaml_string() const { std::string str{ "general:\n" }; + // device architecture + if (this->architecture_.has_value()) { + str += std::format(" architecture:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->architecture_.value()); + } // device byte order if (this->byte_order_.has_value()) { str += std::format(" byte_order:\n" @@ -75,12 +82,14 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) { - return out << std::format("byte_order [string]: {}\n" + return out << std::format("architecture [string]: {}\n" + "byte_order [string]: {}\n" "vendor_id [string]: {}\n" "name [string]: {}\n" "compute_utilization [%]: [{}]\n" "memory_utilization [%]: [{}]\n" "performance_level [int]: [{}]", + detail::value_or_default(samples.get_architecture()), detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), From ce69a52c8fde04a79d6c504537a81dd9d89cb9dc Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 16 Aug 2024 14:14:05 +0200 Subject: [PATCH 11/69] Add query for the number of cores. --- README.md | 2 +- include/hardware_sampling/cpu/cpu_samples.hpp | 1 + src/hardware_sampling/cpu/cpu_samples.cpp | 9 +++++++++ src/hardware_sampling/cpu/hardware_sampler.cpp | 5 +++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8305ba4..df20d89 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,7 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} | compute_utilization | % | % | % | ? | | memory_utilization | - | % | % | ? 
| | performance_level | | int | int | | +| num_cores | int | int | - | | | num_threads | int | | | | | threads_per_core | int | | | | | cores_per_socket | int | | | | @@ -86,7 +87,6 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} | poll | int | - | - | - | | poll_percent | % | - | - | - | | persistence_mode | | bool | | | -| num_cores | | int | | | | standby_mode | | | | str | | num_threads_per_eu | | | | int | | eu_simd_width | | | | int | diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index b422ff3..59cb9a1 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -44,6 +44,7 @@ class cpu_general_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the CPU architecture (e.g., x86_64) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the total number of cores of the CPU(s) HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_threads) // the number of threads of the CPU(s) including potential hyper-threads HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, threads_per_core) // the number of hyper-threads per core HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, cores_per_socket) // the number of physical cores per socket diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index 2bfb12b..d73f86b 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -41,6 +41,13 @@ std::string cpu_general_samples::generate_yaml_string() const { " values: \"{}\"\n", this->byte_order_.value()); } + // number of cores + if (this->num_cores_.has_value()) { + str += std::format(" num_cores:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_cores_.value()); + } // number of threads including hyper-threads if (this->num_threads_.has_value()) { str 
+= std::format(" num_threads:\n" @@ -156,6 +163,7 @@ std::string cpu_general_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) { std::string str = std::format("architecture [string]: {}\n" "byte_order [string]: {}\n" + "num_cores [int]: {}\n" "num_threads [int]: {}\n" "threads_per_core [int]: {}\n" "cores_per_socket [int]: {}\n" @@ -172,6 +180,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) "poll_percent [%]: [{}]", detail::value_or_default(samples.get_architecture()), detail::value_or_default(samples.get_byte_order()), + detail::value_or_default(samples.get_num_cores()), detail::value_or_default(samples.get_num_threads()), detail::value_or_default(samples.get_threads_per_core()), detail::value_or_default(samples.get_cores_per_socket()), diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 29747f7..d63efc8 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -105,6 +105,11 @@ void cpu_hardware_sampler::sampling_loop() { memory_samples_.l3_cache_ = detail::convert_to(value); } } + + // check if the number of cores can be derived from the otherwise found values + if (general_samples_.num_threads_.has_value() && general_samples_.threads_per_core_.has_value()) { + general_samples_.num_cores_ = general_samples_.num_threads_.value() / general_samples_.threads_per_core_.value(); + } } #endif From 567afcdc8747baa6dacad59a78b34df11d0ded61 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 12 Sep 2024 15:30:00 +0200 Subject: [PATCH 12/69] Update power related query to be more uniform (except Intel Level Zero). 
--- README.md | 43 ++++++----- include/hardware_sampling/cpu/cpu_samples.hpp | 13 ++-- .../gpu_amd/rocm_smi_samples.hpp | 12 +-- .../gpu_nvidia/nvml_samples.hpp | 16 ++-- src/hardware_sampling/cpu/cpu_samples.cpp | 30 ++++++-- .../cpu/hardware_sampler.cpp | 16 +++- .../gpu_amd/hardware_sampler.cpp | 45 +++++------ .../gpu_amd/rocm_smi_samples.cpp | 48 ++++++------ .../gpu_nvidia/hardware_sampler.cpp | 60 ++++++++++----- .../gpu_nvidia/nvml_samples.cpp | 74 +++++++++++-------- 10 files changed, 212 insertions(+), 145 deletions(-) diff --git a/README.md b/README.md index df20d89..642e56b 100644 --- a/README.md +++ b/README.md @@ -75,10 +75,11 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} | memory_utilization | - | % | % | ? | | performance_level | | int | int | | | num_cores | int | int | - | | -| num_threads | int | | | | -| threads_per_core | int | | | | -| cores_per_socket | int | | | | -| num_sockets | int | | | | +| num_compute_units | - | int | int | ? | TODO +| num_threads | int | - | - | - | +| threads_per_core | int | - | - | - | +| cores_per_socket | int | - | - | - | +| num_sockets | int | - | - | - | | numa_nodes | int | | | | | flags | str | | | | | ipc | float | - | - | - | @@ -122,24 +123,22 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ### power-related samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:----------------------------|:----:|:-----------:|:--------:|:----------:| -| package_power | W | | | | -| core_watt | W | | | | -| dram_watt | W | | | | -| package_rapl_throttling | % | | | | -| dram_rapl_throttling | % | | | | -| power_management_limit | | mW | muW | | -| power_enforced_limit | | mW | muW | | -| power_measurement_type | | | str | | -| available_power_profiles | | | str | | -| power_usage | | mW | muW | | -| power_total_energy_consumed | | J | muJ | J | -| power_profile | | | str | | -| energy_threshold_enabled | | | bool | | -| energy_threshold | | | J | | -| power_management_mode | 
| bool | | | -| power_state | | int | | | +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:----------------------------|:---------:|:-----------:|:-----------:|:----------:| +| power_management_limit | - | W | W | | +| power_enforced_limit | - | W | W | | +| power_measurement_type | str (fix) | str | str | | +| power_management_mode | - | bool | - | | +| available_power_profiles | - | list of int | list of str | | +| power_usage | W | W | W | | +| core_watt | W | - | - | - | +| dram_watt | W | - | - | - | +| package_rapl_throttling | % | - | - | - | +| dram_rapl_throttling | % | - | - | - | +| power_total_energy_consumed | J | J | J | J | +| power_profile | - | int | str | | +| energy_threshold_enabled | | | | bool | +| energy_threshold | | | | J | ### memory-related samples diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index 59cb9a1..3263d77 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -127,11 +127,14 @@ class cpu_power_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_watt) // the currently consumed power of the package of the CPU in W - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_watt) // the currently consumed power of the core part of the CPU in W - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ram_watt) // the currently consumed power of the RAM part of the CPU in W - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_rapl_throttle_percent) // the percent of time the package throttled due to RAPL limiters - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, dram_rapl_throttle_percent) // the percent of time the DRAM throttled due to RAPL limiters + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type) // the type of the power readings: always "instant/current" + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage) // the currently consumed 
power of the package of the CPU in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption) // the total power consumption in J + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_watt) // the currently consumed power of the core part of the CPU in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ram_watt) // the currently consumed power of the RAM part of the CPU in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_rapl_throttle_percent) // the percent of time the package throttled due to RAPL limiters + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, dram_rapl_throttle_percent) // the percent of time the DRAM throttled due to RAPL limiters }; /** diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index b89402d..3f89615 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -122,14 +122,14 @@ class rocm_smi_power_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, power_default_cap) // the default power cap, may be different from power cap - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, power_cap) // if the GPU draws more power (μW) than the power cap, the GPU may throttle - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_type) // the type of the power management: either current power draw or average power draw + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_management_limit) // the default power cap (W), may be different from power cap + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit) // if the GPU draws more power (W) than the power cap, the GPU may throttle + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type) // the type of the power readings: either current power draw or average power draw HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_power_profiles) // a list of the available power profiles - 
HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_usage) // the current GPU socket power draw in μW - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_total_energy_consumption) // the total power consumption since the last driver reload in μJ - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, power_profile) // the current active power profile; one of 'available_power_profiles' + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage) // the current GPU socket power draw in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption) // the total power consumption since the last driver reload in J + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, power_profile) // the current active power profile; one of 'available_power_profiles' }; /** diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index 3c973f4..86420c8 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -123,13 +123,15 @@ class nvml_power_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode) // true if power management algorithms are supported and active - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, power_management_limit) // if the GPU draws more power (mW) than the power management limit, the GPU may throttle - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, power_enforced_limit) // the actually enforced power limit, may be different from power management limit if external limiters are set - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, power_state) // the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, power_usage) // the current power draw of the GPU and its related circuity (e.g., memory) in mW - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, 
power_total_energy_consumption) // the total power consumption since the last driver reload in mJ + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_management_limit) // if the GPU draws more power (W) than the power management limit, the GPU may throttle + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit) // the actually enforced power limit (W), may be different from power management limit if external limiters are set + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type) // the type of the power readings: either current power draw or average power draw + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode) // true if power management algorithms are supported and active + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_power_profiles) // a list of the available power profiles + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage) // the current power draw of the GPU and its related circuity (e.g., memory) in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption) // the total power consumption since the last driver reload in J + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, power_profile) // the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power; 32 indicates unknown }; /** diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index d73f86b..92bf8a9 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -284,14 +284,30 @@ std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) { std::string cpu_power_samples::generate_yaml_string() const { std::string str{ "power:\n" }; + // power measurement type + if (this->power_measurement_type_.has_value()) { + str += std::format(" power_measurement_type:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->power_measurement_type_.value()); + } + // the package Watt - if (this->package_watt_.has_value()) { - str += 
std::format(" package_power:\n" + if (this->power_usage_.has_value()) { + str += std::format(" power_usage:\n" " turbostat_name: \"PkgWatt\"\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->package_watt_.value(), ", ")); + detail::join(this->power_usage_.value(), ", ")); } + // total energy consumed + if (this->power_total_energy_consumption_.has_value()) { + str += std::format(" power_total_energy_consumed:\n" + " unit: \"J\"\n" + " values: [{}]\n", + detail::join(this->power_total_energy_consumption_.value(), ", ")); + } + // the core Watt if (this->core_watt_.has_value()) { str += std::format(" core_power:\n" @@ -332,12 +348,16 @@ std::string cpu_power_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_power_samples &samples) { - return out << std::format("package_watt [W]: [{}]\n" + return out << std::format("power_measurement_type [string]: {}\n" + "power_usage [W]: [{}]\n" + "power_total_energy_consumption [J]: [{}]\n" "core_watt [W]: [{}]\n" "ram_watt [W]: [{}]\n" "package_rapl_throttle_percent [%]: [{}]\n" "dram_rapl_throttle_percent [%]: [{}]", - detail::join(detail::value_or_default(samples.get_package_watt()), ", "), + detail::value_or_default(samples.get_power_measurement_type()), + detail::join(detail::value_or_default(samples.get_power_usage()), ", "), + detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), detail::join(detail::value_or_default(samples.get_core_watt()), ", "), detail::join(detail::value_or_default(samples.get_ram_watt()), ", "), detail::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "), diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index d63efc8..e645fde 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -229,8 +229,10 @@ void cpu_hardware_sampler::sampling_loop() { using vector_type = 
decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; } else if (header[i] == "PkgWatt") { - using vector_type = decltype(power_samples_.package_watt_)::value_type; - power_samples_.package_watt_ = vector_type{ detail::convert_to(values[i]) }; + using vector_type = decltype(power_samples_.power_usage_)::value_type; + power_samples_.power_usage_ = vector_type{ detail::convert_to(values[i]) }; + power_samples_.power_measurement_type_ = "current/instant"; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 }; } else if (header[i] == "CorWatt") { using vector_type = decltype(power_samples_.core_watt_)::value_type; power_samples_.core_watt_ = vector_type{ detail::convert_to(values[i]) }; @@ -374,8 +376,14 @@ void cpu_hardware_sampler::sampling_loop() { using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); } else if (header[i] == "PkgWatt") { - using vector_type = decltype(power_samples_.package_watt_)::value_type; - power_samples_.package_watt_->push_back(detail::convert_to(values[i])); + using vector_type = decltype(power_samples_.power_usage_)::value_type; + power_samples_.power_usage_->push_back(detail::convert_to(values[i])); + // calculate total energy consumption + using value_type = decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type; + const std::size_t num_time_points = this->sampling_time_points().size(); + const value_type time_difference = std::chrono::duration(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count(); + const auto current = power_samples_.power_usage_->back() * time_difference; + 
power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current); } else if (header[i] == "CorWatt") { using vector_type = decltype(power_samples_.core_watt_)::value_type; power_samples_.core_watt_->push_back(detail::convert_to(values[i])); diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 083b083..434b047 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -82,6 +82,8 @@ void gpu_amd_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); + std::uint64_t initial_power_usage{}; + // retrieve initial general information { // fixed information -> only retrieved once @@ -174,33 +176,32 @@ void gpu_amd_hardware_sampler::sampling_loop() { // retrieve initial power related information { - decltype(power_samples_.power_default_cap_)::value_type power_default_cap{}; + std::uint64_t power_default_cap{}; if (rsmi_dev_power_cap_default_get(device_id_, &power_default_cap) == RSMI_STATUS_SUCCESS) { - power_samples_.power_default_cap_ = power_default_cap; + power_samples_.power_management_limit_ = static_cast(power_default_cap) / 1000.0 / 1000.0; } - decltype(power_samples_.power_cap_)::value_type power_cap{}; + std::uint64_t power_cap{}; if (rsmi_dev_power_cap_get(device_id_, std::uint32_t{ 0 }, &power_cap) == RSMI_STATUS_SUCCESS) { - power_samples_.power_cap_ = power_cap; + power_samples_.power_enforced_limit_ = static_cast(power_cap) / 1000.0 / 1000.0; } { - decltype(power_samples_.power_usage_)::value_type::value_type power_usage{}; RSMI_POWER_TYPE power_type{}; - if (rsmi_dev_power_get(device_id_, &power_usage, &power_type) == RSMI_STATUS_SUCCESS) { + if (rsmi_dev_power_get(device_id_, &initial_power_usage, &power_type) == RSMI_STATUS_SUCCESS) { switch (power_type) { case RSMI_POWER_TYPE::RSMI_AVERAGE_POWER: - power_samples_.power_type_ = 
"average"; + power_samples_.power_measurement_type_ = "average"; break; case RSMI_POWER_TYPE::RSMI_CURRENT_POWER: - power_samples_.power_type_ = "current/instant"; + power_samples_.power_measurement_type_ = "current/instant"; break; case RSMI_POWER_TYPE::RSMI_INVALID_POWER: - power_samples_.power_type_ = "invalid/undetected"; + power_samples_.power_measurement_type_ = "invalid/undetected"; break; } - // queried samples -> retrieved every iteration if available - power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ power_usage }; + // report power usage since the first sample + power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(0) }; } } @@ -263,10 +264,11 @@ void gpu_amd_hardware_sampler::sampling_loop() { // queried samples -> retrieved every iteration if available [[maybe_unused]] std::uint64_t timestamp{}; float resolution{}; - decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type power_total_energy_consumption{}; - if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, ×tamp) == RSMI_STATUS_SUCCESS) { // TODO: returns the same value for all invocations - const double scaled_value = static_cast(power_total_energy_consumption) * static_cast(resolution); - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast(scaled_value) }; + std::uint64_t power_total_energy_consumption{}; + if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, ×tamp) == RSMI_STATUS_SUCCESS) { + const auto scaled_value = static_cast(power_total_energy_consumption) * + static_cast(resolution); + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ scaled_value / 1000.0 / 1000.0 }; } } @@ -514,18 +516,19 @@ void gpu_amd_hardware_sampler::sampling_loop() { { if 
(power_samples_.power_usage_.has_value()) { [[maybe_unused]] RSMI_POWER_TYPE power_type{}; - decltype(power_samples_.power_usage_)::value_type::value_type value{}; + std::uint64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type)); - power_samples_.power_usage_->push_back(value); + power_samples_.power_usage_->push_back(static_cast(value - initial_power_usage) / 1000.0 / 1000.0); } if (power_samples_.power_total_energy_consumption_.has_value()) { [[maybe_unused]] std::uint64_t timestamp{}; float resolution{}; - decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp)); // TODO: returns the same value for all invocations - const double scaled_value = static_cast(value) * static_cast(resolution); - power_samples_.power_total_energy_consumption_->push_back(static_cast(scaled_value)); + std::uint64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp)); + const auto scaled_value = static_cast(value) * + static_cast(resolution); + power_samples_.power_total_energy_consumption_->push_back(scaled_value / 1000.0); } if (power_samples_.power_profile_.has_value()) { diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index 0bc1041..c7f7d88 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -224,26 +224,26 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &sample std::string rocm_smi_power_samples::generate_yaml_string() const { std::string str{ "power:\n" }; - // default power cap - if (this->power_default_cap_.has_value()) { + // power management limit + if (this->power_management_limit_.has_value()) { str += std::format(" power_management_limit:\n" - " unit: \"muW\"\n" + " unit: \"W\"\n" " values: {}\n", 
- this->power_default_cap_.value()); + this->power_management_limit_.value()); } - // power cap - if (this->power_cap_.has_value()) { + // power enforced limit + if (this->power_enforced_limit_.has_value()) { str += std::format(" power_enforced_limit:\n" - " unit: \"muW\"\n" + " unit: \"W\"\n" " values: {}\n", - this->power_cap_.value()); + this->power_enforced_limit_.value()); } // power measurement type - if (this->power_type_.has_value()) { + if (this->power_measurement_type_.has_value()) { str += std::format(" power_measurement_type:\n" " unit: \"string\"\n" - " values: {}\n", - this->power_type_.value()); + " values: \"{}\"\n", + this->power_measurement_type_.value()); } // available power levels if (this->available_power_profiles_.has_value()) { @@ -256,20 +256,16 @@ std::string rocm_smi_power_samples::generate_yaml_string() const { // current power usage if (this->power_usage_.has_value()) { str += std::format(" power_usage:\n" - " unit: \"muW\"\n" + " unit: \"W\"\n" " values: [{}]\n", detail::join(this->power_usage_.value(), ", ")); } // total energy consumed if (this->power_total_energy_consumption_.has_value()) { - decltype(rocm_smi_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size()); - for (std::size_t i = 0; i < consumed_energy.size(); ++i) { - consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front(); - } str += std::format(" power_total_energy_consumed:\n" - " unit: \"muJ\"\n" + " unit: \"J\"\n" " values: [{}]\n", - detail::join(consumed_energy, ", ")); + detail::join(this->power_total_energy_consumption_.value(), ", ")); } // current power level if (this->power_profile_.has_value()) { @@ -286,16 +282,16 @@ std::string rocm_smi_power_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples) { - return out << std::format("power_default_cap [muW]: {}\n" - 
"power_cap [muW]: {}\n" - "power_type [string]: {}\n" + return out << std::format("power_management_limit [W]: {}\n" + "power_enforced_limit [W]: {}\n" + "power_measurement_type [string]: {}\n" "available_power_profiles [string]: [{}]\n" - "power_usage [muW]: [{}]\n" - "power_total_energy_consumption [muJ]: [{}]\n" + "power_usage [W]: [{}]\n" + "power_total_energy_consumption [J]: [{}]\n" "power_profile [string]: [{}]", - detail::value_or_default(samples.get_power_default_cap()), - detail::value_or_default(samples.get_power_cap()), - detail::value_or_default(samples.get_power_type()), + detail::value_or_default(samples.get_power_management_limit()), + detail::value_or_default(samples.get_power_enforced_limit()), + detail::value_or_default(samples.get_power_measurement_type()), detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "), detail::join(detail::value_or_default(samples.get_power_usage()), ", "), detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index c971725..41b3c0b 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -22,6 +22,7 @@ #include // std::format #include // std::ios_base #include // std::cerr, std::endl +#include // std::iota #include // std::optional #include // std::ostream #include // std::runtime_error @@ -86,6 +87,8 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); + unsigned int initial_power_usage{}; + // retrieve initial general information { // fixed information -> only retrieved once @@ -258,30 +261,49 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { power_samples_.power_management_mode_ = mode == NVML_FEATURE_ENABLED; } - decltype(power_samples_.power_management_limit_)::value_type 
power_management_limit{}; + unsigned int power_management_limit{}; if (nvmlDeviceGetPowerManagementLimit(device, &power_management_limit) == NVML_SUCCESS) { - power_samples_.power_management_limit_ = power_management_limit; + power_samples_.power_management_limit_ = static_cast(power_management_limit) / 1000.0; } - decltype(power_samples_.power_enforced_limit_)::value_type power_enforced_limit{}; + unsigned int power_enforced_limit{}; if (nvmlDeviceGetEnforcedPowerLimit(device, &power_enforced_limit) == NVML_SUCCESS) { - power_samples_.power_enforced_limit_ = power_enforced_limit; + power_samples_.power_enforced_limit_ = static_cast(power_enforced_limit) / 1000.0; + } + + if (general_samples_.architecture_.has_value()) { + // based on https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1gf754f109beca3a4a8c8c1cd650d7d66c + if (general_samples_.architecture_ == "Kepler" || general_samples_.architecture_ == "Maxwell" || general_samples_.architecture_ == "Pascal" || general_samples_.architecture_ == "Volta" || general_samples_.architecture_ == "Turing") { + power_samples_.power_measurement_type_ = "current/instant"; + } else if (general_samples_.architecture_ == "Ampere" || general_samples_.architecture_ == "Ada" || general_samples_.architecture_ == "Hopper" || general_samples_.architecture_ == "Blackwell" || general_samples_.architecture_ == "Orin") { + if (general_samples_.name_.has_value() && general_samples_.name_.value().find("A100") != std::string::npos) { + // GA100 also has instant power draw measurements + power_samples_.power_measurement_type_ = "current/instant"; + } else { + power_samples_.power_measurement_type_ = "average"; + } + } else { + power_samples_.power_measurement_type_ = "invalid/undetected"; + } } + decltype(power_samples_.available_power_profiles_)::value_type power_states(17, 32); // 17 power states, value 32 = unknown + std::iota(power_states.begin(), power_states.end() - 1, 
decltype(power_samples_.available_power_profiles_)::value_type::value_type{ 0 }); + power_samples_.available_power_profiles_ = power_states; + // queried samples -> retrieved every iteration if available - nvmlPstates_t pstate{}; - if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) { - power_samples_.power_state_ = decltype(power_samples_.power_state_)::value_type{ static_cast(pstate) }; + if (nvmlDeviceGetPowerUsage(device, &initial_power_usage) == NVML_SUCCESS) { + power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(0) }; } - decltype(power_samples_.power_usage_)::value_type::value_type power_usage{}; - if (nvmlDeviceGetPowerUsage(device, &power_usage) == NVML_SUCCESS) { - power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ power_usage }; + unsigned long long power_total_energy_consumption{}; + if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) { + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast(power_total_energy_consumption) / 1000.0 }; } - decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type power_total_energy_consumption{}; - if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) { - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ power_total_energy_consumption }; + nvmlPstates_t pstate{}; + if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) { + power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ static_cast(pstate) }; } } @@ -424,22 +446,22 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { // retrieve power related information { - if (power_samples_.power_state_.has_value()) { + if (power_samples_.power_profile_.has_value()) { nvmlPstates_t pstate{}; 
HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate)); - power_samples_.power_state_->push_back(static_cast(pstate)); + power_samples_.power_profile_->push_back(static_cast(pstate)); } if (power_samples_.power_usage_.has_value()) { - decltype(power_samples_.power_usage_)::value_type::value_type value{}; + unsigned int value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value)); - power_samples_.power_usage_->push_back(value); + power_samples_.power_usage_->push_back(static_cast(value - initial_power_usage) / 1000.0); } if (power_samples_.power_total_energy_consumption_.has_value()) { - decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type value{}; + unsigned long long value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value)); - power_samples_.power_total_energy_consumption_->push_back(value); + power_samples_.power_total_energy_consumption_->push_back(static_cast(value) / 1000.0); } } diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 64b9a05..95cfa17 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -241,52 +241,62 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) { std::string nvml_power_samples::generate_yaml_string() const { std::string str{ "power:\n" }; - // the power management mode - if (this->power_management_mode_.has_value()) { - str += std::format(" power_management_mode:\n" - " unit: \"bool\"\n" - " values: {}\n", - this->power_management_mode_.value()); - } // power management limit if (this->power_management_limit_.has_value()) { str += std::format(" power_management_limit:\n" - " unit: \"mW\"\n" + " unit: \"W\"\n" " values: {}\n", this->power_management_limit_.value()); } // power enforced limit if (this->power_enforced_limit_.has_value()) { str += std::format(" power_enforced_limit:\n" - " unit: \"mW\"\n" + 
" unit: \"W\"\n" " values: {}\n", this->power_enforced_limit_.value()); } - - // power state - if (this->power_state_.has_value()) { - str += std::format(" power_state:\n" - " unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n" + // power measurement type + if (this->power_measurement_type_.has_value()) { + str += std::format(" power_measurement_type:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->power_measurement_type_.value()); + } + // the power management mode + if (this->power_management_mode_.has_value()) { + str += std::format(" power_management_mode:\n" + " unit: \"bool\"\n" + " values: {}\n", + this->power_management_mode_.value()); + } + // available power levels + if (this->available_power_profiles_.has_value()) { + str += std::format(" available_power_profiles:\n" + " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->power_state_.value(), ", ")); + detail::join(this->available_power_profiles_.value(), ", ")); } + // current power usage if (this->power_usage_.has_value()) { str += std::format(" power_usage:\n" - " unit: \"mW\"\n" + " unit: \"W\"\n" " values: [{}]\n", detail::join(this->power_usage_.value(), ", ")); } // total energy consumed if (this->power_total_energy_consumption_.has_value()) { - decltype(nvml_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size()); - for (std::size_t i = 0; i < consumed_energy.size(); ++i) { - consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front(); - } str += std::format(" power_total_energy_consumed:\n" " unit: \"J\"\n" " values: [{}]\n", - detail::join(consumed_energy, ", ")); + detail::join(this->power_total_energy_consumption_.value(), ", ")); + } + // power state + if (this->power_profile_.has_value()) { + str += std::format(" power_profile:\n" + " unit: \"int\"\n" + " values: [{}]\n", + detail::join(this->power_profile_.value(), ", ")); } 
// remove last newline @@ -296,18 +306,22 @@ std::string nvml_power_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) { - return out << std::format("power_management_mode [bool]: {}\n" - "power_management_limit [mW]: {}\n" - "power_enforced_limit [mW]: {}\n" - "power_state [int]: [{}]\n" - "power_usage [mW]: [{}]\n" - "power_total_energy_consumption [J]: [{}]", - detail::value_or_default(samples.get_power_management_mode()), + return out << std::format("power_management_limit [W]: {}\n" + "power_enforced_limit [W]: {}\n" + "power_measurement_type [string]: {}\n" + "power_management_mode [bool]: {}\n" + "available_power_profiles [int]: [{}]\n" + "power_usage [W]: [{}]\n" + "power_total_energy_consumption [J]: [{}]\n" + "power_profile [int]: [{}]", detail::value_or_default(samples.get_power_management_limit()), detail::value_or_default(samples.get_power_enforced_limit()), - detail::join(detail::value_or_default(samples.get_power_state()), ", "), + detail::value_or_default(samples.get_power_measurement_type()), + detail::value_or_default(samples.get_power_management_mode()), + detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "), detail::join(detail::value_or_default(samples.get_power_usage()), ", "), - detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", ")); + detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), + detail::join(detail::value_or_default(samples.get_power_profile()), ", ")); } //*************************************************************************************************************************************// From 0c317841328b7c4f59aedcce908ac1848d170cf6 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 12 Sep 2024 15:40:26 +0200 Subject: [PATCH 13/69] Clarify total energy consumption is only calculated and not sampled via turbostat.
--- README.md | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 642e56b..d6c5ec3 100644 --- a/README.md +++ b/README.md @@ -123,22 +123,22 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ### power-related samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:----------------------------|:---------:|:-----------:|:-----------:|:----------:| -| power_management_limit | - | W | W | | -| power_enforced_limit | - | W | W | | -| power_measurement_type | str (fix) | str | str | | -| power_management_mode | - | bool | - | | -| available_power_profiles | - | list of int | list of str | | -| power_usage | W | W | W | | -| core_watt | W | - | - | - | -| dram_watt | W | - | - | - | -| package_rapl_throttling | % | - | - | - | -| dram_rapl_throttling | % | - | - | - | -| power_total_energy_consumed | J | J | J | J | -| power_profile | - | int | str | | -| energy_threshold_enabled | | | | bool | -| energy_threshold | | | | J | +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:----------------------------|:---------------------------------:|:-----------:|:-----------:|:----------:| +| power_management_limit | - | W | W | | +| power_enforced_limit | - | W | W | | +| power_measurement_type | str (fix) | str | str | | +| power_management_mode | - | bool | - | | +| available_power_profiles | - | list of int | list of str | | +| power_usage | W | W | W | | +| core_watt | W | - | - | - | +| dram_watt | W | - | - | - | +| package_rapl_throttling | % | - | - | - | +| dram_rapl_throttling | % | - | - | - | +| power_total_energy_consumed | J
(calculated via power_usage) | J | J | J | +| power_profile | - | int | str | | +| energy_threshold_enabled | | | | bool | +| energy_threshold | | | | J | ### memory-related samples From 046753ce0982e91205baa12c632abdeb9e29bfa5 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 12 Sep 2024 15:47:24 +0200 Subject: [PATCH 14/69] Clarify total energy consumption is only calculated and not sampled via turbostat. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9e03206..1d90c9b 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ Prerequisites # CMake ================================ bin/ build*/ +install*/ cmake-build*/ CMakeLists.txt.user CMakeCache.txt From 453cffe6d87b3d223db0837f986320c234b7546c Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Thu, 12 Sep 2024 16:14:27 +0200 Subject: [PATCH 15/69] Split time_point output into unit and values such that the unit prefix is not repeated in the actual values. --- include/hardware_sampling/utility.hpp | 14 +++++++------- src/hardware_sampling/hardware_sampler.cpp | 8 ++++++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp index e66d6c8..2a62da3 100644 --- a/include/hardware_sampling/utility.hpp +++ b/include/hardware_sampling/utility.hpp @@ -14,6 +14,7 @@ #include // std::from_chars #include // std::chrono::{milliseconds, duration_cast} +#include // std::trunc #include // std::size_t #include // std::format, std::formatter, std::basic_format_context, std::format_to #include // std::back_inserter, std::next, std::prev @@ -54,19 +55,18 @@ namespace hws::detail { std::optional> sample_name##_{}; /** - * @brief Convert all time points to their duration passed since the @p reference time point. 
- * @tparam Duration the duration type to return + * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point. * @tparam TimePoint the type if the time points * @param[in] time_points the time points * @param[in] reference the reference time point - * @return the duration passed since the @p reference time point (`[[nodiscard]]`) + * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`) */ -template -[[nodiscard]] inline std::vector durations_from_reference_time(const std::vector &time_points, const TimePoint &reference) { - std::vector durations(time_points.size()); +template +[[nodiscard]] inline std::vector durations_from_reference_time(const std::vector &time_points, const TimePoint &reference) { + std::vector durations(time_points.size()); for (std::size_t i = 0; i < durations.size(); ++i) { - durations[i] = std::chrono::duration_cast(time_points[i] - reference); + durations[i] = std::trunc(std::chrono::duration(time_points[i] - reference).count() * 1000.0) / 1000.0; } return durations; diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index c6554cd..1c3ff49 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -139,14 +139,18 @@ void hardware_sampler::dump_yaml(const char *filename) { event_names.push_back(name); } file << std::format("events:\n" - " time_points: [{}]\n" + " time_points:\n" + " unit: \"s\"\n" + " values: [{}]\n" " names: [{}]\n\n", detail::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "), detail::join(event_names, ", ")); // output the sampling information file << std::format("sampling_interval: {}\n" - "time_points: [{}]\n" + "time_points:\n" + " unit: \"s\"\n" + " values: [{}]\n" "{}\n\n", this->sampling_interval(), 
detail::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), From 5d376d49f36e6f891099dd9552e984309a0ab167 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 13 Sep 2024 16:28:25 +0200 Subject: [PATCH 16/69] Unify clock related samples and add new ones depending on the target hardware. --- README.md | 51 +++--- include/hardware_sampling/cpu/cpu_samples.hpp | 8 +- .../gpu_amd/rocm_smi_samples.hpp | 22 +-- .../gpu_nvidia/nvml_samples.hpp | 29 ++-- .../hardware_sampling/gpu_nvidia/utility.hpp | 45 ++++++ include/hardware_sampling/utility.hpp | 32 ++++ src/hardware_sampling/cpu/cpu_samples.cpp | 40 ++--- .../cpu/hardware_sampler.cpp | 14 +- .../gpu_amd/hardware_sampler.cpp | 60 ++++--- .../gpu_amd/rocm_smi_samples.cpp | 140 +++++++++-------- .../gpu_nvidia/hardware_sampler.cpp | 99 +++++++----- .../gpu_nvidia/nvml_samples.cpp | 148 ++++++++++-------- 12 files changed, 420 insertions(+), 268 deletions(-) diff --git a/README.md b/README.md index d6c5ec3..ce9598d 100644 --- a/README.md +++ b/README.md @@ -94,32 +94,31 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ### clock-related samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:---------------------------|:----:|:-----------:|:--------:|:----------:| -| frequency_boost | bool | | | | -| min_cpu_frequency | MHz | | | | -| max_cpu_frequency | MHz | | | | -| average_frequency | MHz | | | | -| average_non_idle_frequency | MHz | | | | -| time_stamp_counter | MHz | | | | -| clock_socket_min | | | Hz | | -| clock_socket_max | | | Hz | | -| clock_memory_min | | | Hz | | -| clock_memory_max | | | Hz | | -| clock_gpu_min | | MHz | Hz | MHz | -| clock_gpu_max | | MHz | Hz | MHz | -| clock_socket | | | Hz | | -| clock_memory | | | Hz | | -| clock_gpu | | MHz | Hz | MHz | -| overdrive_level | | | % | | -| memory_overdrive_level | | | % | | -| available_clocks_gpu | | | | MHz | -| clock_mem_min | | MHz | | MHz | -| clock_mem_max | | MHz 
| | MHz | -| available_clocks_mem | | | | MHz | -| tdp_frequency_limit_gpu | | | | MHz | -| throttle_reason_gpu | | | | bitmask | -| tdp_frequency_limit_mem | | | | MHz | +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-----------------------------------|:----:|:----------------:|:--------:|:----------:| +| auto_boosted_clock_enabled | bool | bool | - | | +| clock_frequency_min | MHz | MHz | MHz | | +| clock_frequency_max | MHz | MHz | MHz | | +| memory_clock_frequency_min | - | MHz | MHz | | +| memory_clock_frequency_max | - | MHz | MHz | | +| socket_clock_frequency_min | - | - | MHz | - | +| socket_clock_frequency_max | - | - | MHz | - | +| sm_clock_frequency_max | - | MHz | - | - | +| available_clock_frequencies | - | MHz | MHz | | +| available_memory_clock_frequencies | - | MHz | MHz | | +| clock_frequency | MHz | MHz | MHz | | +| average_non_idle_frequency | MHz | - | - | - | +| time_stamp_counter | MHz | - | - | - | +| memory_clock_frequency | - | MHz | MHz | | +| socket_clock_frequency | - | - | MHz | - | +| sm_clock_frequency | - | MHz | - | - | +| overdrive_level | - | - | % | - | +| memory_overdrive_level | - | - | % | - | +| throttle_reason | - | string (bitmask) | - | | +| memory_throttle_reason | - | - | - | | +| auto_boosted_clock | - | bool | - | - | +| tdp_frequency_limit | - | - | - | | +| memory_tdp_frequency_limit | - | - | - | | ### power-related samples diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index 3263d77..196572b 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -90,11 +90,11 @@ class cpu_clock_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, frequency_boost) // true if frequency boosting is enabled - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, min_frequency) // the minimum possible CPU frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, 
max_frequency) // the maximum possible CPU frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, auto_boosted_clock_enabled) // true if frequency boosting is enabled + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min) // the minimum possible CPU frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max) // the maximum possible CPU frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_frequency) // the average CPU frequency in MHz including idle cores + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_frequency) // the average CPU frequency in MHz including idle cores HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_frequency) // the average CPU frequency in MHz excluding idle cores HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter) // the time stamp counter }; diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index 3f89615..e0cb925 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -80,16 +80,18 @@ class rocm_smi_clock_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_system_min) // the minimum possible system clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_system_max) // the maximum possible system clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_socket_min) // the minimum possible socket clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_socket_max) // the maximum possible socket clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_memory_min) // the minimum possible memory clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_memory_max) // the maximum possible memory clock frequency in Hz - - 
HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_system) // the current system clock frequency in Hz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_socket) // the current socket clock frequency in Hz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_memory) // the current memory clock frequency in Hz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min) // the minimum possible system clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max) // the maximum possible system clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min) // the minimum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max) // the maximum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, socket_clock_frequency_min) // the minimum possible socket clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, socket_clock_frequency_max) // the maximum possible socket clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_clock_frequencies) // the available clock frequencies in MHz (slowest to fastest) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_memory_clock_frequencies) // the available memory clock frequencies in MHz (slowest to fastest) + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current system clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, socket_clock_frequency) // the current socket clock frequency in MHz HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, overdrive_level) // the GPU overdrive percentage HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_overdrive_level) // the GPU memory overdrive percentage }; diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp 
b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index 86420c8..f766c3d 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -16,6 +16,7 @@ #include // std::formatter #include // std::ostream forward declaration +#include // std::map #include // std::optional #include // std::string #include // std::vector @@ -73,6 +74,8 @@ class nvml_clock_samples { // befriend hardware sampler class friend class gpu_nvidia_hardware_sampler; + using map_type = std::map>; + public: /** * @brief Assemble the YAML string containing all available general hardware samples. @@ -81,18 +84,20 @@ class nvml_clock_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, adaptive_clock_status) // true if clock boosting is currently enabled - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_graph_min) // the minimum possible graphics clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_graph_max) // the maximum possible graphics clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_sm_max) // the maximum possible SM clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_mem_min) // the minimum possible memory clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_mem_max) // the maximum possible memory clock frequency in MHz - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_graph) // the current graphics clock frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_sm) // the current SM clock frequency in Mhz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_mem) // the current memory clock frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, clock_throttle_reason) // the reason the GPU clock throttled (bitmask) - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clocks) // true if the clocks are currently auto 
boosted + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, auto_boosted_clock_enabled) // true if clock boosting is currently enabled + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min) // the minimum possible graphics clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max) // the maximum possible graphics clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min) // the minimum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max) // the maximum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, sm_clock_frequency_max) // the maximum possible SM clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, available_clock_frequencies) // the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_memory_clock_frequencies) // the available memory clock frequencies in MHz (slowest to fastest) + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current graphics clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, sm_clock_frequency) // the current SM clock frequency in Mhz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason) // the reason the GPU clock throttled + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clock) // true if the clocks are currently auto boosted }; /** diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp index f4f8577..8a1c590 100644 --- a/include/hardware_sampling/gpu_nvidia/utility.hpp +++ b/include/hardware_sampling/gpu_nvidia/utility.hpp @@ -12,10 +12,14 @@ #define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ #pragma once +#include "hardware_sampling/utility.hpp" // hws::detail::join + 
#include "nvml.h" // NVML runtime functions #include // std::format #include // std::runtime_error +#include // std::string +#include // std::vector namespace hws::detail { @@ -36,6 +40,47 @@ namespace hws::detail { #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func; #endif +/** + * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|". + * @param[in] clocks_event_reasons the bitmask to convert to a string + * @return all event throttle reasons + */ +[[nodiscard]] inline std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) { + if (clocks_event_reasons == 0ull) { + return "None"; + } else { + std::vector reasons{}; + if ((clocks_event_reasons & nvmlClocksEventReasonApplicationsClocksSetting) != 0ull) { + reasons.emplace_back("ApplicationsClocksSetting"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonDisplayClockSetting) != 0ull) { + reasons.emplace_back("DisplayClockSetting"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonGpuIdle) != 0ull) { + reasons.emplace_back("GpuIdle"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonSwPowerCap) != 0ull) { + reasons.emplace_back("SwPowerCap"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonSwThermalSlowdown) != 0ull) { + reasons.emplace_back("SwThermalSlowdown"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonSyncBoost) != 0ull) { + reasons.emplace_back("SyncBoost"); + } + if ((clocks_event_reasons & nvmlClocksThrottleReasonHwPowerBrakeSlowdown) != 0ull) { + reasons.emplace_back("HwPowerBrakeSlowdown"); + } + if ((clocks_event_reasons & nvmlClocksThrottleReasonHwSlowdown) != 0ull) { + reasons.emplace_back("HwSlowdown"); + } + if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) { + reasons.emplace_back("HwThermalSlowdown"); + } + return std::format("\"{}\"", detail::join(reasons, "|")); + } +} + } // namespace 
hws::detail #endif // HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp index 2a62da3..81e1136 100644 --- a/include/hardware_sampling/utility.hpp +++ b/include/hardware_sampling/utility.hpp @@ -243,6 +243,38 @@ template } } +template +struct is_vector : std::false_type { }; + +template +struct is_vector> : std::true_type { }; + +template +constexpr bool is_vector_v = is_vector::value; + +/** + * @brief Convert all entries in the map to a single dict-like string. + * @details The resulting string is of form "{KEY, VALUE}" or "{KEY, [VALUES]}". + * @tparam MapType the type of the map + * @param[in] map the map to convert to a string + * @return the result string (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline std::string map_entry_to_string(const std::optional &map) { + if (map.has_value()) { + std::vector entries{}; + for (const auto &[key, value] : map.value()) { + if constexpr (is_vector_v>) { + entries.push_back(std::format("{{{}, [{}]}}", key, detail::join(value, ", "))); + } else { + entries.push_back(std::format("{{{}, {}}}", key, value)); + } + } + return detail::join(entries, ", "); + } + return ""; +} + } // namespace hws::detail #endif // HARDWARE_SAMPLING_UTILITY_HPP_ diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index 92bf8a9..deb1ddc 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -210,34 +210,34 @@ std::string cpu_clock_samples::generate_yaml_string() const { std::string str{ "clock:\n" }; // true if frequency boost is enabled - if (this->frequency_boost_.has_value()) { - str += std::format(" frequency_boost:\n" + if (this->auto_boosted_clock_enabled_.has_value()) { + str += std::format(" auto_boosted_clock_enabled:\n" " unit: \"bool\"\n" " values: {}\n", - this->frequency_boost_.value()); + this->auto_boosted_clock_enabled_.value()); } // the minimal CPU 
frequency - if (this->min_frequency_.has_value()) { - str += std::format(" min_cpu_frequency:\n" + if (this->clock_frequency_min_.has_value()) { + str += std::format(" clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", - this->min_frequency_.value()); + this->clock_frequency_min_.value()); } // the maximum CPU frequency - if (this->max_frequency_.has_value()) { - str += std::format(" max_cpu_frequency:\n" + if (this->clock_frequency_max_.has_value()) { + str += std::format(" clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", - this->max_frequency_.value()); + this->clock_frequency_max_.value()); } // the average CPU frequency - if (this->average_frequency_.has_value()) { - str += std::format(" average_frequency:\n" + if (this->clock_frequency_.has_value()) { + str += std::format(" clock_frequency:\n" " turbostat_name: \"Avg_MHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->average_frequency_.value(), ", ")); + detail::join(this->clock_frequency_.value(), ", ")); } // the average CPU frequency excluding idle time if (this->average_non_idle_frequency_.has_value()) { @@ -263,16 +263,16 @@ std::string cpu_clock_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) { - return out << std::format("frequency_boost [bool]: {}\n" - "min_frequency [MHz]: {}\n" - "max_frequency [MHz]: {}\n" - "average_frequency [MHz]: [{}]\n" + return out << std::format("auto_boosted_clock_enabled [bool]: {}\n" + "clock_frequency_min [MHz]: {}\n" + "clock_frequency_max [MHz]: {}\n" + "clock_frequency [MHz]: [{}]\n" "average_non_idle_frequency [MHz]: [{}]\n" "time_stamp_counter [MHz]: [{}]", - detail::value_or_default(samples.get_frequency_boost()), - detail::value_or_default(samples.get_min_frequency()), - detail::value_or_default(samples.get_max_frequency()), - detail::join(detail::value_or_default(samples.get_average_frequency()), ", "), + 
detail::value_or_default(samples.get_auto_boosted_clock_enabled()), + detail::value_or_default(samples.get_clock_frequency_min()), + detail::value_or_default(samples.get_clock_frequency_max()), + detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "), detail::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "), detail::join(detail::value_or_default(samples.get_time_stamp_counter()), ", ")); } diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index e645fde..89683eb 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -90,11 +90,11 @@ void cpu_hardware_sampler::sampling_loop() { } else if (line.starts_with("Flags")) { general_samples_.flags_ = detail::split_as(value, ' '); } else if (line.starts_with("Frequency boost")) { - clock_samples_.frequency_boost_ = value == "enabled"; + clock_samples_.auto_boosted_clock_enabled_ = value == "enabled"; } else if (line.starts_with("CPU max MHz")) { - clock_samples_.max_frequency_ = detail::convert_to(value); + clock_samples_.clock_frequency_max_ = detail::convert_to(value); } else if (line.starts_with("CPU min MHz")) { - clock_samples_.min_frequency_ = detail::convert_to(value); + clock_samples_.clock_frequency_min_ = detail::convert_to(value); } else if (line.starts_with("L1d cache")) { memory_samples_.l1d_cache_ = detail::convert_to(value); } else if (line.starts_with("L1i cache")) { @@ -163,8 +163,8 @@ void cpu_hardware_sampler::sampling_loop() { for (std::size_t i = 0; i < header.size(); ++i) { if (header[i] == "Avg_MHz") { - using vector_type = decltype(clock_samples_.average_frequency_)::value_type; - clock_samples_.average_frequency_ = vector_type{ detail::convert_to(values[i]) }; + using vector_type = decltype(clock_samples_.clock_frequency_)::value_type; + clock_samples_.clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; } else if 
(header[i] == "Busy%") { using vector_type = decltype(general_samples_.compute_utilization_)::value_type; general_samples_.compute_utilization_ = vector_type{ detail::convert_to(values[i]) }; @@ -310,8 +310,8 @@ void cpu_hardware_sampler::sampling_loop() { // add values to the respective sample entries for (std::size_t i = 0; i < header.size(); ++i) { if (header[i] == "Avg_MHz") { - using vector_type = decltype(clock_samples_.average_frequency_)::value_type; - clock_samples_.average_frequency_->push_back(detail::convert_to(values[i])); + using vector_type = decltype(clock_samples_.clock_frequency_)::value_type; + clock_samples_.clock_frequency_->push_back(detail::convert_to(values[i])); } else if (header[i] == "Busy%") { using vector_type = decltype(general_samples_.compute_utilization_)::value_type; general_samples_.compute_utilization_->push_back(detail::convert_to(values[i])); diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 434b047..63fbda4 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -127,38 +127,50 @@ void gpu_amd_hardware_sampler::sampling_loop() { { rsmi_frequencies_t frequency_info{}; if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info) == RSMI_STATUS_SUCCESS) { - clock_samples_.clock_system_min_ = frequency_info.frequency[0]; - clock_samples_.clock_system_max_ = frequency_info.frequency[frequency_info.num_supported - 1]; + clock_samples_.clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000.0 / 1000.0; + clock_samples_.clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0; + decltype(clock_samples_.available_clock_frequencies_)::value_type frequencies{}; + for (std::size_t i = 0; i < frequency_info.num_supported; ++i) { + frequencies.push_back(static_cast(frequency_info.frequency[i]) / 1000.0 / 1000.0); + 
} + clock_samples_.available_clock_frequencies_ = frequencies; + // queried samples -> retrieved every iteration if available - clock_samples_.clock_system_ = decltype(clock_samples_.clock_system_)::value_type{}; + clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{}; if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_system_->push_back(frequency_info.frequency[frequency_info.current]); + clock_samples_.clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); } else { - clock_samples_.clock_system_->push_back(0); + clock_samples_.clock_frequency_->push_back(0); } } if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info) == RSMI_STATUS_SUCCESS) { - clock_samples_.clock_socket_min_ = frequency_info.frequency[0]; - clock_samples_.clock_socket_max_ = frequency_info.frequency[frequency_info.num_supported - 1]; + clock_samples_.socket_clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000.0 / 1000.0; + clock_samples_.socket_clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0; // queried samples -> retrieved every iteration if available - clock_samples_.clock_socket_ = decltype(clock_samples_.clock_socket_)::value_type{}; + clock_samples_.socket_clock_frequency_ = decltype(clock_samples_.socket_clock_frequency_)::value_type{}; if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_socket_->push_back(frequency_info.frequency[frequency_info.current]); + clock_samples_.socket_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); } else { - clock_samples_.clock_socket_->push_back(0); + clock_samples_.socket_clock_frequency_->push_back(0); } } if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info) == RSMI_STATUS_SUCCESS) { - clock_samples_.clock_memory_min_ = 
frequency_info.frequency[0]; - clock_samples_.clock_memory_max_ = frequency_info.frequency[frequency_info.num_supported - 1]; + clock_samples_.memory_clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000.0 / 1000.0; + clock_samples_.memory_clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0; + decltype(clock_samples_.available_memory_clock_frequencies_)::value_type frequencies{}; + for (std::size_t i = 0; i < frequency_info.num_supported; ++i) { + frequencies.push_back(static_cast(frequency_info.frequency[i]) / 1000.0 / 1000.0); + } + clock_samples_.available_memory_clock_frequencies_ = frequencies; + // queried samples -> retrieved every iteration if available - clock_samples_.clock_memory_ = decltype(clock_samples_.clock_memory_)::value_type{}; + clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{}; if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_memory_->push_back(frequency_info.frequency[frequency_info.current]); + clock_samples_.memory_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); } else { - clock_samples_.clock_memory_->push_back(0); + clock_samples_.memory_clock_frequency_->push_back(0); } } @@ -466,36 +478,36 @@ void gpu_amd_hardware_sampler::sampling_loop() { // retrieve clock related samples { - if (clock_samples_.clock_system_.has_value()) { + if (clock_samples_.clock_frequency_.has_value()) { rsmi_frequencies_t frequency_info{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info)); if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_system_->push_back(frequency_info.frequency[frequency_info.current]); + clock_samples_.clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); } else { // the current index is 
(somehow) wrong - clock_samples_.clock_system_->push_back(0); + clock_samples_.clock_frequency_->push_back(0); } } - if (clock_samples_.clock_socket_.has_value()) { + if (clock_samples_.socket_clock_frequency_.has_value()) { rsmi_frequencies_t frequency_info{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info)); if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_socket_->push_back(frequency_info.frequency[frequency_info.current]); + clock_samples_.socket_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); } else { // the current index is (somehow) wrong - clock_samples_.clock_socket_->push_back(0); + clock_samples_.socket_clock_frequency_->push_back(0); } } - if (clock_samples_.clock_memory_.has_value()) { + if (clock_samples_.memory_clock_frequency_.has_value()) { rsmi_frequencies_t frequency_info{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info)); if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_memory_->push_back(frequency_info.frequency[frequency_info.current]); + clock_samples_.memory_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); } else { // the current index is (somehow) wrong - clock_samples_.clock_memory_->push_back(0); + clock_samples_.memory_clock_frequency_->push_back(0); } } diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index c7f7d88..579ea29 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -105,71 +105,83 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samp std::string rocm_smi_clock_samples::generate_yaml_string() const { std::string str{ "clock:\n" }; - // socket clock min frequencies - if 
(this->clock_socket_min_.has_value()) { - str += std::format(" clock_socket_min:\n" - " unit: \"Hz\"\n" + // system clock min frequencies + if (this->clock_frequency_min_.has_value()) { + str += std::format(" clock_frequency_min:\n" + " unit: \"MHz\"\n" " values: {}\n", - this->clock_socket_min_.value()); + this->clock_frequency_min_.value()); } - // socket clock max frequencies - if (this->clock_socket_max_.has_value()) { - str += std::format(" clock_socket_max:\n" - " unit: \"Hz\"\n" + // system clock max frequencies + if (this->clock_frequency_max_.has_value()) { + str += std::format(" clock_frequency_max:\n" + " unit: \"MHz\"\n" " values: {}\n", - this->clock_socket_max_.value()); + this->clock_frequency_max_.value()); } - // memory clock min frequencies - if (this->clock_memory_min_.has_value()) { - str += std::format(" clock_memory_min:\n" - " unit: \"Hz\"\n" + if (this->memory_clock_frequency_min_.has_value()) { + str += std::format(" memory_clock_frequency_min:\n" + " unit: \"MHz\"\n" " values: {}\n", - this->clock_memory_min_.value()); + this->memory_clock_frequency_min_.value()); } // memory clock max frequencies - if (this->clock_memory_max_.has_value()) { - str += std::format(" clock_memory_max:\n" - " unit: \"Hz\"\n" + if (this->memory_clock_frequency_max_.has_value()) { + str += std::format(" memory_clock_frequency_max:\n" + " unit: \"MHz\"\n" " values: {}\n", - this->clock_memory_max_.value()); + this->memory_clock_frequency_max_.value()); } - - // system clock min frequencies - if (this->clock_system_min_.has_value()) { - str += std::format(" clock_gpu_min:\n" - " unit: \"Hz\"\n" + // socket clock min frequencies + if (this->socket_clock_frequency_min_.has_value()) { + str += std::format(" socket_clock_frequency_min:\n" + " unit: \"MHz\"\n" " values: {}\n", - this->clock_system_min_.value()); + this->socket_clock_frequency_min_.value()); } - // system clock max frequencies - if (this->clock_system_max_.has_value()) { - str += std::format(" 
clock_gpu_max:\n" - " unit: \"Hz\"\n" + // socket clock max frequencies + if (this->socket_clock_frequency_max_.has_value()) { + str += std::format(" socket_clock_frequency_max:\n" + " unit: \"MHz\"\n" " values: {}\n", - this->clock_system_max_.value()); + this->socket_clock_frequency_max_.value()); + } + // the available clock frequencies + if (this->available_clock_frequencies_.has_value()) { + str += std::format(" available_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + detail::join(this->available_clock_frequencies_.value(), ", ")); + } + // the available memory clock frequencies + if (this->available_memory_clock_frequencies_.has_value()) { + str += std::format(" available_memory_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + detail::join(this->available_memory_clock_frequencies_.value(), ", ")); } - // socket clock frequency - if (this->clock_socket_.has_value()) { - str += std::format(" clock_socket:\n" - " unit: \"Hz\"\n" + // system clock frequency + if (this->clock_frequency_.has_value()) { + str += std::format(" clock_frequency:\n" + " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_socket_.value(), ", ")); + detail::join(this->clock_frequency_.value(), ", ")); } // memory clock frequency - if (this->clock_memory_.has_value()) { - str += std::format(" clock_memory:\n" - " unit: \"Hz\"\n" + if (this->memory_clock_frequency_.has_value()) { + str += std::format(" memory_clock_frequency:\n" + " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_memory_.value(), ", ")); + detail::join(this->memory_clock_frequency_.value(), ", ")); } - // system clock frequency - if (this->clock_system_.has_value()) { - str += std::format(" clock_gpu:\n" - " unit: \"Hz\"\n" + // socket clock frequency + if (this->socket_clock_frequency_.has_value()) { + str += std::format(" socket_clock_frequency:\n" + " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_system_.value(), ", ")); + 
detail::join(this->socket_clock_frequency_.value(), ", ")); } // overdrive level if (this->overdrive_level_.has_value()) { @@ -193,26 +205,30 @@ std::string rocm_smi_clock_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples) { - return out << std::format("clock_system_min [Hz]: {}\n" - "clock_system_max [Hz]: {}\n" - "clock_socket_min [Hz]: {}\n" - "clock_socket_max [Hz]: {}\n" - "clock_memory_min [Hz]: {}\n" - "clock_memory_max [Hz]: {}\n" - "clock_system [Hz]: [{}]\n" - "clock_socket [Hz]: [{}]\n" - "clock_memory [Hz]: [{}]\n" + return out << std::format("clock_frequency_min [MHz]: {}\n" + "clock_frequency_max [MHz]: {}\n" + "memory_clock_frequency_min [MHz]: {}\n" + "memory_clock_frequency_max [MHz]: {}\n" + "socket_clock_frequency_min [MHz]: {}\n" + "socket_clock_frequency_max [MHz]: {}\n" + "available_clock_frequencies [MHz]: [{}]\n" + "available_memory_clock_frequencies [MHz]: [{}]\n" + "clock_frequency [MHz]: [{}]\n" + "memory_clock_frequency [MHz]: [{}]\n" + "socket_clock_frequency [MHz]: [{}]\n" "overdrive_level [%]: [{}]\n" "memory_overdrive_level [%]: [{}]", - detail::value_or_default(samples.get_clock_system_min()), - detail::value_or_default(samples.get_clock_system_max()), - detail::value_or_default(samples.get_clock_socket_min()), - detail::value_or_default(samples.get_clock_socket_max()), - detail::value_or_default(samples.get_clock_memory_min()), - detail::value_or_default(samples.get_clock_memory_max()), - detail::join(detail::value_or_default(samples.get_clock_system()), ", "), - detail::join(detail::value_or_default(samples.get_clock_socket()), ", "), - detail::join(detail::value_or_default(samples.get_clock_memory()), ", "), + detail::value_or_default(samples.get_clock_frequency_min()), + detail::value_or_default(samples.get_clock_frequency_max()), + detail::value_or_default(samples.get_memory_clock_frequency_min()), + 
detail::value_or_default(samples.get_memory_clock_frequency_max()), + detail::value_or_default(samples.get_socket_clock_frequency_min()), + detail::value_or_default(samples.get_socket_clock_frequency_max()), + detail::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "), + detail::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), + detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + detail::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), + detail::join(detail::value_or_default(samples.get_socket_clock_frequency()), ", "), detail::join(detail::value_or_default(samples.get_overdrive_level()), ", "), detail::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", ")); } diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 41b3c0b..6e0fe7c 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -15,7 +15,7 @@ #include "nvml.h" // NVML runtime functions -#include // std::min_element +#include // std::min_element, std::sort, std::transform #include // std::chrono::{steady_clock, duration_cast, milliseconds} #include // std::size_t #include // std::exception, std::terminate @@ -187,24 +187,24 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { // retrieve initial clock related information { // fixed information -> only retrieved once - decltype(clock_samples_.adaptive_clock_status_)::value_type adaptive_clock_status{}; + unsigned int adaptive_clock_status{}; if (nvmlDeviceGetAdaptiveClockInfoStatus(device, &adaptive_clock_status) == NVML_SUCCESS) { - clock_samples_.adaptive_clock_status_ = adaptive_clock_status; + clock_samples_.auto_boosted_clock_enabled_ = adaptive_clock_status == NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED; } - decltype(clock_samples_.clock_graph_max_)::value_type 
clock_graph_max{}; + unsigned int clock_graph_max{}; if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph_max) == NVML_SUCCESS) { - clock_samples_.clock_graph_max_ = clock_graph_max; + clock_samples_.clock_frequency_max_ = static_cast(clock_graph_max); } - decltype(clock_samples_.clock_sm_max_)::value_type clock_sm_max{}; + unsigned int clock_sm_max{}; if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_SM, &clock_sm_max) == NVML_SUCCESS) { - clock_samples_.clock_sm_max_ = clock_sm_max; + clock_samples_.sm_clock_frequency_max_ = static_cast(clock_sm_max); } - decltype(clock_samples_.clock_mem_max_)::value_type clock_mem_max{}; + unsigned int clock_mem_max{}; if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_MEM, &clock_mem_max) == NVML_SUCCESS) { - clock_samples_.clock_mem_max_ = clock_mem_max; + clock_samples_.memory_clock_frequency_max_ = static_cast(clock_mem_max); } { @@ -212,44 +212,67 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { std::vector supported_clocks(clock_count); if (nvmlDeviceGetSupportedMemoryClocks(device, &clock_count, supported_clocks.data()) == NVML_SUCCESS) { supported_clocks.resize(clock_count); - clock_samples_.clock_mem_min_ = *std::min_element(supported_clocks.cbegin(), supported_clocks.cend()); + clock_samples_.memory_clock_frequency_min_ = static_cast(*std::min_element(supported_clocks.cbegin(), supported_clocks.cend())); + + decltype(clock_samples_.available_memory_clock_frequencies_)::value_type available_memory_clock_frequencies(supported_clocks.size()); + // convert unsigned int values to double values + std::transform(supported_clocks.cbegin(), supported_clocks.cend(), available_memory_clock_frequencies.begin(), [](const unsigned int c) { return static_cast(c); }); + // we want to report all supported memory clocks in ascending order + std::sort(available_memory_clock_frequencies.begin(), available_memory_clock_frequencies.end()); + clock_samples_.available_memory_clock_frequencies_ = 
available_memory_clock_frequencies; } } { unsigned int clock_count{ 128 }; std::vector supported_clocks(clock_count); - if (clock_samples_.clock_mem_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, clock_samples_.clock_mem_min_.value(), &clock_count, supported_clocks.data()) == NVML_SUCCESS) { - supported_clocks.resize(clock_count); - clock_samples_.clock_graph_min_ = *std::min_element(supported_clocks.cbegin(), supported_clocks.cend()); + if (clock_samples_.memory_clock_frequency_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, clock_samples_.memory_clock_frequency_min_.value(), &clock_count, supported_clocks.data()) == NVML_SUCCESS) { + clock_samples_.clock_frequency_min_ = static_cast(*std::min_element(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count)); + } + + if (clock_samples_.available_memory_clock_frequencies_.has_value()) { + for (const auto value : clock_samples_.available_memory_clock_frequencies_.value()) { + if (nvmlDeviceGetSupportedGraphicsClocks(device, static_cast(value), &clock_count, supported_clocks.data()) == NVML_SUCCESS) { + decltype(clock_samples_.available_clock_frequencies_)::value_type::mapped_type available_clock_frequencies(clock_count); + // convert unsigned int values to double values + std::transform(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count, available_clock_frequencies.begin(), [](const unsigned int c) { return static_cast(c); }); + // we want to report all supported memory clocks in ascending order + std::sort(available_clock_frequencies.begin(), available_clock_frequencies.end()); + // if no map exists, default construct an empty map + if (!clock_samples_.available_clock_frequencies_.has_value()) { + clock_samples_.available_clock_frequencies_ = decltype(clock_samples_)::map_type{}; + } + clock_samples_.available_clock_frequencies_->emplace(value, available_clock_frequencies); + } + } } } // queried samples -> retrieved every iteration if available - 
decltype(clock_samples_.clock_graph_)::value_type::value_type clock_graph{}; + unsigned int clock_graph{}; if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph) == NVML_SUCCESS) { - clock_samples_.clock_graph_ = decltype(clock_samples_.clock_graph_)::value_type{ clock_graph }; + clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ static_cast(clock_graph) }; } - decltype(clock_samples_.clock_sm_)::value_type::value_type clock_sm{}; + unsigned int clock_sm{}; if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &clock_sm) == NVML_SUCCESS) { - clock_samples_.clock_sm_ = decltype(clock_samples_.clock_sm_)::value_type{ clock_sm }; + clock_samples_.sm_clock_frequency_ = decltype(clock_samples_.sm_clock_frequency_)::value_type{ static_cast(clock_sm) }; } - decltype(clock_samples_.clock_mem_)::value_type::value_type clock_mem{}; + unsigned int clock_mem{}; if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &clock_mem) == NVML_SUCCESS) { - clock_samples_.clock_mem_ = decltype(clock_samples_.clock_mem_)::value_type{ clock_mem }; + clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ static_cast(clock_mem) }; } - decltype(clock_samples_.clock_throttle_reason_)::value_type::value_type clock_throttle_reason{}; - if (nvmlDeviceGetCurrentClocksThrottleReasons(device, &clock_throttle_reason) == NVML_SUCCESS) { - clock_samples_.clock_throttle_reason_ = decltype(clock_samples_.clock_throttle_reason_)::value_type{ clock_throttle_reason }; + unsigned long long clock_throttle_reason{}; + if (nvmlDeviceGetCurrentClocksEventReasons(device, &clock_throttle_reason) == NVML_SUCCESS) { + clock_samples_.throttle_reason_ = decltype(clock_samples_.throttle_reason_)::value_type{ detail::throttle_event_reason_to_string(clock_throttle_reason) }; } nvmlEnableState_t mode{}; nvmlEnableState_t default_mode{}; if (nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode) == NVML_SUCCESS) { - 
clock_samples_.auto_boosted_clocks_ = decltype(clock_samples_.auto_boosted_clocks_)::value_type{ mode == NVML_FEATURE_ENABLED }; + clock_samples_.auto_boosted_clock_ = decltype(clock_samples_.auto_boosted_clock_)::value_type{ mode == NVML_FEATURE_ENABLED }; } } @@ -412,35 +435,35 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { // retrieve clock related samples { - if (clock_samples_.clock_graph_.has_value()) { - decltype(clock_samples_.clock_graph_)::value_type::value_type value{}; + if (clock_samples_.clock_frequency_.has_value()) { + unsigned int value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value)); - clock_samples_.clock_graph_->push_back(value); + clock_samples_.clock_frequency_->push_back(static_cast(value)); } - if (clock_samples_.clock_sm_.has_value()) { - decltype(clock_samples_.clock_sm_)::value_type::value_type value{}; + if (clock_samples_.sm_clock_frequency_.has_value()) { + unsigned int value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value)); - clock_samples_.clock_sm_->push_back(value); + clock_samples_.sm_clock_frequency_->push_back(static_cast(value)); } - if (clock_samples_.clock_mem_.has_value()) { - decltype(clock_samples_.clock_mem_)::value_type::value_type value{}; + if (clock_samples_.memory_clock_frequency_.has_value()) { + unsigned int value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value)); - clock_samples_.clock_mem_->push_back(value); + clock_samples_.memory_clock_frequency_->push_back(static_cast(value)); } - if (clock_samples_.clock_throttle_reason_.has_value()) { - decltype(clock_samples_.clock_throttle_reason_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksThrottleReasons(device, &value)); - clock_samples_.clock_throttle_reason_->push_back(value); + if (clock_samples_.throttle_reason_.has_value()) { + unsigned long long value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, 
&value)); + clock_samples_.throttle_reason_->push_back(detail::throttle_event_reason_to_string(value)); } - if (clock_samples_.auto_boosted_clocks_.has_value()) { + if (clock_samples_.auto_boosted_clock_.has_value()) { nvmlEnableState_t mode{}; nvmlEnableState_t default_mode{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode)); - clock_samples_.auto_boosted_clocks_->push_back(mode == NVML_FEATURE_ENABLED); + clock_samples_.auto_boosted_clock_->push_back(mode == NVML_FEATURE_ENABLED); } } diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 95cfa17..64f559d 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -7,9 +7,7 @@ #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join} - -#include "nvml.h" // NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED +#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join, map_entry_to_string} #include // std::format #include // std::ostream @@ -125,82 +123,98 @@ std::string nvml_clock_samples::generate_yaml_string() const { std::string str{ "clock:\n" }; // adaptive clock status - if (this->adaptive_clock_status_.has_value()) { - str += std::format(" adaptive_clock_status:\n" + if (this->auto_boosted_clock_enabled_.has_value()) { + str += std::format(" auto_boosted_clock_enabled:\n" " unit: \"bool\"\n" " values: {}\n", - this->adaptive_clock_status_.value() == NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED); + this->auto_boosted_clock_enabled_.value()); } - // maximum SM clock - if (this->clock_sm_max_.has_value()) { - str += std::format(" clock_sm_max:\n" + // minimum graph clock + if (this->clock_frequency_min_.has_value()) { + str += std::format(" clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", - this->clock_sm_max_.value()); + 
this->clock_frequency_min_.value()); + } + // maximum graph clock + if (this->clock_frequency_max_.has_value()) { + str += std::format(" clock_frequency_max:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->clock_frequency_max_.value()); } // minimum memory clock - if (this->clock_mem_min_.has_value()) { - str += std::format(" clock_mem_min:\n" + if (this->memory_clock_frequency_min_.has_value()) { + str += std::format(" memory_clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", - this->clock_mem_min_.value()); + this->memory_clock_frequency_min_.value()); } // maximum memory clock - if (this->clock_mem_max_.has_value()) { - str += std::format(" clock_mem_max:\n" + if (this->memory_clock_frequency_max_.has_value()) { + str += std::format(" memory_clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", - this->clock_mem_max_.value()); + this->memory_clock_frequency_max_.value()); } - // minimum graph clock - if (this->clock_graph_min_.has_value()) { - str += std::format(" clock_gpu_min:\n" + // maximum SM clock + if (this->sm_clock_frequency_max_.has_value()) { + str += std::format(" sm_clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", - this->clock_graph_min_.value()); + this->sm_clock_frequency_max_.value()); } - // maximum graph clock - if (this->clock_graph_max_.has_value()) { - str += std::format(" clock_gpu_max:\n" + // the available clock frequencies + if (this->available_clock_frequencies_.has_value()) { + str += std::format(" available_clock_frequencies:\n" " unit: \"MHz\"\n" - " values: {}\n", - this->clock_graph_max_.value()); + " values:\n"); + for (const auto &[key, value] : this->available_clock_frequencies_.value()) { + str += std::format(" {}: [{}]\n", key, detail::join(value, ", ")); + } + } + // the available memory clock frequencies + if (this->available_memory_clock_frequencies_.has_value()) { + str += std::format(" available_memory_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + 
detail::join(this->available_memory_clock_frequencies_.value(), ", ")); } - // SM clock - if (this->clock_sm_.has_value()) { - str += std::format(" clock_sm:\n" + // graph clock + if (this->clock_frequency_.has_value()) { + str += std::format(" clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_sm_.value(), ", ")); + detail::join(this->clock_frequency_.value(), ", ")); } // memory clock - if (this->clock_mem_.has_value()) { - str += std::format(" clock_mem:\n" + if (this->memory_clock_frequency_.has_value()) { + str += std::format(" memory_clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_mem_.value(), ", ")); + detail::join(this->memory_clock_frequency_.value(), ", ")); } - // graph clock - if (this->clock_graph_.has_value()) { - str += std::format(" clock_gpu:\n" + // SM clock + if (this->sm_clock_frequency_.has_value()) { + str += std::format(" sm_clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_graph_.value(), ", ")); + detail::join(this->sm_clock_frequency_.value(), ", ")); } // clock throttle reason - if (this->clock_throttle_reason_.has_value()) { - str += std::format(" clock_throttle_reason:\n" - " unit: \"bitmask\"\n" + if (this->throttle_reason_.has_value()) { + str += std::format(" throttle_reason:\n" + " unit: \"string\"\n" " values: [{}]\n", - detail::join(this->clock_throttle_reason_.value(), ", ")); + detail::join(this->throttle_reason_.value(), ", ")); } // clock is auto-boosted - if (this->auto_boosted_clocks_.has_value()) { - str += std::format(" auto_boosted_clocks:\n" + if (this->auto_boosted_clock_.has_value()) { + str += std::format(" auto_boosted_clock:\n" " unit: \"bool\"\n" " values: [{}]\n", - detail::join(this->auto_boosted_clocks_.value(), ", ")); + detail::join(this->auto_boosted_clock_.value(), ", ")); } // remove last newline @@ -210,28 +224,32 @@ std::string nvml_clock_samples::generate_yaml_string() const { } std::ostream 
&operator<<(std::ostream &out, const nvml_clock_samples &samples) { - return out << std::format("adaptive_clock_status [int]: {}\n" - "clock_graph_min [MHz]: {}\n" - "clock_graph_max [MHz]: {}\n" - "clock_sm_max [MHz]: {}\n" - "clock_mem_min [MHz]: {}\n" - "clock_mem_max [MHz]: {}\n" - "clock_graph [MHz]: [{}]\n" - "clock_sm [MHz]: [{}]\n" - "clock_mem [MHz]: [{}]\n" - "clock_throttle_reason [bitmask]: [{}]\n" - "auto_boosted_clocks [bool]: [{}]", - detail::value_or_default(samples.get_adaptive_clock_status()), - detail::value_or_default(samples.get_clock_graph_min()), - detail::value_or_default(samples.get_clock_graph_max()), - detail::value_or_default(samples.get_clock_sm_max()), - detail::value_or_default(samples.get_clock_mem_min()), - detail::value_or_default(samples.get_clock_mem_max()), - detail::join(detail::value_or_default(samples.get_clock_graph()), ", "), - detail::join(detail::value_or_default(samples.get_clock_sm()), ", "), - detail::join(detail::value_or_default(samples.get_clock_mem()), ", "), - detail::join(detail::value_or_default(samples.get_clock_throttle_reason()), ", "), - detail::join(detail::value_or_default(samples.get_auto_boosted_clocks()), ", ")); + return out << std::format("auto_boosted_clock_enabled [bool]: {}\n" + "clock_frequency_min [MHz]: {}\n" + "clock_frequency_max [MHz]: {}\n" + "memory_clock_frequency_min [MHz]: {}\n" + "memory_clock_frequency_max [MHz]: {}\n" + "sm_clock_frequency_max [MHz]: {}\n" + "available_clock_frequencies [MHz]: [{}]\n" + "available_memory_clock_frequencies [MHz]: [{}]\n" + "clock_frequency [MHz]: [{}]\n" + "memory_clock_frequency [MHz]: [{}]\n" + "sm_clock_frequency [MHz]: [{}]\n" + "throttle_reason [string]: [{}]\n" + "auto_boosted_clock [bool]: [{}]", + detail::value_or_default(samples.get_auto_boosted_clock_enabled()), + detail::value_or_default(samples.get_clock_frequency_min()), + detail::value_or_default(samples.get_clock_frequency_max()), + 
detail::value_or_default(samples.get_memory_clock_frequency_min()), + detail::value_or_default(samples.get_memory_clock_frequency_max()), + detail::value_or_default(samples.get_sm_clock_frequency_max()), + detail::map_entry_to_string(samples.get_available_clock_frequencies()), + detail::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), + detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + detail::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), + detail::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "), + detail::join(detail::value_or_default(samples.get_throttle_reason()), ", "), + detail::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", ")); } //*************************************************************************************************************************************// From dfe75aedf819b97b5cb0dc3ae30271099fefb5e5 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 13 Sep 2024 16:31:28 +0200 Subject: [PATCH 17/69] (temporarily) disable level zero support. 
--- CMakeLists.txt | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e32478a..3e915ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,25 +214,25 @@ endif () ## Intel GPU sampling via Level Zero ## #################################################################################################################### # try finding Level Zero -find_package(level_zero QUIET) -if (level_zero_FOUND) - target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE level_zero) - - message(STATUS "Enable sampling of Intel GPU information using Level Zero.") - - # add source file to source file list - target_sources(${HWS_LIBRARY_NAME} PRIVATE - $) - - # add compile definition - target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_INTEL_GPUS_ENABLED) -else () - message(STATUS "Hardware sampling for Intel GPUs disabled!") -endif () +#find_package(level_zero QUIET) +#if (level_zero_FOUND) +# target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE level_zero) +# +# message(STATUS "Enable sampling of Intel GPU information using Level Zero.") +# +# # add source file to source file list +# target_sources(${HWS_LIBRARY_NAME} PRIVATE +# $) +# +# # add compile definition +# target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_INTEL_GPUS_ENABLED) +#else () +# message(STATUS "Hardware sampling for Intel GPUs disabled!") +#endif () #################################################################################################################### From 2919e1c46af4d662a4db55ea72500dca3eaf9b2f Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 09:36:19 +0200 Subject: [PATCH 18/69] Backport library to support C++17 instead of only C++20 (mainly changing std::format to fmt::format). 
--- .clang-format | 2 +- CMakeLists.txt | 30 +- README.md | 3 +- cmake/hardware_samplingConfig.cmake.in | 6 + examples/cpp/CMakeLists.txt | 2 +- include/hardware_sampling/cpu/cpu_samples.hpp | 19 +- .../cpu/hardware_sampler.hpp | 6 +- include/hardware_sampling/cpu/utility.hpp | 7 +- include/hardware_sampling/event.hpp | 22 +- .../gpu_amd/hardware_sampler.hpp | 6 +- .../gpu_amd/rocm_smi_samples.hpp | 15 +- include/hardware_sampling/gpu_amd/utility.hpp | 8 +- .../gpu_intel/level_zero_samples.hpp | 7 +- .../gpu_nvidia/hardware_sampler.hpp | 6 +- .../gpu_nvidia/nvml_samples.hpp | 15 +- .../hardware_sampling/gpu_nvidia/utility.hpp | 11 +- include/hardware_sampling/utility.hpp | 85 ++---- src/hardware_sampling/cpu/cpu_samples.cpp | 270 +++++++++--------- .../cpu/hardware_sampler.cpp | 48 ++-- src/hardware_sampling/cpu/utility.cpp | 4 +- src/hardware_sampling/event.cpp | 5 +- .../gpu_amd/hardware_sampler.cpp | 19 +- .../gpu_amd/rocm_smi_samples.cpp | 235 +++++++-------- .../gpu_intel/hardware_sampler.cpp | 36 ++- .../gpu_intel/level_zero_samples.cpp | 46 +-- .../gpu_nvidia/hardware_sampler.cpp | 15 +- .../gpu_nvidia/nvml_samples.cpp | 190 ++++++------ src/hardware_sampling/hardware_sampler.cpp | 23 +- 28 files changed, 602 insertions(+), 539 deletions(-) diff --git a/.clang-format b/.clang-format index 5d6a911..97d4dc9 100644 --- a/.clang-format +++ b/.clang-format @@ -79,7 +79,7 @@ IncludeBlocks: Regroup IncludeCategories: - Regex: '^"hardware_sampling/' Priority: 1 - - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess)' + - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess|fmt)' Priority: 2 - Regex: '^.*' Priority: 3 diff --git a/CMakeLists.txt b/CMakeLists.txt index 3e915ea..48ed48e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,7 @@ add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES}) set(HWS_TARGETS_TO_INSTALL ${HWS_LIBRARY_NAME}) # use C++20 -target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_20) 
+target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_17) # add target include directory target_include_directories(${HWS_LIBRARY_NAME} PUBLIC @@ -58,6 +58,34 @@ endif () message(STATUS "Setting the hardware sampler interval to ${HWS_SAMPLING_INTERVAL}ms.") target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_SAMPLING_INTERVAL=${HWS_SAMPLING_INTERVAL}ms) +# install fmt as dependency +include(FetchContent) +set(HWS_fmt_VERSION 11.0.2) +find_package(fmt 11.0.2 QUIET) +if (fmt_FOUND) + message(STATUS "Found package fmt.") +else () + message(STATUS "Couldn't find package fmt. Building version ${HWS_fmt_VERSION} from source.") + set(FMT_PEDANTIC OFF CACHE INTERNAL "" FORCE) + set(FMT_WERROR OFF CACHE INTERNAL "" FORCE) + set(FMT_DOC OFF CACHE INTERNAL "" FORCE) + set(FMT_INSTALL ON CACHE INTERNAL "" FORCE) # let {fmt} handle the install target + set(FMT_TEST OFF CACHE INTERNAL "" FORCE) + set(FMT_FUZZ OFF CACHE INTERNAL "" FORCE) + set(FMT_CUDA_TEST OFF CACHE INTERNAL "" FORCE) + set(FMT_MODULE OFF CACHE INTERNAL "" FORCE) + set(FMT_SYSTEM_HEADERS ON CACHE INTERNAL "" FORCE) + # fetch string formatting library fmt + FetchContent_Declare(fmt + GIT_REPOSITORY https://github.com/fmtlib/fmt.git + GIT_TAG ${HWS_fmt_VERSION} + QUIET + ) + FetchContent_MakeAvailable(fmt) + set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) + add_dependencies(${HWS_LIBRARY_NAME} fmt) +endif () +target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt) #################################################################################################################### ## CPU measurements ## diff --git a/README.md b/README.md index ce9598d..bf47501 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,8 @@ It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel. 
General dependencies: -- a C++20 capable compiler supporting `std::format` (tested with GCC 14.1.0) +- a C++17 capable compiler +- [{fmt} >= 11.0.2](https://github.com/fmtlib/fmt) for string formatting (automatically built during the CMake configuration if it couldn't be found using the respective `find_package` call) - [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call) Dependencies based on the hardware to sample: diff --git a/cmake/hardware_samplingConfig.cmake.in b/cmake/hardware_samplingConfig.cmake.in index 56ba42a..53829a0 100644 --- a/cmake/hardware_samplingConfig.cmake.in +++ b/cmake/hardware_samplingConfig.cmake.in @@ -8,6 +8,12 @@ include(CMakeFindDependencyMacro) +# always try finding {fmt} +# -> CMAKE_PREFIX_PATH necessary if built via FetchContent +# -> doesn't hurt to be set every time +list(APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/fmt") +find_dependency(fmt REQUIRED) + # sanity checks include("${CMAKE_CURRENT_LIST_DIR}/hardware_samplingTargets.cmake") check_required_components("hardware_sampling") \ No newline at end of file diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 6086f5b..56cff22 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -12,5 +12,5 @@ find_package(hardware_sampling REQUIRED) add_executable(prog main.cpp) -target_compile_features(prog PUBLIC cxx_std_20) +target_compile_features(prog PUBLIC cxx_std_17) target_link_libraries(prog PUBLIC hws::hardware_sampling) \ No newline at end of file diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index 196572b..b537326 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -12,9 +12,10 @@ #define HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_ #pragma
once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter +#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter -#include // std::formatter #include // std::ostream forward declaration #include // std::optional #include // std::string @@ -297,24 +298,24 @@ std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &sampl } // namespace hws template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; #endif // HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_ diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp index b86771e..18b489f 100644 --- a/include/hardware_sampling/cpu/hardware_sampler.hpp +++ b/include/hardware_sampling/cpu/hardware_sampler.hpp @@ -14,10 +14,10 @@ #include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} #include 
"hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::ostream_formatter + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter #include // std::chrono::milliseconds, std::chrono_literals namespace -#include // std::formatter #include // std::ostream forward declaration namespace hws { @@ -148,6 +148,6 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler) } // namespace hws template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; #endif // HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/cpu/utility.hpp b/include/hardware_sampling/cpu/utility.hpp index d203e0b..467d4e5 100644 --- a/include/hardware_sampling/cpu/utility.hpp +++ b/include/hardware_sampling/cpu/utility.hpp @@ -12,7 +12,8 @@ #define HARDWARE_SAMPLING_CPU_UTILITY_HPP_ #pragma once -#include // std::format +#include "fmt/format.h" // fmt::format + #include // std::runtime_error #include // std::string #include // std::string_view @@ -29,7 +30,7 @@ namespace hws::detail { { \ const int errc = subprocess_func; \ if (errc != 0) { \ - throw std::runtime_error{ std::format("Error calling subprocess function \"{}\"", #subprocess_func) }; \ + throw std::runtime_error{ fmt::format("Error calling subprocess function \"{}\"", #subprocess_func) }; \ } \ } #else @@ -43,6 +44,6 @@ namespace hws::detail { */ [[nodiscard]] std::string run_subprocess(std::string_view cmd_line); -} // namespace hws +} // namespace hws::detail #endif // HARDWARE_SAMPLING_CPU_UTILITY_HPP_ diff --git a/include/hardware_sampling/event.hpp b/include/hardware_sampling/event.hpp index 4375813..7129141 100644 --- a/include/hardware_sampling/event.hpp +++ b/include/hardware_sampling/event.hpp @@ -12,12 +12,12 @@ #define HARDWARE_SAMPLING_EVENT_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // 
hws::detail::ostream_formatter +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter -#include // std::chrono::steady_clock::time_point -#include // std::formatter -#include // std::ostream forward declaration -#include // std::string +#include // std::chrono::steady_clock::time_point +#include // std::ostream forward declaration +#include // std::string +#include // std::move namespace hws { @@ -25,9 +25,17 @@ namespace hws { * @brief A struct encapsulating a single event. */ struct event { + /** + * @brief Construct a new event given a time point and name. + * @param[in] time_point_p the time when the event occurred + * @param[in] name_p the name of the event + */ + event(const std::chrono::steady_clock::time_point time_point_p, std::string name_p) : + time_point{ time_point_p }, + name{ std::move(name_p) } { } + /// The time point this event occurred at. std::chrono::steady_clock::time_point time_point; - /// The name of this event. std::string name; }; @@ -43,6 +51,6 @@ std::ostream &operator<<(std::ostream &out, const event &e); } // namespace hws template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; #endif // HARDWARE_SAMPLING_EVENT_HPP_ diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp index 55ab3a9..80a7dbe 100644 --- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp @@ -14,13 +14,13 @@ #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::ostream_formatter + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter #include // std::atomic #include // 
std::chrono::milliseconds, std::chrono_literals namespace #include // std::size_t #include // std::uint32_t -#include // std::formatter #include // std::ostream forward declaration namespace hws { @@ -159,6 +159,6 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp } // namespace hws template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; #endif // HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index e0cb925..407a68c 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -12,10 +12,11 @@ #define HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter +#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter #include // std::uint64_t, std::int64_t, std::uint32_t -#include // std::formatter #include // std::ostream forward declaration #include // std::optional #include // std::string @@ -239,18 +240,18 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples & } // namespace hws template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; 
template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; #endif // HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp index d96387a..59e19b5 100644 --- a/include/hardware_sampling/gpu_amd/utility.hpp +++ b/include/hardware_sampling/gpu_amd/utility.hpp @@ -12,9 +12,9 @@ #define HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_ #pragma once +#include "fmt/format.h" // fmt::format #include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions -#include // std::format #include // std::runtime_error namespace hws { @@ -32,9 +32,9 @@ namespace hws { const char *error_string; \ const rsmi_status_t ret = rsmi_status_string(errc, &error_string); \ if (ret == RSMI_STATUS_SUCCESS) { \ - throw std::runtime_error{ std::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, error_string) }; \ + throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, error_string) }; \ } else { \ - throw std::runtime_error{ std::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, static_cast(errc)) }; \ + throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, static_cast(errc)) }; \ } \ } \ } @@ -43,7 +43,7 @@ namespace hws { { \ const hiperror_t errc = hip_func; \ if (errc != hipSuccess) { \ - throw std::runtime_error{ std::format("Error in HIP function call \"{}\": {}", #hip_func, hipGetErrorString(errc)) }; \ + throw std::runtime_error{ fmt::format("Error in HIP function call \"{}\": {}", #hip_func, hipGetErrorString(errc)) }; \ } \ } diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp index 1510199..dc8b411 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ 
b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp @@ -122,10 +122,11 @@ class level_zero_power_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, energy_threshold_enabled) // true if the energy threshold is enabled - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, energy_threshold) // the energy threshold in J + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit) // the actually enforced power limit (W), may be different from power management limit if external limiters are set + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type) // the type of the power readings + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode) // true if power management limits are enabled - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_total_energy_consumption) // the total power consumption since the last driver reload in mJ + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption) // the total power consumption since the last driver reload in J }; /** diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp index de22f3f..60ed693 100644 --- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp @@ -15,12 +15,12 @@ #include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp" // hws::nvml_device_handle #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::ostream_formatter + +#include "fmt/format.h" // fmt::formatter, fmt::ostream_formatter #include // std::atomic #include // std::chrono::milliseconds, std::chrono_literals namespace #include // std::size_t -#include // std::formatter 
#include // std::ostream forward declaration #include // std::string @@ -160,6 +160,6 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s } // namespace hws template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; #endif // HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index f766c3d..2757a60 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -12,9 +12,10 @@ #define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter +#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter -#include // std::formatter #include // std::ostream forward declaration #include // std::map #include // std::optional @@ -229,18 +230,18 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp } // namespace hws template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; #endif // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_ diff --git 
a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp index 8a1c590..272126b 100644 --- a/include/hardware_sampling/gpu_nvidia/utility.hpp +++ b/include/hardware_sampling/gpu_nvidia/utility.hpp @@ -12,11 +12,10 @@ #define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // hws::detail::join +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "nvml.h" // NVML runtime functions -#include "nvml.h" // NVML runtime functions - -#include // std::format #include // std::runtime_error #include // std::string #include // std::vector @@ -33,7 +32,7 @@ namespace hws::detail { { \ const nvmlReturn_t errc = nvml_func; \ if (errc != NVML_SUCCESS) { \ - throw std::runtime_error{ std::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast(errc)) }; \ + throw std::runtime_error{ fmt::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast(errc)) }; \ } \ } #else @@ -77,7 +76,7 @@ namespace hws::detail { if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) { reasons.emplace_back("HwThermalSlowdown"); } - return std::format("\"{}\"", detail::join(reasons, "|")); + return fmt::format("\"{}\"", fmt::join(reasons, "|")); } } diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp index 81e1136..91836c4 100644 --- a/include/hardware_sampling/utility.hpp +++ b/include/hardware_sampling/utility.hpp @@ -12,11 +12,13 @@ #define HARDWARE_SAMPLING_UTILITY_HPP_ #pragma once +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join + #include // std::from_chars #include // std::chrono::{milliseconds, duration_cast} #include // std::trunc #include // std::size_t -#include // std::format, std::formatter, std::basic_format_context, std::format_to #include // std::back_inserter, std::next, std::prev 
#include // std::optional #include // std::basic_stringstream @@ -24,7 +26,7 @@ #include // std::string, std::stof, std::stod, std::stold #include // std::string_view, std::basic_string_view #include // std::errc -#include // std::is_same_v, std::remove_cvref_t +#include // std::is_same_v, std::remove_cv_t, std::remove_reference_t #include // std::vector namespace hws::detail { @@ -54,6 +56,21 @@ namespace hws::detail { private: \ std::optional> sample_name##_{}; +// TODO: clean-up + +/** + * @brief Checks whether the string @p sv starts with the substring @p start + * @param[in] sv the full string + * @param[in] start the substring + * @return `true` if @p sv starts with @p start, otherwise `false` + */ +[[nodiscard]] inline bool starts_with(const std::string_view sv, const std::string_view start) { + return sv.substr(0, start.size()) == start; +} + +template +using remove_cvref_t = std::remove_cv_t>; + /** * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point. 
* @tparam TimePoint the type if the time points @@ -125,10 +142,10 @@ template */ template [[nodiscard]] inline T convert_to(const std::string_view str) { - if constexpr (std::is_same_v, std::string>) { + if constexpr (std::is_same_v, std::string>) { // convert string_view to string return std::string{ trim(str) }; - } else if constexpr (std::is_same_v, bool>) { + } else if constexpr (std::is_same_v, bool>) { const std::string lower_case_str = to_lower_case(trim(str)); // the string true if (lower_case_str == "true") { @@ -140,17 +157,17 @@ template } // convert a number to its "long long" value and convert it to a bool: 0 -> false, otherwise true return static_cast(convert_to(str)); - } else if constexpr (std::is_same_v, char>) { + } else if constexpr (std::is_same_v, char>) { const std::string_view trimmed = trim(str); // since we expect a character, after trimming the string must only contain exactly one character if (trimmed.size() != 1) { - throw std::runtime_error{ std::format("Can't convert '{}' to a value of type char!", str) }; + throw std::runtime_error{ fmt::format("Can't convert '{}' to a value of type char!", str) }; } return trimmed.front(); - } else if constexpr (std::is_floating_point_v>) { - if constexpr (std::is_same_v, float>) { + } else if constexpr (std::is_floating_point_v>) { + if constexpr (std::is_same_v, float>) { return std::stof(std::string{ str }); - } else if constexpr (std::is_same_v, double>) { + } else if constexpr (std::is_same_v, double>) { return std::stod(std::string{ str }); } else { return std::stold(std::string{ str }); @@ -163,7 +180,7 @@ template T val; auto res = std::from_chars(trimmed_str.data(), trimmed_str.data() + trimmed_str.size(), val); if (res.ec != std::errc{}) { - throw std::runtime_error{ std::format("Can't convert '{}' to a value of type T!", str) }; + throw std::runtime_error{ fmt::format("Can't convert '{}' to a value of type T!", str) }; } return val; } @@ -203,46 +220,6 @@ template */ [[nodiscard]] 
std::vector split(std::string_view str, char delim = ' '); -/** - * @brief A std::formatter child class allowing to format custom types using an `operator<<` overload. - * @tparam CharT the character type - */ -template -struct basic_ostream_formatter : std::formatter, CharT> { - template - OutputIt format(const T &value, std::basic_format_context &ctx) const { - std::basic_stringstream ss; - ss << value; - return std::formatter, CharT>::format(ss.view(), ctx); - } -}; - -/// Type alias for a basic_ostream_formatter using a normal char. -using ostream_formatter = basic_ostream_formatter; - -/** - * @brief Join all values in @p c to a single string using @p delim as delimiter. - * @tparam Container the type of the container - * @param[in] c the container for what the values should be joined - * @param[in] delim the delimiter used in joining the values - * @return the joined string (`[[nodiscard]]`) - */ -template -[[nodiscard]] inline std::string join(const Container &c, const std::string_view delim) { - if (c.empty()) { - return ""; - } else if (c.size() == 1) { - return std::format("{}", *c.cbegin()); - } else { - std::string out{}; - for (auto it = c.cbegin(); it != std::prev(c.cend()); it = std::next(it)) { - std::format_to(std::back_inserter(out), "{}{}", *it, delim); - } - std::format_to(std::back_inserter(out), "{}", *std::prev(c.end())); - return out; - } -} - template struct is_vector : std::false_type { }; @@ -264,13 +241,13 @@ template if (map.has_value()) { std::vector entries{}; for (const auto &[key, value] : map.value()) { - if constexpr (is_vector_v>) { - entries.push_back(std::format("{{{}, [{}]}}", key, detail::join(value, ", "))); + if constexpr (is_vector_v>) { + entries.push_back(fmt::format("{{{}, [{}]}}", key, fmt::join(value, ", "))); } else { - entries.push_back(std::format("{{{}, {}}}", key, value)); + entries.push_back(fmt::format("{{{}, {}}}", key, value)); } } - return detail::join(entries, ", "); + return fmt::format("{}", 
fmt::join(entries, ", ")); } return ""; } diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index deb1ddc..02148f0 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -7,11 +7,13 @@ #include "hardware_sampling/cpu/cpu_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join} +#include "hardware_sampling/utility.hpp" // hws::detail::value_or_default + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include // std::array #include // std::size_t -#include // std::format #include // std::ostream #include // std::regex, std::regex::extended, std::regex_match, std::regex_replace #include // std::string @@ -29,129 +31,129 @@ std::string cpu_general_samples::generate_yaml_string() const { // architecture if (this->architecture_.has_value()) { - str += std::format(" architecture:\n" + str += fmt::format(" architecture:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->architecture_.value()); } // byte order if (this->byte_order_.has_value()) { - str += std::format(" byte_order:\n" + str += fmt::format(" byte_order:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->byte_order_.value()); } // number of cores if (this->num_cores_.has_value()) { - str += std::format(" num_cores:\n" + str += fmt::format(" num_cores:\n" " unit: \"int\"\n" " values: {}\n", this->num_cores_.value()); } // number of threads including hyper-threads if (this->num_threads_.has_value()) { - str += std::format(" num_threads:\n" + str += fmt::format(" num_threads:\n" " unit: \"int\"\n" " values: {}\n", this->num_threads_.value()); } // number of threads per core if (this->threads_per_core_.has_value()) { - str += std::format(" threads_per_core:\n" + str += fmt::format(" threads_per_core:\n" " unit: \"int\"\n" " values: {}\n", this->threads_per_core_.value()); } // number of cores per socket if (this->cores_per_socket_.has_value()) { 
- str += std::format(" cores_per_socket:\n" + str += fmt::format(" cores_per_socket:\n" " unit: \"int\"\n" " values: {}\n", this->cores_per_socket_.value()); } // number of cores per socket if (this->num_sockets_.has_value()) { - str += std::format(" num_sockets:\n" + str += fmt::format(" num_sockets:\n" " unit: \"int\"\n" " values: {}\n", this->num_sockets_.value()); } // number of NUMA nodes if (this->numa_nodes_.has_value()) { - str += std::format(" numa_nodes:\n" + str += fmt::format(" numa_nodes:\n" " unit: \"int\"\n" " values: {}\n", this->numa_nodes_.value()); } // the vendor specific ID if (this->vendor_id_.has_value()) { - str += std::format(" vendor_id:\n" + str += fmt::format(" vendor_id:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->vendor_id_.value()); } // the CPU name if (this->name_.has_value()) { - str += std::format(" name:\n" + str += fmt::format(" name:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->name_.value()); } // CPU specific flags (like SSE, AVX, ...) 
if (this->flags_.has_value()) { - str += std::format(" flags:\n" + str += fmt::format(" flags:\n" " unit: \"string\"\n" " values: [{}]\n", - detail::join(this->flags_.value(), ", ")); + fmt::join(this->flags_.value(), ", ")); } // the percent the CPU was busy if (this->compute_utilization_.has_value()) { - str += std::format(" compute_utilization:\n" + str += fmt::format(" compute_utilization:\n" " turbostat_name: \"Busy%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->compute_utilization_.value(), ", ")); + fmt::join(this->compute_utilization_.value(), ", ")); } // the instructions per cycle count if (this->ipc_.has_value()) { - str += std::format(" instructions_per_cycle:\n" + str += fmt::format(" instructions_per_cycle:\n" " turbostat_name: \"IPC\"\n" " unit: \"float\"\n" " values: [{}]\n", - detail::join(this->ipc_.value(), ", ")); + fmt::join(this->ipc_.value(), ", ")); } // the number of interrupts if (this->irq_.has_value()) { - str += std::format(" interrupts:\n" + str += fmt::format(" interrupts:\n" " turbostat_name: \"IRQ\"\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->irq_.value(), ", ")); + fmt::join(this->irq_.value(), ", ")); } // the number of system management interrupts if (this->smi_.has_value()) { - str += std::format(" system_management_interrupts:\n" + str += fmt::format(" system_management_interrupts:\n" " turbostat_name: \"SMI\"\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->smi_.value(), ", ")); + fmt::join(this->smi_.value(), ", ")); } // the number of times the CPU was in the poll state if (this->poll_.has_value()) { - str += std::format(" polling_state:\n" + str += fmt::format(" polling_state:\n" " turbostat_name: \"POLL\"\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->poll_.value(), ", ")); + fmt::join(this->poll_.value(), ", ")); } // the percent the CPU was in the polling state if (this->poll_percent_.has_value()) { - str += std::format(" polling_percentage:\n" + str 
+= fmt::format(" polling_percentage:\n" " turbostat_name: \"POLL%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->poll_percent_.value(), ", ")); + fmt::join(this->poll_percent_.value(), ", ")); } // remove last newline @@ -161,7 +163,7 @@ std::string cpu_general_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) { - std::string str = std::format("architecture [string]: {}\n" + std::string str = fmt::format("architecture [string]: {}\n" "byte_order [string]: {}\n" "num_cores [int]: {}\n" "num_threads [int]: {}\n" @@ -188,13 +190,13 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) detail::value_or_default(samples.get_numa_nodes()), detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), - detail::join(detail::value_or_default(samples.get_flags()), ", "), - detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "), - detail::join(detail::value_or_default(samples.get_ipc()), ", "), - detail::join(detail::value_or_default(samples.get_irq()), ", "), - detail::join(detail::value_or_default(samples.get_smi()), ", "), - detail::join(detail::value_or_default(samples.get_poll()), ", "), - detail::join(detail::value_or_default(samples.get_poll_percent()), ", ")); + fmt::join(detail::value_or_default(samples.get_flags()), ", "), + fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "), + fmt::join(detail::value_or_default(samples.get_ipc()), ", "), + fmt::join(detail::value_or_default(samples.get_irq()), ", "), + fmt::join(detail::value_or_default(samples.get_smi()), ", "), + fmt::join(detail::value_or_default(samples.get_poll()), ", "), + fmt::join(detail::value_or_default(samples.get_poll_percent()), ", ")); // remove last newline str.pop_back(); @@ -211,21 +213,21 @@ std::string cpu_clock_samples::generate_yaml_string() const { // true if frequency boost is enabled 
if (this->auto_boosted_clock_enabled_.has_value()) { - str += std::format(" auto_boosted_clock_enabled:\n" + str += fmt::format(" auto_boosted_clock_enabled:\n" " unit: \"bool\"\n" " values: {}\n", this->auto_boosted_clock_enabled_.value()); } // the minimal CPU frequency if (this->clock_frequency_min_.has_value()) { - str += std::format(" clock_frequency_min:\n" + str += fmt::format(" clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", this->clock_frequency_min_.value()); } // the maximum CPU frequency if (this->clock_frequency_max_.has_value()) { - str += std::format(" clock_frequency_max:\n" + str += fmt::format(" clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", this->clock_frequency_max_.value()); @@ -233,27 +235,27 @@ std::string cpu_clock_samples::generate_yaml_string() const { // the average CPU frequency if (this->clock_frequency_.has_value()) { - str += std::format(" clock_frequency:\n" + str += fmt::format(" clock_frequency:\n" " turbostat_name: \"Avg_MHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_frequency_.value(), ", ")); + fmt::join(this->clock_frequency_.value(), ", ")); } // the average CPU frequency excluding idle time if (this->average_non_idle_frequency_.has_value()) { - str += std::format(" average_non_idle_frequency:\n" + str += fmt::format(" average_non_idle_frequency:\n" " turbostat_name: \"Bzy_MHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->average_non_idle_frequency_.value(), ", ")); + fmt::join(this->average_non_idle_frequency_.value(), ", ")); } // the time stamp counter if (this->time_stamp_counter_.has_value()) { - str += std::format(" time_stamp_counter:\n" + str += fmt::format(" time_stamp_counter:\n" " turbostat_name: \"TSC_MHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->time_stamp_counter_.value(), ", ")); + fmt::join(this->time_stamp_counter_.value(), ", ")); } // remove last newline @@ -263,7 +265,7 @@ std::string 
cpu_clock_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) { - return out << std::format("auto_boosted_clock_enabled [bool]: {}\n" + return out << fmt::format("auto_boosted_clock_enabled [bool]: {}\n" "clock_frequency_min [MHz]: {}\n" "clock_frequency_max [MHz]: {}\n" "clock_frequency [MHz]: [{}]\n" @@ -272,9 +274,9 @@ std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) { detail::value_or_default(samples.get_auto_boosted_clock_enabled()), detail::value_or_default(samples.get_clock_frequency_min()), detail::value_or_default(samples.get_clock_frequency_max()), - detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_time_stamp_counter()), ", ")); + fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_time_stamp_counter()), ", ")); } //*************************************************************************************************************************************// @@ -286,7 +288,7 @@ std::string cpu_power_samples::generate_yaml_string() const { // power measurement type if (this->power_measurement_type_.has_value()) { - str += std::format(" power_measurement_type:\n" + str += fmt::format(" power_measurement_type:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->power_measurement_type_.value()); @@ -294,51 +296,51 @@ std::string cpu_power_samples::generate_yaml_string() const { // the package Watt if (this->power_usage_.has_value()) { - str += std::format(" power_usage:\n" + str += fmt::format(" power_usage:\n" " turbostat_name: \"PkgWatt\"\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->power_usage_.value(), ", ")); + 
fmt::join(this->power_usage_.value(), ", ")); } // total energy consumed if (this->power_total_energy_consumption_.has_value()) { - str += std::format(" power_total_energy_consumed:\n" + str += fmt::format(" power_total_energy_consumed:\n" " unit: \"J\"\n" " values: [{}]\n", - detail::join(this->power_total_energy_consumption_.value(), ", ")); + fmt::join(this->power_total_energy_consumption_.value(), ", ")); } // the core Watt if (this->core_watt_.has_value()) { - str += std::format(" core_power:\n" + str += fmt::format(" core_power:\n" " turbostat_name: \"CorWatt\"\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->core_watt_.value(), ", ")); + fmt::join(this->core_watt_.value(), ", ")); } // the DRAM Watt if (this->ram_watt_.has_value()) { - str += std::format(" dram_power:\n" + str += fmt::format(" dram_power:\n" " turbostat_name: \"RAMWatt\"\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->ram_watt_.value(), ", ")); + fmt::join(this->ram_watt_.value(), ", ")); } // the percent of time when the RAPL package throttle was active if (this->package_rapl_throttle_percent_.has_value()) { - str += std::format(" package_rapl_throttling:\n" + str += fmt::format(" package_rapl_throttling:\n" " turbostat_name: \"PKG_%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->package_rapl_throttle_percent_.value(), ", ")); + fmt::join(this->package_rapl_throttle_percent_.value(), ", ")); } // the percent of time when the RAPL DRAM throttle was active if (this->dram_rapl_throttle_percent_.has_value()) { - str += std::format(" dram_rapl_throttling:\n" + str += fmt::format(" dram_rapl_throttling:\n" " turbostat_name: \"RAM_%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->dram_rapl_throttle_percent_.value(), ", ")); + fmt::join(this->dram_rapl_throttle_percent_.value(), ", ")); } // remove last newline @@ -348,7 +350,7 @@ std::string cpu_power_samples::generate_yaml_string() const { } std::ostream 
&operator<<(std::ostream &out, const cpu_power_samples &samples) { - return out << std::format("power_measurement_type [string]: {}\n" + return out << fmt::format("power_measurement_type [string]: {}\n" "power_usage [W]: [{}]\n" "power_total_energy_consumption [J]: [{}]\n" "core_watt [W]: [{}]\n" @@ -356,12 +358,12 @@ std::ostream &operator<<(std::ostream &out, const cpu_power_samples &samples) { "package_rapl_throttle_percent [%]: [{}]\n" "dram_rapl_throttle_percent [%]: [{}]", detail::value_or_default(samples.get_power_measurement_type()), - detail::join(detail::value_or_default(samples.get_power_usage()), ", "), - detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), - detail::join(detail::value_or_default(samples.get_core_watt()), ", "), - detail::join(detail::value_or_default(samples.get_ram_watt()), ", "), - detail::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "), - detail::join(detail::value_or_default(samples.get_dram_rapl_throttle_percent()), ", ")); + fmt::join(detail::value_or_default(samples.get_power_usage()), ", "), + fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), + fmt::join(detail::value_or_default(samples.get_core_watt()), ", "), + fmt::join(detail::value_or_default(samples.get_ram_watt()), ", "), + fmt::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_dram_rapl_throttle_percent()), ", ")); } //*************************************************************************************************************************************// @@ -373,28 +375,28 @@ std::string cpu_memory_samples::generate_yaml_string() const { // the size of the L1 data cache if (this->l1d_cache_.has_value()) { - str += std::format(" cache_size_L1d:\n" + str += fmt::format(" cache_size_L1d:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->l1d_cache_.value()); } // the size of the L1 
instruction cache if (this->l1i_cache_.has_value()) { - str += std::format(" cache_size_L1i:\n" + str += fmt::format(" cache_size_L1i:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->l1i_cache_.value()); } // the size of the L2 cache if (this->l2_cache_.has_value()) { - str += std::format(" cache_size_L2:\n" + str += fmt::format(" cache_size_L2:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->l2_cache_.value()); } // the size of the L3 cache if (this->l3_cache_.has_value()) { - str += std::format(" cache_size_L3:\n" + str += fmt::format(" cache_size_L3:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->l3_cache_.value()); @@ -402,14 +404,14 @@ std::string cpu_memory_samples::generate_yaml_string() const { // the total size of available memory if (this->memory_total_.has_value()) { - str += std::format(" memory_total:\n" + str += fmt::format(" memory_total:\n" " unit: \"B\"\n" " values: {}\n", this->memory_total_.value()); } // the total size of the swap memory if (this->swap_memory_total_.has_value()) { - str += std::format(" swap_memory_total:\n" + str += fmt::format(" swap_memory_total:\n" " unit: \"B\"\n" " values: {}\n", this->swap_memory_total_.value()); @@ -417,31 +419,31 @@ std::string cpu_memory_samples::generate_yaml_string() const { // the available free memory if (this->memory_free_.has_value()) { - str += std::format(" memory_free:\n" + str += fmt::format(" memory_free:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(this->memory_free_.value(), ", ")); + fmt::join(this->memory_free_.value(), ", ")); } // the used memory if (this->memory_used_.has_value()) { - str += std::format(" memory_used:\n" + str += fmt::format(" memory_used:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(this->memory_used_.value(), ", ")); + fmt::join(this->memory_used_.value(), ", ")); } // the available swap memory if (this->swap_memory_free_.has_value()) { - str += std::format(" swap_memory_free:\n" + str += fmt::format(" swap_memory_free:\n" " unit: 
\"B\"\n" " values: [{}]\n", - detail::join(this->swap_memory_free_.value(), ", ")); + fmt::join(this->swap_memory_free_.value(), ", ")); } // the swap memory if (this->swap_memory_used_.has_value()) { - str += std::format(" swap_memory_used:\n" + str += fmt::format(" swap_memory_used:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(this->swap_memory_used_.value(), ", ")); + fmt::join(this->swap_memory_used_.value(), ", ")); } // remove last newline @@ -451,7 +453,7 @@ std::string cpu_memory_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) { - return out << std::format("l1d_cache [string]: {}\n" + return out << fmt::format("l1d_cache [string]: {}\n" "l1i_cache [string]: {}\n" "l2_cache [string]: {}\n" "l3_cache [string]: {}\n" @@ -467,10 +469,10 @@ std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) { detail::value_or_default(samples.get_l3_cache()), detail::value_or_default(samples.get_memory_total()), detail::value_or_default(samples.get_swap_memory_total()), - detail::join(detail::value_or_default(samples.get_memory_free()), ", "), - detail::join(detail::value_or_default(samples.get_memory_used()), ", "), - detail::join(detail::value_or_default(samples.get_swap_memory_free()), ", "), - detail::join(detail::value_or_default(samples.get_swap_memory_used()), ", ")); + fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_used()), ", "), + fmt::join(detail::value_or_default(samples.get_swap_memory_free()), ", "), + fmt::join(detail::value_or_default(samples.get_swap_memory_used()), ", ")); } //*************************************************************************************************************************************// @@ -482,27 +484,27 @@ std::string cpu_temperature_samples::generate_yaml_string() const { // the temperature of the cores if (this->core_temperature_.has_value()) { - 
str += std::format(" per_core_temperature:\n" + str += fmt::format(" per_core_temperature:\n" " turbostat_name: \"CoreTmp\"\n" " unit: \"°C\"\n" " values: [{}]\n", - detail::join(this->core_temperature_.value(), ", ")); + fmt::join(this->core_temperature_.value(), ", ")); } // the percentage of time the core throttled due the temperature constraints if (this->core_throttle_percent_.has_value()) { - str += std::format(" core_throttle_percentage:\n" + str += fmt::format(" core_throttle_percentage:\n" " turbostat_name: \"CoreThr\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->core_throttle_percent_.value(), ", ")); + fmt::join(this->core_throttle_percent_.value(), ", ")); } // the temperature of the whole package if (this->package_temperature_.has_value()) { - str += std::format(" per_package_temperature:\n" + str += fmt::format(" per_package_temperature:\n" " turbostat_name: \"PkgTmp\"\n" " unit: \"°C\"\n" " values: [{}]\n", - detail::join(this->package_temperature_.value(), ", ")); + fmt::join(this->package_temperature_.value(), ", ")); } // remove last newline @@ -512,12 +514,12 @@ std::string cpu_temperature_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_temperature_samples &samples) { - return out << std::format("core_temperature [°C]: [{}]\n" + return out << fmt::format("core_temperature [°C]: [{}]\n" "core_throttle_percent [%]: [{}]\n" "package_temperature [°C]: [{}]", - detail::join(detail::value_or_default(samples.get_core_temperature()), ", "), - detail::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "), - detail::join(detail::value_or_default(samples.get_package_temperature()), ", ")); + fmt::join(detail::value_or_default(samples.get_core_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_package_temperature()), ", ")); } 
//*************************************************************************************************************************************// @@ -529,51 +531,51 @@ std::string cpu_gfx_samples::generate_yaml_string() const { // the percentage of time the iGPU was in the render state if (this->gfx_render_state_percent_.has_value()) { - str += std::format(" graphics_render_state:\n" + str += fmt::format(" graphics_render_state:\n" " turbostat_name: \"GFX%rc6\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->gfx_render_state_percent_.value(), ", ")); + fmt::join(this->gfx_render_state_percent_.value(), ", ")); } // the core frequency of the iGPU if (this->gfx_frequency_.has_value()) { - str += std::format(" graphics_frequency:\n" + str += fmt::format(" graphics_frequency:\n" " turbostat_name: \"GFXMHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->gfx_frequency_.value(), ", ")); + fmt::join(this->gfx_frequency_.value(), ", ")); } // the average core frequency of the iGPU if (this->average_gfx_frequency_.has_value()) { - str += std::format(" average_graphics_frequency:\n" + str += fmt::format(" average_graphics_frequency:\n" " turbostat_name: \"GFXAMHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->average_gfx_frequency_.value(), ", ")); + fmt::join(this->average_gfx_frequency_.value(), ", ")); } // the percentage of time the iGPU was in the c0 state if (this->gfx_state_c0_percent_.has_value()) { - str += std::format(" gpu_state_c0:\n" + str += fmt::format(" gpu_state_c0:\n" " turbostat_name: \"GFX%C0\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->gfx_state_c0_percent_.value(), ", ")); + fmt::join(this->gfx_state_c0_percent_.value(), ", ")); } // the percentage of time the CPU worked for the iGPU if (this->cpu_works_for_gpu_percent_.has_value()) { - str += std::format(" cpu_works_for_gpu:\n" + str += fmt::format(" cpu_works_for_gpu:\n" " turbostat_name: \"CPUGFX%\"\n" " unit: \"percentage\"\n" " 
values: [{}]\n", - detail::join(this->cpu_works_for_gpu_percent_.value(), ", ")); + fmt::join(this->cpu_works_for_gpu_percent_.value(), ", ")); } // the iGPU Watt if (this->gfx_watt_.has_value()) { - str += std::format(" graphics_power:\n" + str += fmt::format(" graphics_power:\n" " turbostat_name: \"GFXWatt\"\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->gfx_watt_.value(), ", ")); + fmt::join(this->gfx_watt_.value(), ", ")); } // remove last newline @@ -583,18 +585,18 @@ std::string cpu_gfx_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_gfx_samples &samples) { - return out << std::format("gfx_render_state_percent [%]: [{}]\n" + return out << fmt::format("gfx_render_state_percent [%]: [{}]\n" "gfx_frequency [MHz]: [{}]\n" "average_gfx_frequency [MHz]: [{}]\n" "gfx_state_c0_percent [%]: [{}]\n" "cpu_works_for_gpu_percent [%]: [{}]\n" "gfx_watt [W]: [{}]", - detail::join(detail::value_or_default(samples.get_gfx_render_state_percent()), ", "), - detail::join(detail::value_or_default(samples.get_gfx_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_average_gfx_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_gfx_state_c0_percent()), ", "), - detail::join(detail::value_or_default(samples.get_cpu_works_for_gpu_percent()), ", "), - detail::join(detail::value_or_default(samples.get_gfx_watt()), ", ")); + fmt::join(detail::value_or_default(samples.get_gfx_render_state_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_gfx_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_average_gfx_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_gfx_state_c0_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_cpu_works_for_gpu_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_gfx_watt()), ", ")); } 
//*************************************************************************************************************************************// @@ -606,43 +608,43 @@ std::string cpu_idle_states_samples::generate_yaml_string() const { // the percentage of time all CPUs were in the c0 state if (this->all_cpus_state_c0_percent_.has_value()) { - str += std::format(" all_cpus_state_c0:\n" + str += fmt::format(" all_cpus_state_c0:\n" " turbostat_name: \"Totl%C0\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->all_cpus_state_c0_percent_.value(), ", ")); + fmt::join(this->all_cpus_state_c0_percent_.value(), ", ")); } // the percentage of time any CPU was in the c0 state if (this->any_cpu_state_c0_percent_.has_value()) { - str += std::format(" any_cpu_state_c0:\n" + str += fmt::format(" any_cpu_state_c0:\n" " turbostat_name: \"Any%C0\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->any_cpu_state_c0_percent_.value(), ", ")); + fmt::join(this->any_cpu_state_c0_percent_.value(), ", ")); } // the percentage of time the CPUs were in the low power idle state if (this->low_power_idle_state_percent_.has_value()) { - str += std::format(" lower_power_idle_state:\n" + str += fmt::format(" lower_power_idle_state:\n" " turbostat_name: \"CPU%LPI\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->low_power_idle_state_percent_.value(), ", ")); + fmt::join(this->low_power_idle_state_percent_.value(), ", ")); } // the percentage of time the CPUs were in the system low power idle state if (this->system_low_power_idle_state_percent_.has_value()) { - str += std::format(" system_lower_power_idle_state:\n" + str += fmt::format(" system_lower_power_idle_state:\n" " turbostat_name: \"SYS%LPI\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->system_low_power_idle_state_percent_.value(), ", ")); + fmt::join(this->system_low_power_idle_state_percent_.value(), ", ")); } // the percentage of time the package was in the low 
power idle state if (this->package_low_power_idle_state_percent_.has_value()) { - str += std::format(" package_lower_power_idle_state:\n" + str += fmt::format(" package_lower_power_idle_state:\n" " turbostat_name: \"Pkg%LPI\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->package_low_power_idle_state_percent_.value(), ", ")); + fmt::join(this->package_low_power_idle_state_percent_.value(), ", ")); } // the other core idle states @@ -671,14 +673,14 @@ std::string cpu_idle_states_samples::generate_yaml_string() const { std::string entry_name_with_state{}; std::regex_replace(std::back_inserter(entry_name_with_state), entry_name_placeholder.begin(), entry_name_placeholder.end(), placeholder_reg, std::string{ state }); - str += std::format(" {}:\n" + str += fmt::format(" {}:\n" " turbostat_name: \"{}\"\n" " unit: \"{}\"\n" " values: [{}]\n", entry_name_with_state, entry, entry_unit, - detail::join(values, ", ")); + fmt::join(values, ", ")); break; } } @@ -692,21 +694,21 @@ std::string cpu_idle_states_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &samples) { - std::string str = std::format("all_cpus_state_c0_percent [%]: [{}]\n" + std::string str = fmt::format("all_cpus_state_c0_percent [%]: [{}]\n" "any_cpu_state_c0_percent [%]: [{}]\n" "low_power_idle_state_percent [%]: [{}]\n" "system_low_power_idle_state_percent [%]: [{}]\n" "package_low_power_idle_state_percent [%]: [{}]\n", - detail::join(detail::value_or_default(samples.get_all_cpus_state_c0_percent()), ", "), - detail::join(detail::value_or_default(samples.get_any_cpu_state_c0_percent()), ", "), - detail::join(detail::value_or_default(samples.get_low_power_idle_state_percent()), ", "), - detail::join(detail::value_or_default(samples.get_system_low_power_idle_state_percent()), ", "), - detail::join(detail::value_or_default(samples.get_package_low_power_idle_state_percent()), ", ")); + 
fmt::join(detail::value_or_default(samples.get_all_cpus_state_c0_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_any_cpu_state_c0_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_low_power_idle_state_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_system_low_power_idle_state_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_package_low_power_idle_state_percent()), ", ")); // add map entries if (samples.get_idle_states().has_value()) { for (const auto &[key, value] : samples.get_idle_states().value()) { - str += std::format("{}: [{}]\n", key, detail::join(value, ", ")); + str += fmt::format("{}: [{}]\n", key, fmt::join(value, ", ")); } } diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 89683eb..6c8471d 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -10,13 +10,15 @@ #include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} #include "hardware_sampling/cpu/utility.hpp" // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess #include "hardware_sampling/hardware_sampler.hpp" // hws::tracking::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::{split, split_as, trim, convert_to, ostream_formatter, join} +#include "hardware_sampling/utility.hpp" // hws::detail::{split, split_as, trim, convert_to, starts_with} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include // assert #include // std::chrono::{steady_clock, milliseconds} #include // std::size_t #include // std::exception, std::terminate -#include // std::format #include // std::ios_base #include // std::cerr, std::endl #include // std::make_optional @@ -69,39 +71,39 @@ void cpu_hardware_sampler::sampling_loop() { value = 
detail::trim(value); // check the lines if the start with an entry that we want to sample - if (line.starts_with("Architecture")) { + if (detail::starts_with(line, "Architecture")) { general_samples_.architecture_ = detail::convert_to(value); - } else if (line.starts_with("Byte Order")) { + } else if (detail::starts_with(line, "Byte Order")) { general_samples_.byte_order_ = detail::convert_to(value); - } else if (line.starts_with("CPU(s)")) { + } else if (detail::starts_with(line, "CPU(s)")) { general_samples_.num_threads_ = detail::convert_to(value); - } else if (line.starts_with("Thread(s) per core")) { + } else if (detail::starts_with(line, "Thread(s) per core")) { general_samples_.threads_per_core_ = detail::convert_to(value); - } else if (line.starts_with("Core(s) per socket")) { + } else if (detail::starts_with(line, "Core(s) per socket")) { general_samples_.cores_per_socket_ = detail::convert_to(value); - } else if (line.starts_with("Socket(s)")) { + } else if (detail::starts_with(line, "Socket(s)")) { general_samples_.num_sockets_ = detail::convert_to(value); - } else if (line.starts_with("NUMA node(s)")) { + } else if (detail::starts_with(line, "NUMA node(s)")) { general_samples_.numa_nodes_ = detail::convert_to(value); - } else if (line.starts_with("Vendor ID")) { + } else if (detail::starts_with(line, "Vendor ID")) { general_samples_.vendor_id_ = detail::convert_to(value); - } else if (line.starts_with("Model name")) { + } else if (detail::starts_with(line, "Model name")) { general_samples_.name_ = detail::convert_to(value); - } else if (line.starts_with("Flags")) { + } else if (detail::starts_with(line, "Flags")) { general_samples_.flags_ = detail::split_as(value, ' '); - } else if (line.starts_with("Frequency boost")) { + } else if (detail::starts_with(line, "Frequency boost")) { clock_samples_.auto_boosted_clock_enabled_ = value == "enabled"; - } else if (line.starts_with("CPU max MHz")) { + } else if (detail::starts_with(line, "CPU max MHz")) { 
clock_samples_.clock_frequency_max_ = detail::convert_to(value); - } else if (line.starts_with("CPU min MHz")) { + } else if (detail::starts_with(line, "CPU min MHz")) { clock_samples_.clock_frequency_min_ = detail::convert_to(value); - } else if (line.starts_with("L1d cache")) { + } else if (detail::starts_with(line, "L1d cache")) { memory_samples_.l1d_cache_ = detail::convert_to(value); - } else if (line.starts_with("L1i cache")) { + } else if (detail::starts_with(line, "L1i cache")) { memory_samples_.l1i_cache_ = detail::convert_to(value); - } else if (line.starts_with("L2 cache")) { + } else if (detail::starts_with(line, "L2 cache")) { memory_samples_.l2_cache_ = detail::convert_to(value); - } else if (line.starts_with("L3 cache")) { + } else if (detail::starts_with(line, "L3 cache")) { memory_samples_.l3_cache_ = detail::convert_to(value); } } @@ -401,7 +403,7 @@ void cpu_hardware_sampler::sampling_loop() { power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); } else { const std::string header_str{ header[i] }; - if (idle_state_samples_.idle_states_.value().contains(header_str)) { + if (idle_state_samples_.idle_states_.value().count(header_str) > decltype(idle_state_samples_)::map_type::size_type{ 0 }) { using vector_type = cpu_idle_states_samples::map_type::mapped_type; idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to(values[i])); } @@ -426,7 +428,7 @@ std::string cpu_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" 
}; } - return std::format("{}\n" + return fmt::format("{}\n" "{}\n" "{}\n" "{}\n" @@ -447,7 +449,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler) out.setstate(std::ios_base::failbit); return out; } else { - return out << std::format("sampling interval: {}\n" + return out << fmt::format("sampling interval: {}\n" "time points: [{}]\n\n" "general samples:\n{}\n\n" "clock samples:\n{}\n\n" @@ -457,7 +459,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler) "gfx samples:\n{}\n\n" "idle state samples:\n{}", sampler.sampling_interval(), - detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), + fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), sampler.general_samples(), sampler.clock_samples(), sampler.power_samples(), diff --git a/src/hardware_sampling/cpu/utility.cpp b/src/hardware_sampling/cpu/utility.cpp index 7ba16d2..2b0080f 100644 --- a/src/hardware_sampling/cpu/utility.cpp +++ b/src/hardware_sampling/cpu/utility.cpp @@ -9,12 +9,12 @@ #include "hardware_sampling/utility.hpp" // hws::detail::split_as +#include "fmt/format.h" // fmt::format #include "subprocess.h" // subprocess_s, subprocess_create, subprocess_join, subprocess_stdout, subprocess_option_e #include // std::transform #include // std::size_t #include // std::FILE, std::fread -#include // std::format #include // std::runtime_error #include // std::string #include // std::string_view @@ -41,7 +41,7 @@ std::string run_subprocess(const std::string_view cmd_line) { int return_code{}; HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code)); if (return_code != 0) { - throw std::runtime_error{ std::format("Error: \"{}\" returned with {}!", cmd_line, return_code) }; + throw std::runtime_error{ fmt::format("Error: \"{}\" returned with {}!", cmd_line, return_code) }; } // get output handle and read data -> stdout and stderr are the same handle diff --git 
a/src/hardware_sampling/event.cpp b/src/hardware_sampling/event.cpp index b88eaa3..e21c715 100644 --- a/src/hardware_sampling/event.cpp +++ b/src/hardware_sampling/event.cpp @@ -7,13 +7,14 @@ #include "hardware_sampling/event.hpp" -#include // std::format +#include "fmt/format.h" // fmt::format + #include // std::ostream namespace hws { std::ostream &operator<<(std::ostream &out, const event &e) { - return out << std::format("time_point: {}\n" + return out << fmt::format("time_point: {}\n" "name: {}", e.time_point.time_since_epoch(), e.name); diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 63fbda4..0b80c81 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -10,8 +10,10 @@ #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} #include "hardware_sampling/gpu_amd/utility.hpp" // HWS_ROCM_SMI_ERROR_CHECK #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::{time_points_to_epoch, join} +#include "hardware_sampling/utility.hpp" // hws::detail::time_points_to_epoch +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include "hip/hip_runtime_api.h" // HIP runtime functions #include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions @@ -19,7 +21,6 @@ #include // std::size_t #include // std::uint32_t, std::uint64_t #include // std::exception, std::terminate -#include // std::format #include // std::ios_base #include // std::cerr, std::endl #include // std::optional @@ -278,8 +279,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { float resolution{}; std::uint64_t power_total_energy_consumption{}; if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, 
&resolution, ×tamp) == RSMI_STATUS_SUCCESS) { - const auto scaled_value = static_cast(power_total_energy_consumption) * - static_cast(resolution); + const auto scaled_value = static_cast(power_total_energy_consumption) * static_cast(resolution); power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ scaled_value / 1000.0 / 1000.0 }; } } @@ -538,8 +538,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { float resolution{}; std::uint64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp)); - const auto scaled_value = static_cast(value) * - static_cast(resolution); + const auto scaled_value = static_cast(value) * static_cast(resolution); power_samples_.power_total_energy_consumption_->push_back(scaled_value / 1000.0); } @@ -655,7 +654,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { } std::string gpu_amd_hardware_sampler::device_identification() const { - return std::format("gpu_amd_device_{}", device_id_); + return fmt::format("gpu_amd_device_{}", device_id_); } std::string gpu_amd_hardware_sampler::generate_yaml_string() const { @@ -664,7 +663,7 @@ std::string gpu_amd_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" 
}; } - return std::format("{}\n" + return fmt::format("{}\n" "{}\n" "{}\n" "{}\n" @@ -681,7 +680,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp out.setstate(std::ios_base::failbit); return out; } else { - return out << std::format("sampling interval: {}\n" + return out << fmt::format("sampling interval: {}\n" "time points: [{}]\n\n" "general samples:\n{}\n\n" "clock samples:\n{}\n\n" @@ -689,7 +688,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp "memory samples:\n{}\n\n" "temperature samples:\n{}", sampler.sampling_interval(), - detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), + fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), sampler.general_samples(), sampler.clock_samples(), sampler.power_samples(), diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index 579ea29..568082e 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -7,11 +7,12 @@ #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join} +#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default,} +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include "rocm_smi/rocm_smi.h" // RSMI_MAX_FAN_SPEED -#include // std::format #include // std::ostream #include // std::string @@ -26,28 +27,28 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { // device architecture if (this->architecture_.has_value()) { - str += std::format(" architecture:\n" + str += fmt::format(" architecture:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->architecture_.value()); } // device byte order if (this->byte_order_.has_value()) { - str += std::format(" byte_order:\n" + str += fmt::format(" byte_order:\n" " 
unit: \"string\"\n" " values: \"{}\"\n", this->byte_order_.value()); } // the vendor specific ID if (this->vendor_id_.has_value()) { - str += std::format(" vendor_id:\n" + str += fmt::format(" vendor_id:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->vendor_id_.value()); } // device name if (this->name_.has_value()) { - str += std::format(" name:\n" + str += fmt::format(" name:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->name_.value()); @@ -55,24 +56,24 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { // device compute utilization if (this->compute_utilization_.has_value()) { - str += std::format(" compute_utilization:\n" + str += fmt::format(" compute_utilization:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->compute_utilization_.value(), ", ")); + fmt::join(this->compute_utilization_.value(), ", ")); } // device memory utilization if (this->memory_utilization_.has_value()) { - str += std::format(" memory_utilization:\n" + str += fmt::format(" memory_utilization:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->memory_utilization_.value(), ", ")); + fmt::join(this->memory_utilization_.value(), ", ")); } // performance state if (this->performance_level_.has_value()) { - str += std::format(" performance_state:\n" + str += fmt::format(" performance_state:\n" " unit: \"int - see rsmi_dev_perf_level_t\"\n" " values: [{}]\n", - detail::join(this->performance_level_.value(), ", ")); + fmt::join(this->performance_level_.value(), ", ")); } // remove last newline @@ -82,7 +83,7 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) { - return out << std::format("architecture [string]: {}\n" + return out << fmt::format("architecture [string]: {}\n" "byte_order [string]: {}\n" "vendor_id [string]: {}\n" "name [string]: {}\n" @@ -93,9 +94,9 @@ std::ostream &operator<<(std::ostream &out, const 
rocm_smi_general_samples &samp detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), - detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "), - detail::join(detail::value_or_default(samples.get_memory_utilization()), ", "), - detail::join(detail::value_or_default(samples.get_performance_level()), ", ")); + fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_utilization()), ", "), + fmt::join(detail::value_or_default(samples.get_performance_level()), ", ")); } //*************************************************************************************************************************************// @@ -107,95 +108,95 @@ std::string rocm_smi_clock_samples::generate_yaml_string() const { // system clock min frequencies if (this->clock_frequency_min_.has_value()) { - str += std::format(" clock_frequency_min:\n" + str += fmt::format(" clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", this->clock_frequency_min_.value()); } // system clock max frequencies if (this->clock_frequency_max_.has_value()) { - str += std::format(" clock_frequency_max:\n" + str += fmt::format(" clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", this->clock_frequency_max_.value()); } // memory clock min frequencies if (this->memory_clock_frequency_min_.has_value()) { - str += std::format(" memory_clock_frequency_min:\n" + str += fmt::format(" memory_clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", this->memory_clock_frequency_min_.value()); } // memory clock max frequencies if (this->memory_clock_frequency_max_.has_value()) { - str += std::format(" memory_clock_frequency_max:\n" + str += fmt::format(" memory_clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", this->memory_clock_frequency_max_.value()); } // socket clock min frequencies if 
(this->socket_clock_frequency_min_.has_value()) { - str += std::format(" socket_clock_frequency_min:\n" + str += fmt::format(" socket_clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", this->socket_clock_frequency_min_.value()); } // socket clock max frequencies if (this->socket_clock_frequency_max_.has_value()) { - str += std::format(" socket_clock_frequency_max:\n" + str += fmt::format(" socket_clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", this->socket_clock_frequency_max_.value()); } // the available clock frequencies if (this->available_clock_frequencies_.has_value()) { - str += std::format(" available_clock_frequencies:\n" + str += fmt::format(" available_clock_frequencies:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->available_clock_frequencies_.value(), ", ")); + fmt::join(this->available_clock_frequencies_.value(), ", ")); } // the available memory clock frequencies if (this->available_memory_clock_frequencies_.has_value()) { - str += std::format(" available_memory_clock_frequencies:\n" + str += fmt::format(" available_memory_clock_frequencies:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->available_memory_clock_frequencies_.value(), ", ")); + fmt::join(this->available_memory_clock_frequencies_.value(), ", ")); } // system clock frequency if (this->clock_frequency_.has_value()) { - str += std::format(" clock_frequency:\n" + str += fmt::format(" clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_frequency_.value(), ", ")); + fmt::join(this->clock_frequency_.value(), ", ")); } // memory clock frequency if (this->memory_clock_frequency_.has_value()) { - str += std::format(" memory_clock_frequency:\n" + str += fmt::format(" memory_clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->memory_clock_frequency_.value(), ", ")); + fmt::join(this->memory_clock_frequency_.value(), ", ")); } // socket clock frequency if 
(this->socket_clock_frequency_.has_value()) { - str += std::format(" socket_clock_frequency:\n" + str += fmt::format(" socket_clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->socket_clock_frequency_.value(), ", ")); + fmt::join(this->socket_clock_frequency_.value(), ", ")); } // overdrive level if (this->overdrive_level_.has_value()) { - str += std::format(" overdrive_level:\n" + str += fmt::format(" overdrive_level:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->overdrive_level_.value(), ", ")); + fmt::join(this->overdrive_level_.value(), ", ")); } // memory overdrive level if (this->memory_overdrive_level_.has_value()) { - str += std::format(" memory_overdrive_level:\n" + str += fmt::format(" memory_overdrive_level:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->memory_overdrive_level_.value(), ", ")); + fmt::join(this->memory_overdrive_level_.value(), ", ")); } // remove last newline @@ -205,7 +206,7 @@ std::string rocm_smi_clock_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples) { - return out << std::format("clock_frequency_min [MHz]: {}\n" + return out << fmt::format("clock_frequency_min [MHz]: {}\n" "clock_frequency_max [MHz]: {}\n" "memory_clock_frequency_min [MHz]: {}\n" "memory_clock_frequency_max [MHz]: {}\n" @@ -224,13 +225,13 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &sample detail::value_or_default(samples.get_memory_clock_frequency_max()), detail::value_or_default(samples.get_socket_clock_frequency_min()), detail::value_or_default(samples.get_socket_clock_frequency_max()), - detail::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "), - detail::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), - detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "), - 
detail::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_socket_clock_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_overdrive_level()), ", "), - detail::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", ")); + fmt::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_socket_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_overdrive_level()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", ")); } //*************************************************************************************************************************************// @@ -242,53 +243,53 @@ std::string rocm_smi_power_samples::generate_yaml_string() const { // power management limit if (this->power_management_limit_.has_value()) { - str += std::format(" power_management_limit:\n" + str += fmt::format(" power_management_limit:\n" " unit: \"W\"\n" " values: {}\n", this->power_management_limit_.value()); } // power enforced limit if (this->power_enforced_limit_.has_value()) { - str += std::format(" power_enforced_limit:\n" + str += fmt::format(" power_enforced_limit:\n" " unit: \"W\"\n" " values: {}\n", this->power_enforced_limit_.value()); } // power measurement type if (this->power_measurement_type_.has_value()) { - str += std::format(" power_measurement_type:\n" + str += fmt::format(" power_measurement_type:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->power_measurement_type_.value()); } // available power levels if (this->available_power_profiles_.has_value()) { - str += 
std::format(" available_power_profiles:\n" + str += fmt::format(" available_power_profiles:\n" " unit: \"string\"\n" " values: [{}]\n", - detail::join(this->available_power_profiles_.value(), ", ")); + fmt::join(this->available_power_profiles_.value(), ", ")); } // current power usage if (this->power_usage_.has_value()) { - str += std::format(" power_usage:\n" + str += fmt::format(" power_usage:\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->power_usage_.value(), ", ")); + fmt::join(this->power_usage_.value(), ", ")); } // total energy consumed if (this->power_total_energy_consumption_.has_value()) { - str += std::format(" power_total_energy_consumed:\n" + str += fmt::format(" power_total_energy_consumed:\n" " unit: \"J\"\n" " values: [{}]\n", - detail::join(this->power_total_energy_consumption_.value(), ", ")); + fmt::join(this->power_total_energy_consumption_.value(), ", ")); } // current power level if (this->power_profile_.has_value()) { - str += std::format(" power_profile:\n" + str += fmt::format(" power_profile:\n" " unit: \"string\"\n" " values: [{}]\n", - detail::join(this->power_profile_.value(), ", ")); + fmt::join(this->power_profile_.value(), ", ")); } // remove last newline @@ -298,7 +299,7 @@ std::string rocm_smi_power_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples) { - return out << std::format("power_management_limit [W]: {}\n" + return out << fmt::format("power_management_limit [W]: {}\n" "power_enforced_limit [W]: {}\n" "power_measurement_type [string]: {}\n" "available_power_profiles [string]: [{}]\n" @@ -308,10 +309,10 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &sample detail::value_or_default(samples.get_power_management_limit()), detail::value_or_default(samples.get_power_enforced_limit()), detail::value_or_default(samples.get_power_measurement_type()), - 
detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "), - detail::join(detail::value_or_default(samples.get_power_usage()), ", "), - detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), - detail::join(detail::value_or_default(samples.get_power_profile()), ", ")); + fmt::join(detail::value_or_default(samples.get_available_power_profiles()), ", "), + fmt::join(detail::value_or_default(samples.get_power_usage()), ", "), + fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), + fmt::join(detail::value_or_default(samples.get_power_profile()), ", ")); } //*************************************************************************************************************************************// @@ -323,28 +324,28 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const { // total memory if (this->memory_total_.has_value()) { - str += std::format(" memory_total:\n" + str += fmt::format(" memory_total:\n" " unit: \"B\"\n" " values: {}\n", this->memory_total_.value()); } // total visible memory if (this->visible_memory_total_.has_value()) { - str += std::format(" visible_memory_total:\n" + str += fmt::format(" visible_memory_total:\n" " unit: \"B\"\n" " values: {}\n", this->visible_memory_total_.value()); } // min number of PCIe lanes if (this->min_num_pcie_lanes_.has_value()) { - str += std::format(" min_num_pcie_lanes:\n" + str += fmt::format(" min_num_pcie_lanes:\n" " unit: \"int\"\n" " values: {}\n", this->min_num_pcie_lanes_.value()); } // max number of PCIe lanes if (this->max_num_pcie_lanes_.has_value()) { - str += std::format(" max_num_pcie_lanes:\n" + str += fmt::format(" max_num_pcie_lanes:\n" " unit: \"int\"\n" " values: {}\n", this->max_num_pcie_lanes_.value()); @@ -352,10 +353,10 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const { // used memory if (this->memory_used_.has_value()) { - str += std::format(" memory_used:\n" + str += 
fmt::format(" memory_used:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(this->memory_used_.value(), ", ")); + fmt::join(this->memory_used_.value(), ", ")); } // free memory if (this->memory_used_.has_value() && this->memory_total_.has_value()) { @@ -363,25 +364,25 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const { for (std::size_t i = 0; i < memory_free.size(); ++i) { memory_free[i] -= this->memory_used_.value()[i]; } - str += std::format(" memory_free:\n" + str += fmt::format(" memory_free:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(memory_free, ", ")); + fmt::join(memory_free, ", ")); } // PCIe bandwidth if (this->pcie_transfer_rate_.has_value()) { - str += std::format(" pcie_bandwidth:\n" + str += fmt::format(" pcie_bandwidth:\n" " unit: \"T/s\"\n" " values: [{}]\n", - detail::join(this->pcie_transfer_rate_.value(), ", ")); + fmt::join(this->pcie_transfer_rate_.value(), ", ")); } // number of PCIe lanes if (this->num_pcie_lanes_.has_value()) { - str += std::format(" pcie_num_lanes:\n" + str += fmt::format(" pcie_num_lanes:\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->num_pcie_lanes_.value(), ", ")); + fmt::join(this->num_pcie_lanes_.value(), ", ")); } // remove last newline @@ -391,7 +392,7 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples) { - return out << std::format("memory_total [B]: {}\n" + return out << fmt::format("memory_total [B]: {}\n" "visible_memory_total [B]: {}\n" "min_num_pcie_lanes [int]: {}\n" "max_num_pcie_lanes [int]: {}\n" @@ -402,9 +403,9 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &sampl detail::value_or_default(samples.get_visible_memory_total()), detail::value_or_default(samples.get_min_num_pcie_lanes()), detail::value_or_default(samples.get_max_num_pcie_lanes()), - detail::join(detail::value_or_default(samples.get_memory_used()), ", "), - 
detail::join(detail::value_or_default(samples.get_pcie_transfer_rate()), ", "), - detail::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", ")); + fmt::join(detail::value_or_default(samples.get_memory_used()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_transfer_rate()), ", "), + fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", ")); } //*************************************************************************************************************************************// @@ -416,112 +417,112 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const { // number of fans (emulated) if (this->num_fans_.has_value()) { - str += std::format(" num_fans:\n" + str += fmt::format(" num_fans:\n" " unit: \"int\"\n" " values: {}\n", this->num_fans_.value()); } // maximum fan speed if (this->max_fan_speed_.has_value()) { - str += std::format(" max_fan_speed:\n" + str += fmt::format(" max_fan_speed:\n" " unit: \"int\"\n" " values: {}\n", this->max_fan_speed_.value()); } // minimum GPU edge temperature if (this->temperature_edge_min_.has_value()) { - str += std::format(" temperature_gpu_min:\n" + str += fmt::format(" temperature_gpu_min:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_edge_min_.value()); } // maximum GPU edge temperature if (this->temperature_edge_max_.has_value()) { - str += std::format(" temperature_gpu_max:\n" + str += fmt::format(" temperature_gpu_max:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_edge_max_.value()); } // minimum GPU hotspot temperature if (this->temperature_hotspot_min_.has_value()) { - str += std::format(" temperature_hotspot_min:\n" + str += fmt::format(" temperature_hotspot_min:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_hotspot_min_.value()); } // maximum GPU hotspot temperature if (this->temperature_hotspot_max_.has_value()) { - str += std::format(" temperature_hotspot_max:\n" + str += fmt::format(" temperature_hotspot_max:\n" " unit: 
\"m°C\"\n" " values: {}\n", this->temperature_hotspot_max_.value()); } // minimum GPU memory temperature if (this->temperature_memory_min_.has_value()) { - str += std::format(" temperature_memory_min:\n" + str += fmt::format(" temperature_memory_min:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_memory_min_.value()); } // maximum GPU memory temperature if (this->temperature_memory_max_.has_value()) { - str += std::format(" temperature_memory_max:\n" + str += fmt::format(" temperature_memory_max:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_memory_max_.value()); } // minimum GPU HBM 0 temperature if (this->temperature_hbm_0_min_.has_value()) { - str += std::format(" temperature_hbm_0_min:\n" + str += fmt::format(" temperature_hbm_0_min:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_hbm_0_min_.value()); } // maximum GPU HBM 0 temperature if (this->temperature_hbm_0_max_.has_value()) { - str += std::format(" temperature_hbm_0_max:\n" + str += fmt::format(" temperature_hbm_0_max:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_hbm_0_max_.value()); } // minimum GPU HBM 1 temperature if (this->temperature_hbm_1_min_.has_value()) { - str += std::format(" temperature_hbm_1_min:\n" + str += fmt::format(" temperature_hbm_1_min:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_hbm_1_min_.value()); } // maximum GPU HBM 1 temperature if (this->temperature_hbm_1_max_.has_value()) { - str += std::format(" temperature_hbm_1_max:\n" + str += fmt::format(" temperature_hbm_1_max:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_hbm_1_max_.value()); } // minimum GPU HBM 2 temperature if (this->temperature_hbm_2_min_.has_value()) { - str += std::format(" temperature_hbm_2_min:\n" + str += fmt::format(" temperature_hbm_2_min:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_hbm_2_min_.value()); } // maximum GPU HBM 2 temperature if (this->temperature_hbm_2_max_.has_value()) { - str += std::format(" 
temperature_hbm_2_max:\n" + str += fmt::format(" temperature_hbm_2_max:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_hbm_2_max_.value()); } // minimum GPU HBM 3 temperature if (this->temperature_hbm_3_min_.has_value()) { - str += std::format(" temperature_hbm_3_min:\n" + str += fmt::format(" temperature_hbm_3_min:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_hbm_3_min_.value()); } // maximum GPU HBM 3 temperature if (this->temperature_hbm_3_max_.has_value()) { - str += std::format(" temperature_hbm_3_max:\n" + str += fmt::format(" temperature_hbm_3_max:\n" " unit: \"m°C\"\n" " values: {}\n", this->temperature_hbm_3_max_.value()); @@ -533,59 +534,59 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const { for (std::size_t i = 0; i < fan_speed_percent.size(); ++i) { fan_speed_percent[i] = static_cast(this->fan_speed_.value()[i]) / static_cast(RSMI_MAX_FAN_SPEED); } - str += std::format(" fan_speed:\n" + str += fmt::format(" fan_speed:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(fan_speed_percent, ", ")); + fmt::join(fan_speed_percent, ", ")); } // GPU edge temperature if (this->temperature_edge_.has_value()) { - str += std::format(" temperature_gpu:\n" + str += fmt::format(" temperature_gpu:\n" " unit: \"m°C\"\n" " values: [{}]\n", - detail::join(this->temperature_edge_.value(), ", ")); + fmt::join(this->temperature_edge_.value(), ", ")); } // GPU hotspot temperature if (this->temperature_hotspot_.has_value()) { - str += std::format(" temperature_hotspot:\n" + str += fmt::format(" temperature_hotspot:\n" " unit: \"m°C\"\n" " values: [{}]\n", - detail::join(this->temperature_hotspot_.value(), ", ")); + fmt::join(this->temperature_hotspot_.value(), ", ")); } // GPU memory temperature if (this->temperature_memory_.has_value()) { - str += std::format(" temperature_memory:\n" + str += fmt::format(" temperature_memory:\n" " unit: \"m°C\"\n" " values: [{}]\n", - detail::join(this->temperature_memory_.value(), ", 
")); + fmt::join(this->temperature_memory_.value(), ", ")); } // GPU HBM 0 temperature if (this->temperature_hbm_0_.has_value()) { - str += std::format(" temperature_hbm_0:\n" + str += fmt::format(" temperature_hbm_0:\n" " unit: \"m°C\"\n" " values: [{}]\n", - detail::join(this->temperature_hbm_0_.value(), ", ")); + fmt::join(this->temperature_hbm_0_.value(), ", ")); } // GPU HBM 1 temperature if (this->temperature_hbm_1_.has_value()) { - str += std::format(" temperature_hbm_1:\n" + str += fmt::format(" temperature_hbm_1:\n" " unit: \"m°C\"\n" " values: [{}]\n", - detail::join(this->temperature_hbm_1_.value(), ", ")); + fmt::join(this->temperature_hbm_1_.value(), ", ")); } // GPU HBM 2 temperature if (this->temperature_hbm_2_.has_value()) { - str += std::format(" temperature_hbm_2:\n" + str += fmt::format(" temperature_hbm_2:\n" " unit: \"m°C\"\n" " values: [{}]\n", - detail::join(this->temperature_hbm_2_.value(), ", ")); + fmt::join(this->temperature_hbm_2_.value(), ", ")); } // GPU HBM 3 temperature if (this->temperature_hbm_3_.has_value()) { - str += std::format(" temperature_hbm_3:\n" + str += fmt::format(" temperature_hbm_3:\n" " unit: \"m°C\"\n" " values: [{}]\n", - detail::join(this->temperature_hbm_3_.value(), ", ")); + fmt::join(this->temperature_hbm_3_.value(), ", ")); } // remove last newline @@ -595,7 +596,7 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples) { - return out << std::format("num_fans [int]: {}\n" + return out << fmt::format("num_fans [int]: {}\n" "max_fan_speed [int]: {}\n" "temperature_edge_min [m°C]: {}\n" "temperature_edge_max [m°C]: {}\n" @@ -635,14 +636,14 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples & detail::value_or_default(samples.get_temperature_hbm_2_max()), detail::value_or_default(samples.get_temperature_hbm_3_min()), 
detail::value_or_default(samples.get_temperature_hbm_3_max()), - detail::join(detail::value_or_default(samples.get_fan_speed()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_edge()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hotspot()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_memory()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hbm_0()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hbm_1()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hbm_2()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hbm_3()), ", ")); + fmt::join(detail::value_or_default(samples.get_fan_speed()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature_edge()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature_hotspot()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature_memory()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature_hbm_0()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature_hbm_1()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature_hbm_2()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature_hbm_3()), ", ")); } } // namespace hws diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp index 3fd9a1e..2027135 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp @@ -227,17 +227,45 @@ void gpu_intel_hardware_sampler::sampling_loop() { if (zesDeviceEnumPowerDomains(device, &num_power_domains, power_handles.data()) == ZE_RESULT_SUCCESS) { if (!power_handles.empty()) { // NOTE: only the first power domain is used here + // get the power measurement type + // NOTE: only the first value is used here! 
+ std::uint32_t num_power_limit_descriptors{ 1 }; + zes_power_limit_ext_desc_t desc{}; + if (zesPowerGetLimitsExt(power_handles.front(), &num_power_limit_descriptors, &desc) == ZE_RESULT_SUCCESS) { + switch (desc.level) { + case ZES_POWER_LEVEL_UNKNOWN: + power_samples_.power_measurement_type_ = "unknown"; + break; + case ZES_POWER_LEVEL_SUSTAINED: + power_samples_.power_measurement_type_ = "sustained"; + break; + case ZES_POWER_LEVEL_BURST: + power_samples_.power_measurement_type_ = "burst"; + break; + case ZES_POWER_LEVEL_PEAK: + power_samples_.power_measurement_type_ = "peak"; + break; + case ZES_POWER_LEVEL_INSTANTANEOUS: + power_samples_.power_measurement_type_ = "current/instant"; + break; + case ZES_POWER_LEVEL_FORCE_UINT32: + power_samples_.power_measurement_type_ = "force uint32"; + break; + } + + power_samples_.power_enforced_limit_ = static_cast(desc.limit); + } + // get total power consumption zes_power_energy_counter_t energy_counter{}; if (zesPowerGetEnergyCounter(power_handles.front(), &energy_counter) == ZE_RESULT_SUCCESS) { - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ energy_counter.energy }; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast(energy_counter.energy) / 1000.0 / 1000.0 }; } // get energy thresholds zes_energy_threshold_t energy_threshold{}; if (zesPowerGetEnergyThreshold(power_handles.front(), &energy_threshold) == ZE_RESULT_SUCCESS) { - power_samples_.energy_threshold_enabled_ = static_cast(energy_threshold.enable); - power_samples_.energy_threshold_ = energy_threshold.threshold; + power_samples_.power_management_mode_ = static_cast(energy_threshold.enable); } } } @@ -453,7 +481,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { zes_power_energy_counter_t energy_counter{}; HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter)); - 
power_samples_.power_total_energy_consumption_->push_back(energy_counter.energy); + power_samples_.power_total_energy_consumption_->push_back(static_cast(energy_counter.energy) / 1000.0 / 1000.0); } } } diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp index 70f1016..971bfb9 100644 --- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp +++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp @@ -20,15 +20,6 @@ namespace hws { namespace detail { -template -struct is_vector : std::false_type { }; - -template -struct is_vector> : std::true_type { }; - -template -constexpr bool is_vector_v = is_vector::value; - template void append_map_values(std::string &str, const std::string_view entry_name, const MapType &map) { if (map.has_value()) { @@ -248,19 +239,26 @@ std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samp std::string level_zero_power_samples::generate_yaml_string() const { std::string str{ "power:\n" }; - // flag whether the energy threshold is enabled - if (this->energy_threshold_enabled_.has_value()) { - str += std::format(" energy_threshold_enabled:\n" - " unit: \"bool\"\n" + // power enforced limit + if (this->power_enforced_limit_.has_value()) { + str += std::format(" power_enforced_limit:\n" + " unit: \"W\"\n" " values: {}\n", - this->energy_threshold_enabled_.value()); + this->power_enforced_limit_.value()); } - // the energy threshold - if (this->energy_threshold_.has_value()) { - str += std::format(" energy_threshold:\n" - " unit: \"J\"\n" + // power measurement type + if (this->power_measurement_type_.has_value()) { + str += std::format(" power_measurement_type:\n" + " unit: \"string\"\n" + " values: {}\n", + this->power_measurement_type_.value()); + } + // the power management mode + if (this->power_management_mode_.has_value()) { + str += std::format(" power_management_mode:\n" + " unit: \"bool\"\n" " values: {}\n", - 
this->energy_threshold_.value()); + this->power_management_mode_.value()); } // the total consumed energy @@ -282,11 +280,13 @@ std::string level_zero_power_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const level_zero_power_samples &samples) { - return out << std::format("energy_threshold_enabled [bool]: {}\n" - "energy_threshold [J]: {}\n" + return out << std::format("power_enforced_limit [W]: {}\n" + "power_measurement_type [string]: {}\n" + "power_management_mode [bool]: {}\n" "power_total_energy_consumption [J]: [{}]", - detail::value_or_default(samples.get_energy_threshold_enabled()), - detail::value_or_default(samples.get_energy_threshold()), + detail::value_or_default(samples.get_power_enforced_limit()), + detail::value_or_default(samples.get_power_measurement_type()), + detail::value_or_default(samples.get_power_management_mode()), detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", ")); } diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 6e0fe7c..17e7049 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -11,15 +11,16 @@ #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} #include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::{time_points_to_epoch, join} +#include "hardware_sampling/utility.hpp" // hws::detail::time_points_to_epoch -#include "nvml.h" // NVML runtime functions +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "nvml.h" // NVML runtime functions #include // std::min_element, std::sort, std::transform 
#include // std::chrono::{steady_clock, duration_cast, milliseconds} #include // std::size_t #include // std::exception, std::terminate -#include // std::format #include // std::ios_base #include // std::cerr, std::endl #include // std::iota @@ -534,7 +535,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { std::string gpu_nvidia_hardware_sampler::device_identification() const { nvmlPciInfo_st pcie_info{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info)); - return std::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device); + return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device); } std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const { @@ -543,7 +544,7 @@ std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return std::format("{}\n" + return fmt::format("{}\n" "{}\n" "{}\n" "{}\n" @@ -560,7 +561,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s out.setstate(std::ios_base::failbit); return out; } else { - return out << std::format("sampling interval: {}\n" + return out << fmt::format("sampling interval: {}\n" "time points: [{}]\n\n" "general samples:\n{}\n\n" "clock samples:\n{}\n\n" @@ -568,7 +569,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s "memory samples:\n{}\n\n" "temperature samples:\n{}", sampler.sampling_interval(), - detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), + fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), sampler.general_samples(), sampler.clock_samples(), sampler.power_samples(), diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 64f559d..71fb7a6 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ 
b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -7,9 +7,11 @@ #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join, map_entry_to_string} +#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, map_entry_to_string} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join -#include // std::format #include // std::ostream #include // std::string @@ -24,42 +26,42 @@ std::string nvml_general_samples::generate_yaml_string() const { // device architecture if (this->architecture_.has_value()) { - str += std::format(" architecture:\n" + str += fmt::format(" architecture:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->architecture_.value()); } // device byte order if (this->byte_order_.has_value()) { - str += std::format(" byte_order:\n" + str += fmt::format(" byte_order:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->byte_order_.value()); } // the vendor specific ID if (this->vendor_id_.has_value()) { - str += std::format(" vendor_id:\n" + str += fmt::format(" vendor_id:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->vendor_id_.value()); } // device name if (this->name_.has_value()) { - str += std::format(" name:\n" + str += fmt::format(" name:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->name_.value()); } // persistence mode enabled if (this->persistence_mode_.has_value()) { - str += std::format(" persistence_mode:\n" + str += fmt::format(" persistence_mode:\n" " unit: \"bool\"\n" " values: {}\n", this->persistence_mode_.value()); } // number of cores if (this->num_cores_.has_value()) { - str += std::format(" num_cores:\n" + str += fmt::format(" num_cores:\n" " unit: \"int\"\n" " values: {}\n", this->num_cores_.value()); @@ -67,25 +69,25 @@ std::string nvml_general_samples::generate_yaml_string() const { // device compute utilization if (this->compute_utilization_.has_value()) { - str += std::format(" 
compute_utilization:\n" + str += fmt::format(" compute_utilization:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->compute_utilization_.value(), ", ")); + fmt::join(this->compute_utilization_.value(), ", ")); } // device memory utilization if (this->memory_utilization_.has_value()) { - str += std::format(" memory_utilization:\n" + str += fmt::format(" memory_utilization:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->memory_utilization_.value(), ", ")); + fmt::join(this->memory_utilization_.value(), ", ")); } // performance state if (this->performance_level_.has_value()) { - str += std::format(" performance_level:\n" + str += fmt::format(" performance_level:\n" " unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n" " values: [{}]\n", - detail::join(this->performance_level_.value(), ", ")); + fmt::join(this->performance_level_.value(), ", ")); } // remove last newline @@ -95,7 +97,7 @@ std::string nvml_general_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) { - return out << std::format("architecture [string]: {}\n" + return out << fmt::format("architecture [string]: {}\n" "byte_order [string]: {}\n" "vendor_id [string]: {}\n" "name [string]: {}\n" @@ -110,9 +112,9 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) detail::value_or_default(samples.get_name()), detail::value_or_default(samples.get_persistence_mode()), detail::value_or_default(samples.get_num_cores()), - detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "), - detail::join(detail::value_or_default(samples.get_memory_utilization()), ", "), - detail::join(detail::value_or_default(samples.get_performance_level()), ", ")); + fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_utilization()), ", "), + 
fmt::join(detail::value_or_default(samples.get_performance_level()), ", ")); } //*************************************************************************************************************************************// @@ -124,97 +126,97 @@ std::string nvml_clock_samples::generate_yaml_string() const { // adaptive clock status if (this->auto_boosted_clock_enabled_.has_value()) { - str += std::format(" auto_boosted_clock_enabled:\n" + str += fmt::format(" auto_boosted_clock_enabled:\n" " unit: \"bool\"\n" " values: {}\n", this->auto_boosted_clock_enabled_.value()); } // minimum graph clock if (this->clock_frequency_min_.has_value()) { - str += std::format(" clock_frequency_min:\n" + str += fmt::format(" clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", this->clock_frequency_min_.value()); } // maximum graph clock if (this->clock_frequency_max_.has_value()) { - str += std::format(" clock_frequency_max:\n" + str += fmt::format(" clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", this->clock_frequency_max_.value()); } // minimum memory clock if (this->memory_clock_frequency_min_.has_value()) { - str += std::format(" memory_clock_frequency_min:\n" + str += fmt::format(" memory_clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", this->memory_clock_frequency_min_.value()); } // maximum memory clock if (this->memory_clock_frequency_max_.has_value()) { - str += std::format(" memory_clock_frequency_max:\n" + str += fmt::format(" memory_clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", this->memory_clock_frequency_max_.value()); } // maximum SM clock if (this->sm_clock_frequency_max_.has_value()) { - str += std::format(" sm_clock_frequency_max:\n" + str += fmt::format(" sm_clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", this->sm_clock_frequency_max_.value()); } // the available clock frequencies if (this->available_clock_frequencies_.has_value()) { - str += std::format(" available_clock_frequencies:\n" + str += fmt::format(" 
available_clock_frequencies:\n" " unit: \"MHz\"\n" " values:\n"); for (const auto &[key, value] : this->available_clock_frequencies_.value()) { - str += std::format(" {}: [{}]\n", key, detail::join(value, ", ")); + str += fmt::format(" {}: [{}]\n", key, fmt::join(value, ", ")); } } // the available memory clock frequencies if (this->available_memory_clock_frequencies_.has_value()) { - str += std::format(" available_memory_clock_frequencies:\n" + str += fmt::format(" available_memory_clock_frequencies:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->available_memory_clock_frequencies_.value(), ", ")); + fmt::join(this->available_memory_clock_frequencies_.value(), ", ")); } // graph clock if (this->clock_frequency_.has_value()) { - str += std::format(" clock_frequency:\n" + str += fmt::format(" clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_frequency_.value(), ", ")); + fmt::join(this->clock_frequency_.value(), ", ")); } // memory clock if (this->memory_clock_frequency_.has_value()) { - str += std::format(" memory_clock_frequency:\n" + str += fmt::format(" memory_clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->memory_clock_frequency_.value(), ", ")); + fmt::join(this->memory_clock_frequency_.value(), ", ")); } // SM clock if (this->sm_clock_frequency_.has_value()) { - str += std::format(" sm_clock_frequency:\n" + str += fmt::format(" sm_clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->sm_clock_frequency_.value(), ", ")); + fmt::join(this->sm_clock_frequency_.value(), ", ")); } // clock throttle reason if (this->throttle_reason_.has_value()) { - str += std::format(" throttle_reason:\n" + str += fmt::format(" throttle_reason:\n" " unit: \"string\"\n" " values: [{}]\n", - detail::join(this->throttle_reason_.value(), ", ")); + fmt::join(this->throttle_reason_.value(), ", ")); } // clock is auto-boosted if (this->auto_boosted_clock_.has_value()) { - str += 
std::format(" auto_boosted_clock:\n" + str += fmt::format(" auto_boosted_clock:\n" " unit: \"bool\"\n" " values: [{}]\n", - detail::join(this->auto_boosted_clock_.value(), ", ")); + fmt::join(this->auto_boosted_clock_.value(), ", ")); } // remove last newline @@ -224,7 +226,7 @@ std::string nvml_clock_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) { - return out << std::format("auto_boosted_clock_enabled [bool]: {}\n" + return out << fmt::format("auto_boosted_clock_enabled [bool]: {}\n" "clock_frequency_min [MHz]: {}\n" "clock_frequency_max [MHz]: {}\n" "memory_clock_frequency_min [MHz]: {}\n" @@ -244,12 +246,12 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) { detail::value_or_default(samples.get_memory_clock_frequency_max()), detail::value_or_default(samples.get_sm_clock_frequency_max()), detail::map_entry_to_string(samples.get_available_clock_frequencies()), - detail::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), - detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_throttle_reason()), ", "), - detail::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", ")); + fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "), + fmt::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", ")); } 
//*************************************************************************************************************************************// @@ -261,60 +263,60 @@ std::string nvml_power_samples::generate_yaml_string() const { // power management limit if (this->power_management_limit_.has_value()) { - str += std::format(" power_management_limit:\n" + str += fmt::format(" power_management_limit:\n" " unit: \"W\"\n" " values: {}\n", this->power_management_limit_.value()); } // power enforced limit if (this->power_enforced_limit_.has_value()) { - str += std::format(" power_enforced_limit:\n" + str += fmt::format(" power_enforced_limit:\n" " unit: \"W\"\n" " values: {}\n", this->power_enforced_limit_.value()); } // power measurement type if (this->power_measurement_type_.has_value()) { - str += std::format(" power_measurement_type:\n" + str += fmt::format(" power_measurement_type:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->power_measurement_type_.value()); } // the power management mode if (this->power_management_mode_.has_value()) { - str += std::format(" power_management_mode:\n" + str += fmt::format(" power_management_mode:\n" " unit: \"bool\"\n" " values: {}\n", this->power_management_mode_.value()); } // available power levels if (this->available_power_profiles_.has_value()) { - str += std::format(" available_power_profiles:\n" + str += fmt::format(" available_power_profiles:\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->available_power_profiles_.value(), ", ")); + fmt::join(this->available_power_profiles_.value(), ", ")); } // current power usage if (this->power_usage_.has_value()) { - str += std::format(" power_usage:\n" + str += fmt::format(" power_usage:\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->power_usage_.value(), ", ")); + fmt::join(this->power_usage_.value(), ", ")); } // total energy consumed if (this->power_total_energy_consumption_.has_value()) { - str += std::format(" power_total_energy_consumed:\n" + str += 
fmt::format(" power_total_energy_consumed:\n" " unit: \"J\"\n" " values: [{}]\n", - detail::join(this->power_total_energy_consumption_.value(), ", ")); + fmt::join(this->power_total_energy_consumption_.value(), ", ")); } // power state if (this->power_profile_.has_value()) { - str += std::format(" power_profile:\n" + str += fmt::format(" power_profile:\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->power_profile_.value(), ", ")); + fmt::join(this->power_profile_.value(), ", ")); } // remove last newline @@ -324,7 +326,7 @@ std::string nvml_power_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) { - return out << std::format("power_management_limit [W]: {}\n" + return out << fmt::format("power_management_limit [W]: {}\n" "power_enforced_limit [W]: {}\n" "power_measurement_type [string]: {}\n" "power_management_mode [bool]: {}\n" @@ -336,10 +338,10 @@ std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) { detail::value_or_default(samples.get_power_enforced_limit()), detail::value_or_default(samples.get_power_measurement_type()), detail::value_or_default(samples.get_power_management_mode()), - detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "), - detail::join(detail::value_or_default(samples.get_power_usage()), ", "), - detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), - detail::join(detail::value_or_default(samples.get_power_profile()), ", ")); + fmt::join(detail::value_or_default(samples.get_available_power_profiles()), ", "), + fmt::join(detail::value_or_default(samples.get_power_usage()), ", "), + fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), + fmt::join(detail::value_or_default(samples.get_power_profile()), ", ")); } 
//*************************************************************************************************************************************// @@ -351,28 +353,28 @@ std::string nvml_memory_samples::generate_yaml_string() const { // total memory size if (this->memory_total_.has_value()) { - str += std::format(" memory_total:\n" + str += fmt::format(" memory_total:\n" " unit: \"B\"\n" " values: {}\n", this->memory_total_.value()); } // maximum PCIe link speed if (this->pcie_link_max_speed_.has_value()) { - str += std::format(" pcie_max_bandwidth:\n" + str += fmt::format(" pcie_max_bandwidth:\n" " unit: \"MBPS\"\n" " values: {}\n", this->pcie_link_max_speed_.value()); } // memory bus width if (this->memory_bus_width_.has_value()) { - str += std::format(" memory_bus_width:\n" + str += fmt::format(" memory_bus_width:\n" " unit: \"Bit\"\n" " values: {}\n", this->memory_bus_width_.value()); } // maximum PCIe link generation if (this->max_pcie_link_generation_.has_value()) { - str += std::format(" max_pcie_link_generation:\n" + str += fmt::format(" max_pcie_link_generation:\n" " unit: \"int\"\n" " values: {}\n", this->max_pcie_link_generation_.value()); @@ -380,38 +382,38 @@ std::string nvml_memory_samples::generate_yaml_string() const { // free memory size if (this->memory_free_.has_value()) { - str += std::format(" memory_free:\n" + str += fmt::format(" memory_free:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(this->memory_free_.value(), ", ")); + fmt::join(this->memory_free_.value(), ", ")); } // used memory size if (this->memory_used_.has_value()) { - str += std::format(" memory_used:\n" + str += fmt::format(" memory_used:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(this->memory_used_.value(), ", ")); + fmt::join(this->memory_used_.value(), ", ")); } // PCIe link speed if (this->pcie_link_speed_.has_value()) { - str += std::format(" pcie_bandwidth:\n" + str += fmt::format(" pcie_bandwidth:\n" " unit: \"MBPS\"\n" " values: [{}]\n", - 
detail::join(this->pcie_link_speed_.value(), ", ")); + fmt::join(this->pcie_link_speed_.value(), ", ")); } // PCIe link width if (this->pcie_link_width_.has_value()) { - str += std::format(" pcie_link_width:\n" + str += fmt::format(" pcie_link_width:\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->pcie_link_width_.value(), ", ")); + fmt::join(this->pcie_link_width_.value(), ", ")); } // PCIe link generation if (this->pcie_link_generation_.has_value()) { - str += std::format(" pcie_link_generation:\n" + str += fmt::format(" pcie_link_generation:\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->pcie_link_generation_.value(), ", ")); + fmt::join(this->pcie_link_generation_.value(), ", ")); } // remove last newline @@ -421,7 +423,7 @@ std::string nvml_memory_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) { - return out << std::format("memory_total [B]: {}\n" + return out << fmt::format("memory_total [B]: {}\n" "pcie_link_max_speed [MBPS]: {}\n" "memory_bus_width [Bit]: {}\n" "max_pcie_link_generation [int]: {}\n" @@ -434,11 +436,11 @@ std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) detail::value_or_default(samples.get_pcie_link_max_speed()), detail::value_or_default(samples.get_memory_bus_width()), detail::value_or_default(samples.get_max_pcie_link_generation()), - detail::join(detail::value_or_default(samples.get_memory_free()), ", "), - detail::join(detail::value_or_default(samples.get_memory_used()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_width()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_generation()), ", ")); + fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_used()), ", "), + 
fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_width()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", ")); } //*************************************************************************************************************************************// @@ -450,35 +452,35 @@ std::string nvml_temperature_samples::generate_yaml_string() const { // number of fans if (this->num_fans_.has_value()) { - str += std::format(" num_fans:\n" + str += fmt::format(" num_fans:\n" " unit: \"int\"\n" " values: {}\n", this->num_fans_.value()); } // min fan speed if (this->min_fan_speed_.has_value()) { - str += std::format(" min_fan_speed:\n" + str += fmt::format(" min_fan_speed:\n" " unit: \"percentage\"\n" " values: {}\n", this->min_fan_speed_.value()); } // max fan speed if (this->max_fan_speed_.has_value()) { - str += std::format(" max_fan_speed:\n" + str += fmt::format(" max_fan_speed:\n" " unit: \"percentage\"\n" " values: {}\n", this->max_fan_speed_.value()); } // temperature threshold GPU max if (this->temperature_threshold_gpu_max_.has_value()) { - str += std::format(" temperature_gpu_max:\n" + str += fmt::format(" temperature_gpu_max:\n" " unit: \"°C\"\n" " values: {}\n", this->temperature_threshold_gpu_max_.value()); } // temperature threshold memory max if (this->temperature_threshold_mem_max_.has_value()) { - str += std::format(" temperature_mem_max:\n" + str += fmt::format(" temperature_mem_max:\n" " unit: \"°C\"\n" " values: {}\n", this->temperature_threshold_mem_max_.value()); @@ -486,17 +488,17 @@ std::string nvml_temperature_samples::generate_yaml_string() const { // fan speed if (this->fan_speed_.has_value()) { - str += std::format(" fan_speed:\n" + str += fmt::format(" fan_speed:\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->fan_speed_.value(), ", ")); + fmt::join(this->fan_speed_.value(), ", ")); } // temperature GPU if 
(this->temperature_gpu_.has_value()) { - str += std::format(" temperature_gpu:\n" + str += fmt::format(" temperature_gpu:\n" " unit: \"°C\"\n" " values: [{}]\n", - detail::join(this->temperature_gpu_.value(), ", ")); + fmt::join(this->temperature_gpu_.value(), ", ")); } // remove last newline @@ -506,7 +508,7 @@ std::string nvml_temperature_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samples) { - return out << std::format("num_fans [int]: {}\n" + return out << fmt::format("num_fans [int]: {}\n" "min_fan_speed [%]: {}\n" "max_fan_speed [%]: {}\n" "temperature_threshold_gpu_max [°C]: {}\n" @@ -518,8 +520,8 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp detail::value_or_default(samples.get_max_fan_speed()), detail::value_or_default(samples.get_temperature_threshold_gpu_max()), detail::value_or_default(samples.get_temperature_threshold_mem_max()), - detail::join(detail::value_or_default(samples.get_fan_speed()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_gpu()), ", ")); + fmt::join(detail::value_or_default(samples.get_fan_speed()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature_gpu()), ", ")); } } // namespace hws diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index 1c3ff49..1ac8f81 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -8,12 +8,15 @@ #include "hardware_sampling/hardware_sampler.hpp" #include "hardware_sampling/event.hpp" // hws::event -#include "hardware_sampling/utility.hpp" // hws::detail::{durations_from_reference_time, join} +#include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "fmt/chrono.h" // fmt::localtime, direct formatting of std::chrono types #include // 
std::chrono::{system_clock, steady_clock, duration_cast, milliseconds} #include // std::size_t #include // std::exception -#include // std::format #include // std::ofstream #include // std::cerr, std::endl #include // std::runtime_error, std::out_of_range @@ -109,7 +112,7 @@ void hardware_sampler::add_event(decltype(event::name) name) { event hardware_sampler::get_event(const std::size_t idx) const { if (idx >= this->num_events()) { - throw std::out_of_range{ std::format("The index {} is out-of-range for the number of events {}!", idx, this->num_events()) }; + throw std::out_of_range{ fmt::format("The index {} is out-of-range for the number of events {}!", idx, this->num_events()) }; } return events_[idx]; @@ -126,10 +129,10 @@ void hardware_sampler::dump_yaml(const char *filename) { file << "---\n\n"; // set the device identification - file << std::format("device_identification: {}\n\n", this->device_identification()); + file << fmt::format("device_identification: {}\n\n", this->device_identification()); // output the start date time of this hardware sampling - file << std::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", std::chrono::current_zone()->to_local(start_date_time_)); + file << fmt::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", start_date_time_); // output the event information std::vector event_time_points{}; @@ -138,22 +141,22 @@ void hardware_sampler::dump_yaml(const char *filename) { event_time_points.push_back(time_point); event_names.push_back(name); } - file << std::format("events:\n" + file << fmt::format("events:\n" " time_points:\n" " unit: \"s\"\n" " values: [{}]\n" " names: [{}]\n\n", - detail::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "), - detail::join(event_names, ", ")); + fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "), + fmt::join(event_names, ", ")); // output the sampling information - file << std::format("sampling_interval: 
{}\n" + file << fmt::format("sampling_interval: {}\n" "time_points:\n" " unit: \"s\"\n" " values: [{}]\n" "{}\n\n", this->sampling_interval(), - detail::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), + fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), this->generate_yaml_string()); } From 0f7a253a1bbf4d81b97914099b894cf0e436f72a Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 11:12:11 +0200 Subject: [PATCH 19/69] Unify temperature related samples. --- README.md | 57 ++-- include/hardware_sampling/cpu/cpu_samples.hpp | 2 +- .../gpu_amd/rocm_smi_samples.hpp | 50 +-- .../gpu_nvidia/nvml_samples.hpp | 16 +- src/hardware_sampling/cpu/cpu_samples.cpp | 28 +- .../cpu/hardware_sampler.cpp | 8 +- .../gpu_amd/hardware_sampler.cpp | 189 ++++++------ .../gpu_amd/rocm_smi_samples.cpp | 290 +++++++++--------- .../gpu_nvidia/hardware_sampler.cpp | 44 +-- .../gpu_nvidia/nvml_samples.cpp | 56 ++-- 10 files changed, 368 insertions(+), 372 deletions(-) diff --git a/README.md b/README.md index bf47501..36b116d 100644 --- a/README.md +++ b/README.md @@ -177,39 +177,36 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} | sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | |:-------------------------|:----:|:-----------:|:--------:|:----------:| -| per_core_temperature | °C | | | | -| core_throttle_percentage | % | | | | -| per_package_temperature | °C | | | | -| num_fans | | int | int | | -| max_fan_speed | | | int | | -| temperature_gpu_min | | | m°C | | -| temperature_gpu_max | | °C | m°C | | -| temperature_hotspot_min | | | m°C | | -| temperature_hotspot_max | | | m°C | | -| temperature_memory_min | | | m°C | | -| temperature_memory_max | | | m°C | | -| temperature_hbm_0_min | | | m°C | | -| temperature_hbm_0_max | | | m°C | | -| temperature_hbm_1_min | | | m°C | MBPS | -| temperature_hbm_1_max | | | m°C | | -| 
temperature_hbm_2_min | | | m°C | B | -| temperature_hbm_2_max | | | m°C | B | -| temperature_hbm_3_min | | | m°C | BPS | -| temperature_hbm_3_max | | | m°C | int | -| fan_speed | | % | % | int | -| temperature_gpu | | °C | m°C | Bit | -| temperature_hotspot | | | m°C | int | -| temperature_memory | | | m°C | str | -| temperature_hbm_0 | | | m°C | B | -| temperature_hbm_1 | | | m°C | B | -| temperature_hbm_2 | | | m°C | int | -| temperature_hbm_3 | | | m°C | int | +| num_fans | - | int | int | | +| fan_speed_min | - | % | - | | +| fan_speed_max | - | % | RPM | | +| temperature_min | - | - | °C | | +| temperature_max | - | °C | °C | | +| memory_temperature_min | - | - | °C | | +| memory_temperature_max | - | °C | °C | | +| hotspot_temperature_min | - | - | °C | | +| hotspot_temperature_max | - | - | °C | | +| hbm_0_temperature_min | - | - | °C | | +| hbm_0_temperature_max | - | - | °C | | +| hbm_1_temperature_min | - | - | °C | | +| hbm_1_temperature_max | - | - | °C | | +| hbm_2_temperature_min | - | - | °C | | +| hbm_2_temperature_max | - | - | °C | | +| hbm_3_temperature_min | - | - | °C | | +| hbm_3_temperature_max | - | - | °C | | +| fan_speed_percentage | - | % | % | | +| temperature | °C | °C | °C | | +| memory_temperature | - | - | °C | | +| hotspot_temperature | - | - | °C | | +| hbm_0_temperature | - | - | °C | | +| hbm_1_temperature | - | - | °C | | +| hbm_2_temperature | - | - | °C | | +| hbm_3_temperature | - | - | °C | | | temperature_{}_max | | | | | | temperature_psu | | | | | | temperature_{} | | | | | -| min_fan_speed | | % | | | -| max_fan_speed | | % | | | -| temperature_mem_max | | °C | | | +| core_temperature | °C | - | - | - | +| core_throttle_percentage | % | - | - | - | ### gfx-related (iGPU) samples diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index b537326..8a90c30 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ 
-207,9 +207,9 @@ class cpu_temperature_samples { */ [[nodiscard]] std::string generate_yaml_string() const; + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature) // the current temperature of the whole package in °C HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, core_temperature) // the current temperature of the core part of the CPU in °C HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_throttle_percent) // the percent of time the CPU has throttled - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_temperature) // the current temperature of the whole package in °C }; /** diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index 407a68c..05deb6b 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -201,31 +201,31 @@ class rocm_smi_temperature_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans) // the number of fans (if any) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, max_fan_speed) // the maximum fan speed - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_edge_min) // the minimum temperature on the GPU's edge temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_edge_max) // the maximum temperature on the GPU's edge temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hotspot_min) // the minimum temperature on the GPU's hotspot temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hotspot_max) // the maximum temperature on the GPU's hotspot temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_memory_min) // the minimum temperature on the GPU's memory temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_memory_max) // the maximum temperature on the GPU's memory temperature 
sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_0_min) // the minimum temperature on the GPU's HBM0 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_0_max) // the maximum temperature on the GPU's HBM0 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_1_min) // the minimum temperature on the GPU's HBM1 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_1_max) // the maximum temperature on the GPU's HBM1 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_2_min) // the minimum temperature on the GPU's HBM2 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_2_max) // the maximum temperature on the GPU's HBM2 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_3_min) // the minimum temperature on the GPU's HBM3 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_3_max) // the maximum temperature on the GPU's HBM3 temperature sensor in m°C - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, fan_speed) // the current fan speed in % - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_edge) // the current temperature on the GPU's edge temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hotspot) // the current temperature on the GPU's hotspot temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_memory) // the current temperature on the GPU's memory temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_0) // the current temperature on the GPU's HBM0 temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_1) // the current temperature on the GPU's HBM1 temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, 
temperature_hbm_2) // the current temperature on the GPU's HBM2 temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_3) // the current temperature on the GPU's HBM3 temperature sensor in m°C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans) // the number of fans (if any) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, fan_speed_max) // the maximum fan speed in RPM + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_min) // the minimum temperature on the GPU's edge temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max) // the maximum temperature on the GPU's edge temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_min) // the minimum temperature on the GPU's memory temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max) // the maximum temperature on the GPU's memory temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hotspot_temperature_min) // the minimum temperature on the GPU's hotspot temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hotspot_temperature_max) // the maximum temperature on the GPU's hotspot temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_0_temperature_min) // the minimum temperature on the GPU's HBM0 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_0_temperature_max) // the maximum temperature on the GPU's HBM0 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_1_temperature_min) // the minimum temperature on the GPU's HBM1 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_1_temperature_max) // the maximum temperature on the GPU's HBM1 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_2_temperature_min) // the minimum temperature on the GPU's HBM2 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_2_temperature_max) // the maximum temperature on the GPU's 
HBM2 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_3_temperature_min) // the minimum temperature on the GPU's HBM3 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_3_temperature_max) // the maximum temperature on the GPU's HBM3 temperature sensor in °C + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage) // the current fan speed in % + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature) // the current temperature on the GPU's edge temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hotspot_temperature) // the current temperature on the GPU's hotspot temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_temperature) // the current temperature on the GPU's memory temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_0_temperature) // the current temperature on the GPU's HBM0 temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_1_temperature) // the current temperature on the GPU's HBM1 temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_2_temperature) // the current temperature on the GPU's HBM2 temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_3_temperature) // the current temperature on the GPU's HBM3 temperature sensor in °C }; /** diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index 2757a60..b1af6bc 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -208,14 +208,14 @@ class nvml_temperature_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_fans) // the number of fans (if any) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, min_fan_speed) // the minimum fan speed the user can set in % - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, max_fan_speed) // the maximum 
fan speed the user can set in % - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, temperature_threshold_gpu_max) // the maximum graphics temperature threshold in °C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, temperature_threshold_mem_max) // the maximum memory temperature threshold in °C - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, fan_speed) // the current intended fan speed in % - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, temperature_gpu) // the current GPU temperature in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_fans) // the number of fans (if any) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, fan_speed_min) // the minimum fan speed the user can set in % + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, fan_speed_max) // the maximum fan speed the user can set in % + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max) // the maximum graphics temperature threshold in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max) // the maximum memory temperature threshold in °C + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage) // the current intended fan speed in % + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature) // the current GPU temperature in °C }; /** diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index 02148f0..80ed9b4 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -482,9 +482,17 @@ std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) { std::string cpu_temperature_samples::generate_yaml_string() const { std::string str{ "temperature:\n" }; + // the temperature of the whole package + if (this->temperature_.has_value()) { + str += fmt::format(" temperature:\n" + " turbostat_name: \"PkgTmp\"\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->temperature_.value(), ", ")); + } // the temperature of the cores if (this->core_temperature_.has_value()) { - str += 
fmt::format(" per_core_temperature:\n" + str += fmt::format(" core_temperature:\n" " turbostat_name: \"CoreTmp\"\n" " unit: \"°C\"\n" " values: [{}]\n", @@ -498,14 +506,6 @@ std::string cpu_temperature_samples::generate_yaml_string() const { " values: [{}]\n", fmt::join(this->core_throttle_percent_.value(), ", ")); } - // the temperature of the whole package - if (this->package_temperature_.has_value()) { - str += fmt::format(" per_package_temperature:\n" - " turbostat_name: \"PkgTmp\"\n" - " unit: \"°C\"\n" - " values: [{}]\n", - fmt::join(this->package_temperature_.value(), ", ")); - } // remove last newline str.pop_back(); @@ -514,12 +514,12 @@ std::string cpu_temperature_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_temperature_samples &samples) { - return out << fmt::format("core_temperature [°C]: [{}]\n" - "core_throttle_percent [%]: [{}]\n" - "package_temperature [°C]: [{}]", + return out << fmt::format("temperature [°C]: [{}]\n" + "core_temperature [°C]: [{}]\n" + "core_throttle_percent [%]: [{}]", + fmt::join(detail::value_or_default(samples.get_temperature()), ", "), fmt::join(detail::value_or_default(samples.get_core_temperature()), ", "), - fmt::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "), - fmt::join(detail::value_or_default(samples.get_package_temperature()), ", ")); + fmt::join(detail::value_or_default(samples.get_core_throttle_percent()), ", ")); } //*************************************************************************************************************************************// diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 6c8471d..cf2eeda 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -198,8 +198,8 @@ void cpu_hardware_sampler::sampling_loop() { using vector_type = 
decltype(temperature_samples_.core_throttle_percent_)::value_type; temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; } else if (header[i] == "PkgTmp") { - using vector_type = decltype(temperature_samples_.package_temperature_)::value_type; - temperature_samples_.package_temperature_ = vector_type{ detail::convert_to(values[i]) }; + using vector_type = decltype(temperature_samples_.temperature_)::value_type; + temperature_samples_.temperature_ = vector_type{ detail::convert_to(values[i]) }; } else if (header[i] == "GFX%rc6") { using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to(values[i]) }; @@ -345,8 +345,8 @@ void cpu_hardware_sampler::sampling_loop() { using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; temperature_samples_.core_throttle_percent_->push_back(detail::convert_to(values[i])); } else if (header[i] == "PkgTmp") { - using vector_type = decltype(temperature_samples_.package_temperature_)::value_type; - temperature_samples_.package_temperature_->push_back(detail::convert_to(values[i])); + using vector_type = decltype(temperature_samples_.temperature_)::value_type; + temperature_samples_.temperature_->push_back(detail::convert_to(values[i])); } else if (header[i] == "GFX%rc6") { using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to(values[i])); diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 0b80c81..76cde4b 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -323,125 +323,127 @@ void gpu_amd_hardware_sampler::sampling_loop() { // retrieve fixed temperature related information { std::uint32_t fan_id{ 0 }; - 
decltype(temperature_samples_.fan_speed_)::value_type::value_type fan_speed{}; + std::int64_t fan_speed{}; while (rsmi_dev_fan_speed_get(device_id_, fan_id, &fan_speed) == RSMI_STATUS_SUCCESS) { if (fan_id == 0) { // queried samples -> retrieved every iteration if available - temperature_samples_.fan_speed_ = decltype(temperature_samples_.fan_speed_)::value_type{ fan_speed }; + const auto percentage = static_cast(fan_speed) / + static_cast(RSMI_MAX_FAN_SPEED); + temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ percentage }; } ++fan_id; } temperature_samples_.num_fans_ = fan_id; - decltype(temperature_samples_.max_fan_speed_)::value_type max_fan_speed{}; + decltype(temperature_samples_.fan_speed_max_)::value_type max_fan_speed{}; if (rsmi_dev_fan_speed_max_get(device_id_, std::uint32_t{ 0 }, &max_fan_speed) == RSMI_STATUS_SUCCESS) { - temperature_samples_.max_fan_speed_ = max_fan_speed; + temperature_samples_.fan_speed_max_ = max_fan_speed; } - decltype(temperature_samples_.temperature_edge_min_)::value_type temperature_edge_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MIN, &temperature_edge_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_edge_min_ = temperature_edge_min; + std::int64_t temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MIN, &temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.temperature_min_ = static_cast(temperature_min) / 1000.0; } - decltype(temperature_samples_.temperature_edge_max_)::value_type temperature_edge_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temperature_edge_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_edge_max_ = temperature_edge_min; + std::int64_t temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temperature_max) == RSMI_STATUS_SUCCESS) { + 
temperature_samples_.temperature_max_ = static_cast(temperature_max) / 1000.0; } - decltype(temperature_samples_.temperature_hotspot_min_)::value_type temperature_hotspot_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MIN, &temperature_hotspot_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hotspot_min_ = temperature_hotspot_min; + std::int64_t memory_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MIN, &memory_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.memory_temperature_min_ = static_cast(memory_temperature_min) / 1000.0; } - decltype(temperature_samples_.temperature_hotspot_max_)::value_type temperature_hotspot_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MAX, &temperature_hotspot_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hotspot_max_ = temperature_hotspot_max; + std::int64_t memory_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MAX, &memory_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.memory_temperature_max_ = static_cast(memory_temperature_max) / 1000.0; } - decltype(temperature_samples_.temperature_memory_min_)::value_type temperature_memory_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MIN, &temperature_memory_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_memory_min_ = temperature_memory_min; + std::int64_t hotspot_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MIN, &hotspot_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hotspot_temperature_min_ = static_cast(hotspot_temperature_min) / 1000.0; } - decltype(temperature_samples_.temperature_memory_max_)::value_type temperature_memory_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MAX, 
&temperature_memory_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_memory_max_ = temperature_memory_max; + std::int64_t hotspot_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MAX, &hotspot_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hotspot_temperature_max_ = static_cast(hotspot_temperature_max) / 1000.0; } - decltype(temperature_samples_.temperature_hbm_0_min_)::value_type temperature_hbm_0_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MIN, &temperature_hbm_0_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_0_min_ = temperature_hbm_0_min; + std::int64_t hbm_0_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MIN, &hbm_0_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_0_temperature_min_ = static_cast(hbm_0_temperature_min) / 1000.0; } - decltype(temperature_samples_.temperature_hbm_0_max_)::value_type temperature_hbm_0_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MAX, &temperature_hbm_0_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_0_max_ = temperature_hbm_0_max; + std::int64_t hbm_0_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MAX, &hbm_0_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_0_temperature_max_ = static_cast(hbm_0_temperature_max) / 1000.0; } - decltype(temperature_samples_.temperature_hbm_1_min_)::value_type temperature_hbm_1_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MIN, &temperature_hbm_1_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_1_min_ = temperature_hbm_1_min; + std::int64_t hbm_1_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MIN, &hbm_1_temperature_min) == RSMI_STATUS_SUCCESS) { + 
temperature_samples_.hbm_1_temperature_min_ = static_cast(hbm_1_temperature_min) / 1000.0; } - decltype(temperature_samples_.temperature_hbm_1_max_)::value_type temperature_hbm_1_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MAX, &temperature_hbm_1_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_1_max_ = temperature_hbm_1_max; + std::int64_t hbm_1_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MAX, &hbm_1_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_1_temperature_max_ = static_cast(hbm_1_temperature_max) / 1000.0; } - decltype(temperature_samples_.temperature_hbm_2_min_)::value_type temperature_hbm_2_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MIN, &temperature_hbm_2_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_2_min_ = temperature_hbm_2_min; + std::int64_t hbm_2_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MIN, &hbm_2_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_2_temperature_min_ = static_cast(hbm_2_temperature_min) / 1000.0; } - decltype(temperature_samples_.temperature_hbm_2_max_)::value_type temperature_hbm_2_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MAX, &temperature_hbm_2_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_2_max_ = temperature_hbm_2_max; + std::int64_t hbm_2_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MAX, &hbm_2_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_2_temperature_max_ = static_cast(hbm_2_temperature_max) / 1000.0; } - decltype(temperature_samples_.temperature_hbm_3_min_)::value_type temperature_hbm_3_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MIN, &temperature_hbm_3_min) == RSMI_STATUS_SUCCESS) { - 
temperature_samples_.temperature_hbm_3_min_ = temperature_hbm_3_min; + std::int64_t hbm_3_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MIN, &hbm_3_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_3_temperature_min_ = static_cast(hbm_3_temperature_min) / 1000.0; } - decltype(temperature_samples_.temperature_hbm_3_max_)::value_type temperature_hbm_3_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MAX, &temperature_hbm_3_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_3_max_ = temperature_hbm_3_max; + std::int64_t hbm_3_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MAX, &hbm_3_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_3_temperature_max_ = static_cast(hbm_3_temperature_max) / 1000.0; } // queried samples -> retrieved every iteration if available - decltype(temperature_samples_.temperature_edge_)::value_type::value_type temperature_edge{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temperature_edge) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_edge_ = decltype(temperature_samples_.temperature_edge_)::value_type{ temperature_edge }; + std::int64_t temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{ static_cast(temperature) / 1000.0 }; } - decltype(temperature_samples_.temperature_hotspot_)::value_type::value_type temperature_hotspot{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &temperature_hotspot) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hotspot_ = decltype(temperature_samples_.temperature_hotspot_)::value_type{ temperature_hotspot }; + std::int64_t hotspot_temperature{}; + 
if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &hotspot_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hotspot_temperature_ = decltype(temperature_samples_.hotspot_temperature_)::value_type{ static_cast(hotspot_temperature) / 1000.0 }; } - decltype(temperature_samples_.temperature_memory_)::value_type::value_type temperature_memory{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &temperature_memory) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_memory_ = decltype(temperature_samples_.temperature_memory_)::value_type{ temperature_memory }; + std::int64_t memory_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &memory_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.memory_temperature_ = decltype(temperature_samples_.memory_temperature_)::value_type{ static_cast(memory_temperature) / 1000.0 }; } - decltype(temperature_samples_.temperature_hbm_0_)::value_type::value_type temperature_hbm_0{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &temperature_hbm_0) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_0_ = decltype(temperature_samples_.temperature_hbm_0_)::value_type{ temperature_hbm_0 }; + std::int64_t hbm_0_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &hbm_0_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_0_temperature_ = decltype(temperature_samples_.hbm_0_temperature_)::value_type{ static_cast(hbm_0_temperature) / 1000.0 }; } - decltype(temperature_samples_.temperature_hbm_1_)::value_type::value_type temperature_hbm_1{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &temperature_hbm_1) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_1_ = decltype(temperature_samples_.temperature_hbm_1_)::value_type{ 
temperature_hbm_1 }; + std::int64_t hbm_1_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &hbm_1_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_1_temperature_ = decltype(temperature_samples_.hbm_1_temperature_)::value_type{ static_cast(hbm_1_temperature) / 1000.0 }; } - decltype(temperature_samples_.temperature_hbm_2_)::value_type::value_type temperature_hbm_2{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &temperature_hbm_2) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_2_ = decltype(temperature_samples_.temperature_hbm_2_)::value_type{ temperature_hbm_2 }; + std::int64_t hbm_2_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &hbm_2_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_2_temperature_ = decltype(temperature_samples_.hbm_2_temperature_)::value_type{ static_cast(hbm_2_temperature) / 1000.0 }; } - decltype(temperature_samples_.temperature_hbm_3_)::value_type::value_type temperature_hbm_3{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &temperature_hbm_3) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_3_ = decltype(temperature_samples_.temperature_hbm_3_)::value_type{ temperature_hbm_3 }; + std::int64_t hbm_3_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &hbm_3_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_3_temperature_ = decltype(temperature_samples_.hbm_3_temperature_)::value_type{ static_cast(hbm_3_temperature) / 1000.0 }; } } @@ -598,52 +600,53 @@ void gpu_amd_hardware_sampler::sampling_loop() { // retrieve temperature related samples { - if (temperature_samples_.fan_speed_.has_value()) { - decltype(temperature_samples_.fan_speed_)::value_type::value_type value{}; + if (temperature_samples_.fan_speed_percentage_.has_value()) { + 
std::int64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value)); - temperature_samples_.fan_speed_->push_back(value); + temperature_samples_.fan_speed_percentage_->push_back(static_cast(value) / + static_cast(RSMI_MAX_FAN_SPEED)); } - if (temperature_samples_.temperature_edge_.has_value()) { - decltype(temperature_samples_.temperature_edge_)::value_type::value_type value{}; + if (temperature_samples_.temperature_.has_value()) { + std::int64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_edge_->push_back(value); + temperature_samples_.temperature_->push_back(static_cast(value) / 1000.0); } - if (temperature_samples_.temperature_hotspot_.has_value()) { - decltype(temperature_samples_.temperature_hotspot_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hotspot_->push_back(value); + if (temperature_samples_.memory_temperature_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value)); + temperature_samples_.memory_temperature_->push_back(static_cast(value) / 1000.0); } - if (temperature_samples_.temperature_memory_.has_value()) { - decltype(temperature_samples_.temperature_memory_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_memory_->push_back(value); + if (temperature_samples_.hotspot_temperature_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value)); + temperature_samples_.hotspot_temperature_->push_back(static_cast(value) / 1000.0); } - if 
(temperature_samples_.temperature_hbm_0_.has_value()) { - decltype(temperature_samples_.temperature_hbm_0_)::value_type::value_type value{}; + if (temperature_samples_.hbm_0_temperature_.has_value()) { + std::int64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hbm_0_->push_back(value); + temperature_samples_.hbm_0_temperature_->push_back(static_cast(value) / 1000.0); } - if (temperature_samples_.temperature_hbm_1_.has_value()) { - decltype(temperature_samples_.temperature_hbm_1_)::value_type::value_type value{}; + if (temperature_samples_.hbm_1_temperature_.has_value()) { + std::int64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hbm_1_->push_back(value); + temperature_samples_.hbm_1_temperature_->push_back(static_cast(value) / 1000.0); } - if (temperature_samples_.temperature_hbm_2_.has_value()) { - decltype(temperature_samples_.temperature_hbm_2_)::value_type::value_type value{}; + if (temperature_samples_.hbm_2_temperature_.has_value()) { + std::int64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hbm_2_->push_back(value); + temperature_samples_.hbm_2_temperature_->push_back(static_cast(value) / 1000.0); } - if (temperature_samples_.temperature_hbm_3_.has_value()) { - decltype(temperature_samples_.temperature_hbm_3_)::value_type::value_type value{}; + if (temperature_samples_.hbm_3_temperature_.has_value()) { + std::int64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hbm_3_->push_back(value); + temperature_samples_.hbm_3_temperature_->push_back(static_cast(value) / 1000.0); } } } diff --git 
a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index 568082e..e8dcffa 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -423,170 +423,166 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const { this->num_fans_.value()); } // maximum fan speed - if (this->max_fan_speed_.has_value()) { - str += fmt::format(" max_fan_speed:\n" - " unit: \"int\"\n" + if (this->fan_speed_max_.has_value()) { + str += fmt::format(" fan_speed_max:\n" + " unit: \"RPM\"\n" " values: {}\n", - this->max_fan_speed_.value()); + this->fan_speed_max_.value()); } // minimum GPU edge temperature - if (this->temperature_edge_min_.has_value()) { - str += fmt::format(" temperature_gpu_min:\n" - " unit: \"m°C\"\n" + if (this->temperature_min_.has_value()) { + str += fmt::format(" temperature_min:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_edge_min_.value()); + this->temperature_min_.value()); } // maximum GPU edge temperature - if (this->temperature_edge_max_.has_value()) { - str += fmt::format(" temperature_gpu_max:\n" - " unit: \"m°C\"\n" + if (this->temperature_max_.has_value()) { + str += fmt::format(" temperature_max:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_edge_max_.value()); + this->temperature_max_.value()); } - // minimum GPU hotspot temperature - if (this->temperature_hotspot_min_.has_value()) { - str += fmt::format(" temperature_hotspot_min:\n" - " unit: \"m°C\"\n" + // minimum GPU memory temperature + if (this->memory_temperature_min_.has_value()) { + str += fmt::format(" memory_temperature_min:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_hotspot_min_.value()); + this->memory_temperature_min_.value()); } - // maximum GPU hotspot temperature - if (this->temperature_hotspot_max_.has_value()) { - str += fmt::format(" temperature_hotspot_max:\n" - " unit: \"m°C\"\n" + // maximum GPU memory 
temperature + if (this->memory_temperature_max_.has_value()) { + str += fmt::format(" memory_temperature_max:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_hotspot_max_.value()); + this->memory_temperature_max_.value()); } - // minimum GPU memory temperature - if (this->temperature_memory_min_.has_value()) { - str += fmt::format(" temperature_memory_min:\n" - " unit: \"m°C\"\n" + // minimum GPU hotspot temperature + if (this->hotspot_temperature_min_.has_value()) { + str += fmt::format(" hotspot_temperature_min:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_memory_min_.value()); + this->hotspot_temperature_min_.value()); } - // maximum GPU memory temperature - if (this->temperature_memory_max_.has_value()) { - str += fmt::format(" temperature_memory_max:\n" - " unit: \"m°C\"\n" + // maximum GPU hotspot temperature + if (this->hotspot_temperature_max_.has_value()) { + str += fmt::format(" hotspot_temperature_max:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_memory_max_.value()); + this->hotspot_temperature_max_.value()); } // minimum GPU HBM 0 temperature - if (this->temperature_hbm_0_min_.has_value()) { - str += fmt::format(" temperature_hbm_0_min:\n" - " unit: \"m°C\"\n" + if (this->hbm_0_temperature_min_.has_value()) { + str += fmt::format(" hbm_0_temperature_min:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_hbm_0_min_.value()); + this->hbm_0_temperature_min_.value()); } // maximum GPU HBM 0 temperature - if (this->temperature_hbm_0_max_.has_value()) { - str += fmt::format(" temperature_hbm_0_max:\n" - " unit: \"m°C\"\n" + if (this->hbm_0_temperature_max_.has_value()) { + str += fmt::format(" hbm_0_temperature_max:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_hbm_0_max_.value()); + this->hbm_0_temperature_max_.value()); } // minimum GPU HBM 1 temperature - if (this->temperature_hbm_1_min_.has_value()) { - str += fmt::format(" temperature_hbm_1_min:\n" - " unit: \"m°C\"\n" + if 
(this->hbm_1_temperature_min_.has_value()) { + str += fmt::format(" hbm_1_temperature_min:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_hbm_1_min_.value()); + this->hbm_1_temperature_min_.value()); } // maximum GPU HBM 1 temperature - if (this->temperature_hbm_1_max_.has_value()) { - str += fmt::format(" temperature_hbm_1_max:\n" - " unit: \"m°C\"\n" + if (this->hbm_1_temperature_max_.has_value()) { + str += fmt::format(" hbm_1_temperature_max:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_hbm_1_max_.value()); + this->hbm_1_temperature_max_.value()); } // minimum GPU HBM 2 temperature - if (this->temperature_hbm_2_min_.has_value()) { - str += fmt::format(" temperature_hbm_2_min:\n" - " unit: \"m°C\"\n" + if (this->hbm_2_temperature_min_.has_value()) { + str += fmt::format(" hbm_2_temperature_min:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_hbm_2_min_.value()); + this->hbm_2_temperature_min_.value()); } // maximum GPU HBM 2 temperature - if (this->temperature_hbm_2_max_.has_value()) { - str += fmt::format(" temperature_hbm_2_max:\n" - " unit: \"m°C\"\n" + if (this->hbm_2_temperature_max_.has_value()) { + str += fmt::format(" hbm_2_temperature_max:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_hbm_2_max_.value()); + this->hbm_2_temperature_max_.value()); } // minimum GPU HBM 3 temperature - if (this->temperature_hbm_3_min_.has_value()) { - str += fmt::format(" temperature_hbm_3_min:\n" - " unit: \"m°C\"\n" + if (this->hbm_3_temperature_min_.has_value()) { + str += fmt::format(" hbm_3_temperature_min:\n" + " unit: \"°C\"\n" " values: {}\n", - this->temperature_hbm_3_min_.value()); + this->hbm_3_temperature_min_.value()); } // maximum GPU HBM 3 temperature - if (this->temperature_hbm_3_max_.has_value()) { - str += fmt::format(" temperature_hbm_3_max:\n" - " unit: \"m°C\"\n" + if (this->hbm_3_temperature_max_.has_value()) { + str += fmt::format(" hbm_3_temperature_max:\n" + " unit: \"°C\"\n" " values: {}\n", - 
this->temperature_hbm_3_max_.value()); + this->hbm_3_temperature_max_.value()); } // fan speed - if (this->fan_speed_.has_value()) { - std::vector fan_speed_percent(this->fan_speed_->size()); - for (std::size_t i = 0; i < fan_speed_percent.size(); ++i) { - fan_speed_percent[i] = static_cast(this->fan_speed_.value()[i]) / static_cast(RSMI_MAX_FAN_SPEED); - } - str += fmt::format(" fan_speed:\n" + if (this->fan_speed_percentage_.has_value()) { + str += fmt::format(" fan_speed_percentage:\n" " unit: \"percentage\"\n" " values: [{}]\n", - fmt::join(fan_speed_percent, ", ")); + fmt::join(this->fan_speed_percentage_.value(), ", ")); } // GPU edge temperature - if (this->temperature_edge_.has_value()) { - str += fmt::format(" temperature_gpu:\n" - " unit: \"m°C\"\n" + if (this->temperature_.has_value()) { + str += fmt::format(" temperature:\n" + " unit: \"°C\"\n" " values: [{}]\n", - fmt::join(this->temperature_edge_.value(), ", ")); + fmt::join(this->temperature_.value(), ", ")); } - // GPU hotspot temperature - if (this->temperature_hotspot_.has_value()) { - str += fmt::format(" temperature_hotspot:\n" - " unit: \"m°C\"\n" + // GPU memory temperature + if (this->memory_temperature_.has_value()) { + str += fmt::format(" memory_temperature:\n" + " unit: \"°C\"\n" " values: [{}]\n", - fmt::join(this->temperature_hotspot_.value(), ", ")); + fmt::join(this->memory_temperature_.value(), ", ")); } - // GPU memory temperature - if (this->temperature_memory_.has_value()) { - str += fmt::format(" temperature_memory:\n" - " unit: \"m°C\"\n" + // GPU hotspot temperature + if (this->hotspot_temperature_.has_value()) { + str += fmt::format(" hotspot_temperature:\n" + " unit: \"°C\"\n" " values: [{}]\n", - fmt::join(this->temperature_memory_.value(), ", ")); + fmt::join(this->hotspot_temperature_.value(), ", ")); } // GPU HBM 0 temperature - if (this->temperature_hbm_0_.has_value()) { - str += fmt::format(" temperature_hbm_0:\n" - " unit: \"m°C\"\n" + if 
(this->hbm_0_temperature_.has_value()) { + str += fmt::format(" hbm_0_temperature:\n" + " unit: \"°C\"\n" " values: [{}]\n", - fmt::join(this->temperature_hbm_0_.value(), ", ")); + fmt::join(this->hbm_0_temperature_.value(), ", ")); } // GPU HBM 1 temperature - if (this->temperature_hbm_1_.has_value()) { - str += fmt::format(" temperature_hbm_1:\n" - " unit: \"m°C\"\n" + if (this->hbm_1_temperature_.has_value()) { + str += fmt::format(" hbm_1_temperature:\n" + " unit: \"°C\"\n" " values: [{}]\n", - fmt::join(this->temperature_hbm_1_.value(), ", ")); + fmt::join(this->hbm_1_temperature_.value(), ", ")); } // GPU HBM 2 temperature - if (this->temperature_hbm_2_.has_value()) { - str += fmt::format(" temperature_hbm_2:\n" - " unit: \"m°C\"\n" + if (this->hbm_2_temperature_.has_value()) { + str += fmt::format(" hbm_2_temperature:\n" + " unit: \"°C\"\n" " values: [{}]\n", - fmt::join(this->temperature_hbm_2_.value(), ", ")); + fmt::join(this->hbm_2_temperature_.value(), ", ")); } // GPU HBM 3 temperature - if (this->temperature_hbm_3_.has_value()) { - str += fmt::format(" temperature_hbm_3:\n" - " unit: \"m°C\"\n" + if (this->hbm_3_temperature_.has_value()) { + str += fmt::format(" hbm_3_temperature:\n" + " unit: \"°C\"\n" " values: [{}]\n", - fmt::join(this->temperature_hbm_3_.value(), ", ")); + fmt::join(this->hbm_3_temperature_.value(), ", ")); } // remove last newline @@ -597,53 +593,53 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples) { return out << fmt::format("num_fans [int]: {}\n" - "max_fan_speed [int]: {}\n" - "temperature_edge_min [m°C]: {}\n" - "temperature_edge_max [m°C]: {}\n" - "temperature_hotspot_min [m°C]: {}\n" - "temperature_hotspot_max [m°C]: {}\n" - "temperature_memory_min [m°C]: {}\n" - "temperature_memory_max [m°C]: {}\n" - "temperature_hbm_0_min [m°C]: {}\n" - "temperature_hbm_0_max [m°C]: {}\n" - "temperature_hbm_1_min [m°C]: {}\n" 
- "temperature_hbm_1_max [m°C]: {}\n" - "temperature_hbm_2_min [m°C]: {}\n" - "temperature_hbm_2_max [m°C]: {}\n" - "temperature_hbm_3_min [m°C]: {}\n" - "temperature_hbm_3_max [m°C]: {}\n" - "fan_speed [%]: [{}]\n" - "temperature_edge [m°C]: [{}]\n" - "temperature_hotspot [m°C]: [{}]\n" - "temperature_memory [m°C]: [{}]\n" - "temperature_hbm_0 [m°C]: [{}]\n" - "temperature_hbm_1 [m°C]: [{}]\n" - "temperature_hbm_2 [m°C]: [{}]\n" - "temperature_hbm_3 [m°C]: [{}]", + "fan_speed_max [RPM]: {}\n" + "temperature_min [°C]: {}\n" + "temperature_max [°C]: {}\n" + "memory_temperature_min [°C]: {}\n" + "memory_temperature_max [°C]: {}\n" + "hotspot_temperature_min [°C]: {}\n" + "hotspot_temperature_max [°C]: {}\n" + "hbm_0_temperature_min [°C]: {}\n" + "hbm_0_temperature_max [°C]: {}\n" + "hbm_1_temperature_min [°C]: {}\n" + "hbm_1_temperature_max [°C]: {}\n" + "hbm_2_temperature_min [°C]: {}\n" + "hbm_2_temperature_max [°C]: {}\n" + "hbm_3_temperature_min [°C]: {}\n" + "hbm_3_temperature_max [°C]: {}\n" + "fan_speed_percentage [%]: [{}]\n" + "temperature [°C]: [{}]\n" + "memory_temperature [°C]: [{}]\n" + "hotspot_temperature [°C]: [{}]\n" + "hbm_0_temperature [°C]: [{}]\n" + "hbm_1_temperature [°C]: [{}]\n" + "hbm_2_temperature [°C]: [{}]\n" + "hbm_3_temperature [°C]: [{}]", detail::value_or_default(samples.get_num_fans()), - detail::value_or_default(samples.get_max_fan_speed()), - detail::value_or_default(samples.get_temperature_edge_min()), - detail::value_or_default(samples.get_temperature_edge_max()), - detail::value_or_default(samples.get_temperature_hotspot_min()), - detail::value_or_default(samples.get_temperature_hotspot_max()), - detail::value_or_default(samples.get_temperature_memory_min()), - detail::value_or_default(samples.get_temperature_memory_max()), - detail::value_or_default(samples.get_temperature_hbm_0_min()), - detail::value_or_default(samples.get_temperature_hbm_0_max()), - detail::value_or_default(samples.get_temperature_hbm_1_min()), - 
detail::value_or_default(samples.get_temperature_hbm_1_max()), - detail::value_or_default(samples.get_temperature_hbm_2_min()), - detail::value_or_default(samples.get_temperature_hbm_2_max()), - detail::value_or_default(samples.get_temperature_hbm_3_min()), - detail::value_or_default(samples.get_temperature_hbm_3_max()), - fmt::join(detail::value_or_default(samples.get_fan_speed()), ", "), - fmt::join(detail::value_or_default(samples.get_temperature_edge()), ", "), - fmt::join(detail::value_or_default(samples.get_temperature_hotspot()), ", "), - fmt::join(detail::value_or_default(samples.get_temperature_memory()), ", "), - fmt::join(detail::value_or_default(samples.get_temperature_hbm_0()), ", "), - fmt::join(detail::value_or_default(samples.get_temperature_hbm_1()), ", "), - fmt::join(detail::value_or_default(samples.get_temperature_hbm_2()), ", "), - fmt::join(detail::value_or_default(samples.get_temperature_hbm_3()), ", ")); + detail::value_or_default(samples.get_fan_speed_max()), + detail::value_or_default(samples.get_temperature_min()), + detail::value_or_default(samples.get_temperature_max()), + detail::value_or_default(samples.get_memory_temperature_min()), + detail::value_or_default(samples.get_memory_temperature_max()), + detail::value_or_default(samples.get_hotspot_temperature_min()), + detail::value_or_default(samples.get_hotspot_temperature_max()), + detail::value_or_default(samples.get_hbm_0_temperature_min()), + detail::value_or_default(samples.get_hbm_0_temperature_max()), + detail::value_or_default(samples.get_hbm_1_temperature_min()), + detail::value_or_default(samples.get_hbm_1_temperature_max()), + detail::value_or_default(samples.get_hbm_2_temperature_min()), + detail::value_or_default(samples.get_hbm_2_temperature_max()), + detail::value_or_default(samples.get_hbm_3_temperature_min()), + detail::value_or_default(samples.get_hbm_3_temperature_max()), + fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "), + 
fmt::join(detail::value_or_default(samples.get_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_hotspot_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_hbm_0_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_hbm_1_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_hbm_2_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_hbm_3_temperature()), ", ")); } } // namespace hws diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 17e7049..ebb65d3 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -378,33 +378,33 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { } if (temperature_samples_.num_fans_.has_value() && temperature_samples_.num_fans_.value() > 0) { - decltype(temperature_samples_.min_fan_speed_)::value_type min_fan_speed{}; - decltype(temperature_samples_.max_fan_speed_)::value_type max_fan_speed{}; + decltype(temperature_samples_.fan_speed_min_)::value_type min_fan_speed{}; + decltype(temperature_samples_.fan_speed_max_)::value_type max_fan_speed{}; if (nvmlDeviceGetMinMaxFanSpeed(device, &min_fan_speed, &max_fan_speed) == NVML_SUCCESS) { - temperature_samples_.min_fan_speed_ = min_fan_speed; - temperature_samples_.max_fan_speed_ = max_fan_speed; + temperature_samples_.fan_speed_min_ = min_fan_speed; + temperature_samples_.fan_speed_max_ = max_fan_speed; } } - decltype(temperature_samples_.temperature_threshold_gpu_max_)::value_type temperature_threshold_gpu_max{}; - if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_GPU_MAX, &temperature_threshold_gpu_max) == NVML_SUCCESS) { - temperature_samples_.temperature_threshold_gpu_max_ = temperature_threshold_gpu_max; + unsigned int temperature_max{}; + 
if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_GPU_MAX, &temperature_max) == NVML_SUCCESS) { + temperature_samples_.temperature_max_ = static_cast(temperature_max); } - decltype(temperature_samples_.temperature_threshold_mem_max_)::value_type temperature_threshold_mem_max{}; - if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_MEM_MAX, &temperature_threshold_mem_max) == NVML_SUCCESS) { - temperature_samples_.temperature_threshold_mem_max_ = temperature_threshold_mem_max; + unsigned int memory_temperature_max{}; + if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_MEM_MAX, &memory_temperature_max) == NVML_SUCCESS) { + temperature_samples_.memory_temperature_max_ = static_cast(memory_temperature_max); } // queried samples -> retrieved every iteration if available - decltype(temperature_samples_.fan_speed_)::value_type::value_type fan_speed{}; - if (nvmlDeviceGetFanSpeed(device, &fan_speed) == NVML_SUCCESS) { - temperature_samples_.fan_speed_ = decltype(temperature_samples_.fan_speed_)::value_type{ fan_speed }; + unsigned int fan_speed_percentage{}; + if (nvmlDeviceGetFanSpeed(device, &fan_speed_percentage) == NVML_SUCCESS) { + temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ static_cast(fan_speed_percentage) }; } - decltype(temperature_samples_.temperature_gpu_)::value_type::value_type temperature_gpu{}; - if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature_gpu) == NVML_SUCCESS) { - temperature_samples_.temperature_gpu_ = decltype(temperature_samples_.temperature_gpu_)::value_type{ temperature_gpu }; + unsigned int temperature{}; + if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature) == NVML_SUCCESS) { + temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{ static_cast(temperature) }; } } @@ -513,16 +513,16 @@ void gpu_nvidia_hardware_sampler::sampling_loop() 
{ // retrieve temperature related information { - if (temperature_samples_.fan_speed_.has_value()) { - decltype(temperature_samples_.fan_speed_)::value_type::value_type value{}; + if (temperature_samples_.fan_speed_percentage_.has_value()) { + unsigned int value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value)); - temperature_samples_.fan_speed_->push_back(value); + temperature_samples_.fan_speed_percentage_->push_back(static_cast(value)); } - if (temperature_samples_.temperature_gpu_.has_value()) { - decltype(temperature_samples_.temperature_gpu_)::value_type::value_type value{}; + if (temperature_samples_.temperature_.has_value()) { + unsigned int value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value)); - temperature_samples_.temperature_gpu_->push_back(value); + temperature_samples_.temperature_->push_back(static_cast(value)); } } } diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 71fb7a6..9258f24 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -458,47 +458,47 @@ std::string nvml_temperature_samples::generate_yaml_string() const { this->num_fans_.value()); } // min fan speed - if (this->min_fan_speed_.has_value()) { - str += fmt::format(" min_fan_speed:\n" + if (this->fan_speed_min_.has_value()) { + str += fmt::format(" fan_speed_min:\n" " unit: \"percentage\"\n" " values: {}\n", - this->min_fan_speed_.value()); + this->fan_speed_min_.value()); } // max fan speed - if (this->max_fan_speed_.has_value()) { - str += fmt::format(" max_fan_speed:\n" + if (this->fan_speed_max_.has_value()) { + str += fmt::format(" fan_speed_max:\n" " unit: \"percentage\"\n" " values: {}\n", - this->max_fan_speed_.value()); + this->fan_speed_max_.value()); } // temperature threshold GPU max - if (this->temperature_threshold_gpu_max_.has_value()) { - str += fmt::format(" 
temperature_gpu_max:\n" + if (this->temperature_max_.has_value()) { + str += fmt::format(" temperature_max:\n" " unit: \"°C\"\n" " values: {}\n", - this->temperature_threshold_gpu_max_.value()); + this->temperature_max_.value()); } // temperature threshold memory max - if (this->temperature_threshold_mem_max_.has_value()) { - str += fmt::format(" temperature_mem_max:\n" + if (this->memory_temperature_max_.has_value()) { + str += fmt::format(" memory_temperature_max:\n" " unit: \"°C\"\n" " values: {}\n", - this->temperature_threshold_mem_max_.value()); + this->memory_temperature_max_.value()); } // fan speed - if (this->fan_speed_.has_value()) { - str += fmt::format(" fan_speed:\n" + if (this->fan_speed_percentage_.has_value()) { + str += fmt::format(" fan_speed_percentage:\n" " unit: \"percentage\"\n" " values: [{}]\n", - fmt::join(this->fan_speed_.value(), ", ")); + fmt::join(this->fan_speed_percentage_.value(), ", ")); } // temperature GPU - if (this->temperature_gpu_.has_value()) { - str += fmt::format(" temperature_gpu:\n" + if (this->temperature_.has_value()) { + str += fmt::format(" temperature:\n" " unit: \"°C\"\n" " values: [{}]\n", - fmt::join(this->temperature_gpu_.value(), ", ")); + fmt::join(this->temperature_.value(), ", ")); } // remove last newline @@ -511,17 +511,17 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp return out << fmt::format("num_fans [int]: {}\n" "min_fan_speed [%]: {}\n" "max_fan_speed [%]: {}\n" - "temperature_threshold_gpu_max [°C]: {}\n" - "temperature_threshold_mem_max [°C]: {}\n" - "fan_speed [%]: [{}]\n" - "temperature_gpu [°C]: [{}]", + "temperature__max [°C]: {}\n" + "memory_temperature_max [°C]: {}\n" + "fan_speed_percentage [%]: [{}]\n" + "temperature [°C]: [{}]", detail::value_or_default(samples.get_num_fans()), - detail::value_or_default(samples.get_min_fan_speed()), - detail::value_or_default(samples.get_max_fan_speed()), - 
detail::value_or_default(samples.get_temperature_threshold_gpu_max()), - detail::value_or_default(samples.get_temperature_threshold_mem_max()), - fmt::join(detail::value_or_default(samples.get_fan_speed()), ", "), - fmt::join(detail::value_or_default(samples.get_temperature_gpu()), ", ")); + detail::value_or_default(samples.get_fan_speed_min()), + detail::value_or_default(samples.get_fan_speed_max()), + detail::value_or_default(samples.get_temperature_max()), + detail::value_or_default(samples.get_memory_temperature_max()), + fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature()), ", ")); } } // namespace hws From 357bf249a5399fce84ff854eabfc6e01213acdfe Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 14:32:03 +0200 Subject: [PATCH 20/69] Unify memory related samples. --- README.md | 47 ++++++----- include/hardware_sampling/cpu/cpu_samples.hpp | 8 +- .../gpu_amd/rocm_smi_samples.hpp | 19 +++-- .../gpu_nvidia/nvml_samples.hpp | 11 +-- src/hardware_sampling/cpu/cpu_samples.cpp | 66 +++++++-------- .../cpu/hardware_sampler.cpp | 8 +- .../gpu_amd/hardware_sampler.cpp | 25 ++++-- .../gpu_amd/rocm_smi_samples.cpp | 72 +++++++++------- .../gpu_nvidia/hardware_sampler.cpp | 29 ++++--- .../gpu_nvidia/nvml_samples.cpp | 83 ++++++++++--------- 10 files changed, 206 insertions(+), 162 deletions(-) diff --git a/README.md b/README.md index 36b116d..a0c61bb 100644 --- a/README.md +++ b/README.md @@ -144,34 +144,35 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} | sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | |:----------------------------|:----:|:-----------:|:--------:|:----------:| -| cache_size_L1d | str | | | | -| cache_size_L1i | str | | | | -| cache_size_L2 | str | | | | -| cache_size_L3 | str | | | | +| cache_size_L1d | str | - | - | - | +| cache_size_L1i | str | - | - | - | +| cache_size_L2 | str | - | - | - | +| cache_size_L3 | str | - | - | 
- | | memory_total | B | B | B | | -| swap_memory_total | B | | | | -| memory_free | B | B | B | | +| visible_memory_total | - | - | B | - | +| swap_memory_total | B | - | - | - | +| memory_total_{} | - | | | B | +| allocatable_memory_total_{} | - | | | B | +| num_pcie_lanes_min | - | - | int | | +| num_pcie_lanes_max | - | int | int | | +| pcie_link_generation_max | - | int | - | int | +| pcie_link_speed_max | - | MBPS | - | BPS | +| pcie_link_transfer_rate_min | - | - | MT/s | | +| pcie_link_transfer_rate_max | - | - | MT/s | | +| memory_bus_width | - | Bit | - | | | memory_used | B | B | B | | -| swap_memory_free | B | | | | -| swap_memory_used | B | | | | -| visible_memory_total | | | B | | -| min_num_pcie_lanes | | | int | | -| max_num_pcie_lanes | | | int | | -| pcie_bandwidth | | MBPS | T/s | MBPS | -| num_pcie_lanes | | | int | | -| memory_total_{} | | | | B | -| allocatable_memory_total_{} | | | | B | -| pcie_max_bandwidth | | MBPS | | BPS | -| max_pcie_link_width | | | | int | -| max_pcie_link_generation | | int | | int | +| memory_free | B | B | B | | +| swap_memory_used | B | - | - | - | +| swap_memory_free | B | - | - | - | +| num_pcie_lanes | - | int | int | | +| pcie_link_generation | - | int | - | int | +| pcie_link_speed | - | MBPS | - | MBPS | +| pcie_link_transfer_rate | - | - | T/s | - | +| memory_used_{} | | | | B | +| memory_free_{} | | | | B | | memory_bus_width_{} | | | | Bit | | memory_num_channels_{} | | | | int | | memory_location_{} | | | | str | -| memory_free_{} | | | | B | -| memory_used_{} | | | | B | -| pcie_link_width | | int | | int | -| pcie_link_generation | | int | | int | -| memory_bus_width | | Bit | | | ### temperature-related samples diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index 8a90c30..98a88f2 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -166,10 +166,10 @@ class cpu_memory_samples { */ 
[[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l1d_cache) // the size of the L1 data cache - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l1i_cache) // the size of the L1 instruction cache - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l2_cache) // the size of the L2 cache - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l3_cache) // the size of the L2 cache + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L1d) // the size of the L1 data cache + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L1i) // the size of the L1 instruction cache + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L2) // the size of the L2 cache + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L3) // the size of the L2 cache HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long long, memory_total) // the total available memory in Byte HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long long, swap_memory_total) // the total available swap memory in Byte diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index 05deb6b..03de73a 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -163,14 +163,17 @@ class rocm_smi_memory_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, memory_total) // the total available memory in Byte - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, visible_memory_total) // the total visible available memory in Byte, may be smaller than the total memory - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, min_num_pcie_lanes) // the minimum number of used PCIe lanes - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, max_num_pcie_lanes) // the maximum number of used PCIe lanes - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_used) // the currently used memory in Byte - 
HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, pcie_transfer_rate) // the current PCIe transfer rate in T/s - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, num_pcie_lanes) // the number of currently used PCIe lanes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, memory_total) // the total available memory in Byte + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, visible_memory_total) // the total visible available memory in Byte, may be smaller than the total memory + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_pcie_lanes_min) // the minimum number of used PCIe lanes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_pcie_lanes_max) // the maximum number of used PCIe lanes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, pcie_link_transfer_rate_min) // the minimum PCIe link transfer rate in MT/s + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, pcie_link_transfer_rate_max) // the maximum PCIe link transfer rate in MT/s + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_used) // the currently used memory in Byte + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_free) // the currently free memory in Byte + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, num_pcie_lanes) // the number of currently used PCIe lanes + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, pcie_link_transfer_rate) // the current PCIe transfer rate in MT/s }; /** diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index b1af6bc..b85c7a8 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -169,15 +169,16 @@ class nvml_memory_samples { [[nodiscard]] std::string generate_yaml_string() const; HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long, memory_total) // the total available memory in Byte - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_max_speed) // the maximum PCIe link speed in MBPS + 
HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_pcie_lanes_max) // the maximum number of PCIe lanes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_generation_max) // the maximum PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_speed_max) // the maximum PCIe link speed in MBPS HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, memory_bus_width) // the memory bus with in Bit - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, max_pcie_link_generation) // the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc) - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_free) // the currently free memory in Byte HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_used) // the currently used memory in Byte - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_speed) // the current PCIe link speed in MBPS - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_width) // the current PCIe link width (e.g., x16, x8, x4, etc) + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_free) // the currently free memory in Byte + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, num_pcie_lanes) // the current PCIe link width (e.g., x16, x8, x4, etc) HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_generation) // the current PCIe link generation (may change during runtime to save energy) + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_speed) // the current PCIe link speed in MBPS }; /** diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index 80ed9b4..7b5054c 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -374,32 +374,32 @@ std::string cpu_memory_samples::generate_yaml_string() const { std::string str{ "memory:\n" }; // the size of the L1 data cache - if (this->l1d_cache_.has_value()) { + if (this->cache_size_L1d_.has_value()) { str += fmt::format(" 
cache_size_L1d:\n" " unit: \"string\"\n" " values: \"{}\"\n", - this->l1d_cache_.value()); + this->cache_size_L1d_.value()); } // the size of the L1 instruction cache - if (this->l1i_cache_.has_value()) { + if (this->cache_size_L1i_.has_value()) { str += fmt::format(" cache_size_L1i:\n" " unit: \"string\"\n" " values: \"{}\"\n", - this->l1i_cache_.value()); + this->cache_size_L1i_.value()); } // the size of the L2 cache - if (this->l2_cache_.has_value()) { + if (this->cache_size_L2_.has_value()) { str += fmt::format(" cache_size_L2:\n" " unit: \"string\"\n" " values: \"{}\"\n", - this->l2_cache_.value()); + this->cache_size_L2_.value()); } // the size of the L3 cache - if (this->l3_cache_.has_value()) { + if (this->cache_size_L3_.has_value()) { str += fmt::format(" cache_size_L3:\n" " unit: \"string\"\n" " values: \"{}\"\n", - this->l3_cache_.value()); + this->cache_size_L3_.value()); } // the total size of available memory @@ -417,13 +417,6 @@ std::string cpu_memory_samples::generate_yaml_string() const { this->swap_memory_total_.value()); } - // the available free memory - if (this->memory_free_.has_value()) { - str += fmt::format(" memory_free:\n" - " unit: \"B\"\n" - " values: [{}]\n", - fmt::join(this->memory_free_.value(), ", ")); - } // the used memory if (this->memory_used_.has_value()) { str += fmt::format(" memory_used:\n" @@ -431,12 +424,12 @@ std::string cpu_memory_samples::generate_yaml_string() const { " values: [{}]\n", fmt::join(this->memory_used_.value(), ", ")); } - // the available swap memory - if (this->swap_memory_free_.has_value()) { - str += fmt::format(" swap_memory_free:\n" + // the available free memory + if (this->memory_free_.has_value()) { + str += fmt::format(" memory_free:\n" " unit: \"B\"\n" " values: [{}]\n", - fmt::join(this->swap_memory_free_.value(), ", ")); + fmt::join(this->memory_free_.value(), ", ")); } // the swap memory if (this->swap_memory_used_.has_value()) { @@ -445,6 +438,13 @@ std::string 
cpu_memory_samples::generate_yaml_string() const { " values: [{}]\n", fmt::join(this->swap_memory_used_.value(), ", ")); } + // the available swap memory + if (this->swap_memory_free_.has_value()) { + str += fmt::format(" swap_memory_free:\n" + " unit: \"B\"\n" + " values: [{}]\n", + fmt::join(this->swap_memory_free_.value(), ", ")); + } // remove last newline str.pop_back(); @@ -453,26 +453,26 @@ std::string cpu_memory_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) { - return out << fmt::format("l1d_cache [string]: {}\n" - "l1i_cache [string]: {}\n" - "l2_cache [string]: {}\n" - "l3_cache [string]: {}\n" + return out << fmt::format("cache_size_L1d [string]: {}\n" + "cache_size_L1i [string]: {}\n" + "cache_size_L2 [string]: {}\n" + "cache_size_L3 [string]: {}\n" "memory_total [B]: {}\n" "swap_memory_total [B]: {}\n" - "memory_free [B]: [{}]\n" "memory_used [B]: [{}]\n" - "swap_memory_free [B]: [{}]\n" - "swap_memory_used [B]: [{}]", - detail::value_or_default(samples.get_l1d_cache()), - detail::value_or_default(samples.get_l1i_cache()), - detail::value_or_default(samples.get_l2_cache()), - detail::value_or_default(samples.get_l3_cache()), + "memory_free [B]: [{}]\n" + "swap_memory_used [B]: [{}]\n" + "swap_memory_free [B]: [{}]", + detail::value_or_default(samples.get_cache_size_L1d()), + detail::value_or_default(samples.get_cache_size_L1i()), + detail::value_or_default(samples.get_cache_size_L2()), + detail::value_or_default(samples.get_cache_size_L3()), detail::value_or_default(samples.get_memory_total()), detail::value_or_default(samples.get_swap_memory_total()), - fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), fmt::join(detail::value_or_default(samples.get_memory_used()), ", "), - fmt::join(detail::value_or_default(samples.get_swap_memory_free()), ", "), - fmt::join(detail::value_or_default(samples.get_swap_memory_used()), ", ")); + 
fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), + fmt::join(detail::value_or_default(samples.get_swap_memory_used()), ", "), + fmt::join(detail::value_or_default(samples.get_swap_memory_free()), ", ")); } //*************************************************************************************************************************************// diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index cf2eeda..6ad38ff 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -98,13 +98,13 @@ void cpu_hardware_sampler::sampling_loop() { } else if (detail::starts_with(line, "CPU min MHz")) { clock_samples_.clock_frequency_min_ = detail::convert_to(value); } else if (detail::starts_with(line, "L1d cache")) { - memory_samples_.l1d_cache_ = detail::convert_to(value); + memory_samples_.cache_size_L1d_ = detail::convert_to(value); } else if (detail::starts_with(line, "L1i cache")) { - memory_samples_.l1i_cache_ = detail::convert_to(value); + memory_samples_.cache_size_L1i_ = detail::convert_to(value); } else if (detail::starts_with(line, "L2 cache")) { - memory_samples_.l2_cache_ = detail::convert_to(value); + memory_samples_.cache_size_L2_ = detail::convert_to(value); } else if (detail::starts_with(line, "L3 cache")) { - memory_samples_.l3_cache_ = detail::convert_to(value); + memory_samples_.cache_size_L3_ = detail::convert_to(value); } } diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 76cde4b..f8a8253 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -298,17 +298,20 @@ void gpu_amd_hardware_sampler::sampling_loop() { rsmi_pcie_bandwidth_t bandwidth_info{}; if (rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info) == RSMI_STATUS_SUCCESS) { - memory_samples_.min_num_pcie_lanes_ = 
bandwidth_info.lanes[0]; - memory_samples_.max_num_pcie_lanes_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1]; + memory_samples_.num_pcie_lanes_min_ = bandwidth_info.lanes[0]; + memory_samples_.num_pcie_lanes_max_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1]; + memory_samples_.pcie_link_transfer_rate_min_ = bandwidth_info.transfer_rate.frequency[0] / 1000000; + memory_samples_.pcie_link_transfer_rate_max_ = bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.num_supported - 1] / 1000000; + // queried samples -> retrieved every iteration if available - memory_samples_.pcie_transfer_rate_ = decltype(memory_samples_.pcie_transfer_rate_)::value_type{}; + memory_samples_.pcie_link_transfer_rate_ = decltype(memory_samples_.pcie_link_transfer_rate_)::value_type{}; memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{}; if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) { - memory_samples_.pcie_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current]); + memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000); memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]); } else { // the current index is (somehow) wrong - memory_samples_.pcie_transfer_rate_->push_back(0); + memory_samples_.pcie_link_transfer_rate_->push_back(0); memory_samples_.num_pcie_lanes_->push_back(0); } } @@ -317,6 +320,9 @@ void gpu_amd_hardware_sampler::sampling_loop() { decltype(memory_samples_.memory_used_)::value_type::value_type memory_used{}; if (rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_used) == RSMI_STATUS_SUCCESS) { memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_used }; + if (memory_samples_.memory_total_.has_value()) { + 
memory_samples_.memory_free_ = decltype(memory_samples_.memory_used_)::value_type{ memory_samples_.memory_total_.value() - memory_samples_.memory_used_->front() }; + } } } @@ -582,17 +588,20 @@ void gpu_amd_hardware_sampler::sampling_loop() { decltype(memory_samples_.memory_used_)::value_type::value_type value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value)); memory_samples_.memory_used_->push_back(value); + if (memory_samples_.memory_free_.has_value()) { + memory_samples_.memory_free_->push_back(memory_samples_.memory_total_.value() - value); + } } - if (memory_samples_.pcie_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) { + if (memory_samples_.pcie_link_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) { rsmi_pcie_bandwidth_t bandwidth_info{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info)); if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) { - memory_samples_.pcie_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current]); + memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000); memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]); } else { // the current index is (somehow) wrong - memory_samples_.pcie_transfer_rate_->push_back(0); + memory_samples_.pcie_link_transfer_rate_->push_back(0); memory_samples_.num_pcie_lanes_->push_back(0); } } diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index e8dcffa..0bb7eeb 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -337,18 +337,32 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const { this->visible_memory_total_.value()); } // min number 
of PCIe lanes - if (this->min_num_pcie_lanes_.has_value()) { - str += fmt::format(" min_num_pcie_lanes:\n" + if (this->num_pcie_lanes_min_.has_value()) { + str += fmt::format(" num_pcie_lanes_min:\n" " unit: \"int\"\n" " values: {}\n", - this->min_num_pcie_lanes_.value()); + this->num_pcie_lanes_min_.value()); } // max number of PCIe lanes - if (this->max_num_pcie_lanes_.has_value()) { - str += fmt::format(" max_num_pcie_lanes:\n" + if (this->num_pcie_lanes_max_.has_value()) { + str += fmt::format(" num_pcie_lanes_max:\n" " unit: \"int\"\n" " values: {}\n", - this->max_num_pcie_lanes_.value()); + this->num_pcie_lanes_max_.value()); + } + // the minimum PCIe link transfer rate + if (this->pcie_link_transfer_rate_min_.has_value()) { + str += fmt::format(" pcie_link_transfer_rate_min:\n" + " unit: \"MT/s\"\n" + " values: {}\n", + this->pcie_link_transfer_rate_min_.value()); + } + // the maximum PCIe link transfer rate + if (this->pcie_link_transfer_rate_max_.has_value()) { + str += fmt::format(" pcie_link_transfer_rate_max:\n" + " unit: \"MT/s\"\n" + " values: {}\n", + this->pcie_link_transfer_rate_max_.value()); } // used memory @@ -359,31 +373,27 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const { fmt::join(this->memory_used_.value(), ", ")); } // free memory - if (this->memory_used_.has_value() && this->memory_total_.has_value()) { - decltype(rocm_smi_memory_samples::memory_used_)::value_type memory_free(this->memory_used_->size(), this->memory_total_.value()); - for (std::size_t i = 0; i < memory_free.size(); ++i) { - memory_free[i] -= this->memory_used_.value()[i]; - } + if (this->memory_free_.has_value()) { str += fmt::format(" memory_free:\n" " unit: \"B\"\n" " values: [{}]\n", - fmt::join(memory_free, ", ")); + fmt::join(this->memory_free_.value(), ", ")); } - // PCIe bandwidth - if (this->pcie_transfer_rate_.has_value()) { - str += fmt::format(" pcie_bandwidth:\n" - " unit: \"T/s\"\n" - " values: [{}]\n", - 
fmt::join(this->pcie_transfer_rate_.value(), ", ")); - } // number of PCIe lanes if (this->num_pcie_lanes_.has_value()) { - str += fmt::format(" pcie_num_lanes:\n" + str += fmt::format(" num_pcie_lanes:\n" " unit: \"int\"\n" " values: [{}]\n", fmt::join(this->num_pcie_lanes_.value(), ", ")); } + // PCIe transfer rate + if (this->pcie_link_transfer_rate_.has_value()) { + str += fmt::format(" pcie_link_transfer_rate:\n" + " unit: \"MT/s\"\n" + " values: [{}]\n", + fmt::join(this->pcie_link_transfer_rate_.value(), ", ")); + } // remove last newline str.pop_back(); @@ -394,18 +404,24 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples) { return out << fmt::format("memory_total [B]: {}\n" "visible_memory_total [B]: {}\n" - "min_num_pcie_lanes [int]: {}\n" - "max_num_pcie_lanes [int]: {}\n" + "num_pcie_lanes_min [int]: {}\n" + "num_pcie_lanes_max [int]: {}\n" + "pcie_link_transfer_rate_min [MBPS]: {}\n" + "pcie_link_transfer_rate_max [MBPS]: {}\n" "memory_used [B]: [{}]\n" - "pcie_transfer_rate [T/s]: [{}]\n" - "num_pcie_lanes [int]: [{}]", + "memory_free [B]: [{}]\n" + "num_pcie_lanes [int]: [{}]\n" + "pcie_link_transfer_rate [MBPS]: [{}]", detail::value_or_default(samples.get_memory_total()), detail::value_or_default(samples.get_visible_memory_total()), - detail::value_or_default(samples.get_min_num_pcie_lanes()), - detail::value_or_default(samples.get_max_num_pcie_lanes()), + detail::value_or_default(samples.get_num_pcie_lanes_min()), + detail::value_or_default(samples.get_num_pcie_lanes_max()), + detail::value_or_default(samples.get_pcie_link_transfer_rate_min()), + detail::value_or_default(samples.get_pcie_link_transfer_rate_max()), fmt::join(detail::value_or_default(samples.get_memory_used()), ", "), - fmt::join(detail::value_or_default(samples.get_pcie_transfer_rate()), ", "), - fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", ")); + 
fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), + fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_transfer_rate()), ", ")); } //*************************************************************************************************************************************// diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index ebb65d3..7af2a2a 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -347,20 +347,25 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { memory_samples_.memory_bus_width_ = memory_bus_width; } - decltype(memory_samples_.max_pcie_link_generation_)::value_type max_pcie_link_generation{}; - if (nvmlDeviceGetMaxPcieLinkGeneration(device, &max_pcie_link_generation) == NVML_SUCCESS) { - memory_samples_.max_pcie_link_generation_ = max_pcie_link_generation; + decltype(memory_samples_.num_pcie_lanes_max_)::value_type num_pcie_lanes_max{}; + if (nvmlDeviceGetMaxPcieLinkWidth(device, &num_pcie_lanes_max) == NVML_SUCCESS) { + memory_samples_.num_pcie_lanes_max_ = num_pcie_lanes_max; } - decltype(memory_samples_.pcie_link_max_speed_)::value_type pcie_link_max_speed{}; - if (nvmlDeviceGetPcieLinkMaxSpeed(device, &pcie_link_max_speed) == NVML_SUCCESS) { - memory_samples_.pcie_link_max_speed_ = pcie_link_max_speed; + decltype(memory_samples_.pcie_link_generation_max_)::value_type pcie_link_generation_max{}; + if (nvmlDeviceGetMaxPcieLinkGeneration(device, &pcie_link_generation_max) == NVML_SUCCESS) { + memory_samples_.pcie_link_generation_max_ = pcie_link_generation_max; + } + + decltype(memory_samples_.pcie_link_speed_max_)::value_type pcie_link_speed_max{}; + if (nvmlDeviceGetPcieLinkMaxSpeed(device, &pcie_link_speed_max) == NVML_SUCCESS) { + memory_samples_.pcie_link_speed_max_ = pcie_link_speed_max; } // 
queried samples -> retrieved every iteration if available - decltype(memory_samples_.pcie_link_width_)::value_type::value_type pcie_link_width{}; - if (nvmlDeviceGetCurrPcieLinkWidth(device, &pcie_link_width) == NVML_SUCCESS) { - memory_samples_.pcie_link_width_ = decltype(memory_samples_.pcie_link_width_)::value_type{ pcie_link_width }; + decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type num_pcie_lanes{}; + if (nvmlDeviceGetCurrPcieLinkWidth(device, &num_pcie_lanes) == NVML_SUCCESS) { + memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{ num_pcie_lanes }; } decltype(memory_samples_.pcie_link_generation_)::value_type::value_type pcie_link_generation{}; @@ -498,10 +503,10 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { memory_samples_.memory_used_->push_back(memory_info.used); } - if (memory_samples_.pcie_link_width_.has_value()) { - decltype(memory_samples_.pcie_link_width_)::value_type::value_type value{}; + if (memory_samples_.num_pcie_lanes_.has_value()) { + decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value)); - memory_samples_.pcie_link_width_->push_back(value); + memory_samples_.num_pcie_lanes_->push_back(value); } if (memory_samples_.pcie_link_generation_.has_value()) { diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 9258f24..0ee319a 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -359,11 +359,25 @@ std::string nvml_memory_samples::generate_yaml_string() const { this->memory_total_.value()); } // maximum PCIe link speed - if (this->pcie_link_max_speed_.has_value()) { - str += fmt::format(" pcie_max_bandwidth:\n" + if (this->pcie_link_speed_max_.has_value()) { + str += fmt::format(" pcie_link_speed_max:\n" " unit: \"MBPS\"\n" " values: {}\n", - 
this->pcie_link_max_speed_.value()); + this->pcie_link_speed_max_.value()); + } + // maximum PCIe link generation + if (this->pcie_link_generation_max_.has_value()) { + str += fmt::format(" pcie_link_generation_max:\n" + " unit: \"int\"\n" + " values: {}\n", + this->pcie_link_generation_max_.value()); + } + // maximum number of available PCIe lanes + if (this->num_pcie_lanes_max_.has_value()) { + str += fmt::format(" num_pcie_lanes_max:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_pcie_lanes_max_.value()); } // memory bus width if (this->memory_bus_width_.has_value()) { @@ -372,21 +386,7 @@ std::string nvml_memory_samples::generate_yaml_string() const { " values: {}\n", this->memory_bus_width_.value()); } - // maximum PCIe link generation - if (this->max_pcie_link_generation_.has_value()) { - str += fmt::format(" max_pcie_link_generation:\n" - " unit: \"int\"\n" - " values: {}\n", - this->max_pcie_link_generation_.value()); - } - // free memory size - if (this->memory_free_.has_value()) { - str += fmt::format(" memory_free:\n" - " unit: \"B\"\n" - " values: [{}]\n", - fmt::join(this->memory_free_.value(), ", ")); - } // used memory size if (this->memory_used_.has_value()) { str += fmt::format(" memory_used:\n" @@ -394,19 +394,19 @@ std::string nvml_memory_samples::generate_yaml_string() const { " values: [{}]\n", fmt::join(this->memory_used_.value(), ", ")); } - // PCIe link speed - if (this->pcie_link_speed_.has_value()) { - str += fmt::format(" pcie_bandwidth:\n" - " unit: \"MBPS\"\n" + // free memory size + if (this->memory_free_.has_value()) { + str += fmt::format(" memory_free:\n" + " unit: \"B\"\n" " values: [{}]\n", - fmt::join(this->pcie_link_speed_.value(), ", ")); + fmt::join(this->memory_free_.value(), ", ")); } // PCIe link width - if (this->pcie_link_width_.has_value()) { - str += fmt::format(" pcie_link_width:\n" + if (this->num_pcie_lanes_.has_value()) { + str += fmt::format(" num_pcie_lanes:\n" " unit: \"int\"\n" " values: [{}]\n", - 
fmt::join(this->pcie_link_width_.value(), ", ")); + fmt::join(this->num_pcie_lanes_.value(), ", ")); } // PCIe link generation if (this->pcie_link_generation_.has_value()) { @@ -415,6 +415,13 @@ std::string nvml_memory_samples::generate_yaml_string() const { " values: [{}]\n", fmt::join(this->pcie_link_generation_.value(), ", ")); } + // PCIe link speed + if (this->pcie_link_speed_.has_value()) { + str += fmt::format(" pcie_link_speed:\n" + " unit: \"MBPS\"\n" + " values: [{}]\n", + fmt::join(this->pcie_link_speed_.value(), ", ")); + } // remove last newline str.pop_back(); @@ -424,23 +431,25 @@ std::string nvml_memory_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) { return out << fmt::format("memory_total [B]: {}\n" - "pcie_link_max_speed [MBPS]: {}\n" + "pcie_link_speed_max [MBPS]: {}\n" + "pcie_link_generation_max [int]: {}\n" + "num_pcie_lanes_max [int]: {}\n" "memory_bus_width [Bit]: {}\n" - "max_pcie_link_generation [int]: {}\n" - "memory_free [B]: [{}]\n" "memory_used [B]: [{}]\n" - "pcie_link_speed [MBPS]: [{}]\n" - "pcie_link_width [int]: [{}]\n" - "pcie_link_generation [int]: [{}]", + "memory_free [B]: [{}]\n" + "num_pcie_lanes [int]: [{}]\n" + "pcie_link_generation [int]: [{}]\n" + "pcie_link_speed [MBPS]: [{}]", detail::value_or_default(samples.get_memory_total()), - detail::value_or_default(samples.get_pcie_link_max_speed()), + detail::value_or_default(samples.get_pcie_link_speed_max()), + detail::value_or_default(samples.get_pcie_link_generation_max()), + detail::value_or_default(samples.get_num_pcie_lanes_max()), detail::value_or_default(samples.get_memory_bus_width()), - detail::value_or_default(samples.get_max_pcie_link_generation()), - fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), fmt::join(detail::value_or_default(samples.get_memory_used()), ", "), - fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "), - 
fmt::join(detail::value_or_default(samples.get_pcie_link_width()), ", "), - fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", ")); + fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), + fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", ")); } //*************************************************************************************************************************************// From e1a808c83b5eef70044d9ebd56b96773b18496e1 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 14:55:33 +0200 Subject: [PATCH 21/69] Unify general samples. --- README.md | 51 +++++++++---------- .../gpu_amd/rocm_smi_samples.hpp | 4 +- include/hardware_sampling/gpu_amd/utility.hpp | 32 ++++++++++++ .../gpu_nvidia/nvml_samples.hpp | 2 +- .../hardware_sampling/gpu_nvidia/utility.hpp | 2 + .../gpu_amd/hardware_sampler.cpp | 4 +- .../gpu_amd/rocm_smi_samples.cpp | 4 +- .../gpu_nvidia/nvml_samples.cpp | 4 +- 8 files changed, 68 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index a0c61bb..f74d6b9 100644 --- a/README.md +++ b/README.md @@ -66,32 +66,31 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ### General samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:--------------------|:-----:|:-----------:|:---------:|:------------:| -| name | str | str | str | str | -| vendor_id | str | str (fix) | str | str (PCIe ID | -| architecture | str | str | str | ? | -| byte_order | str | str (fix) | str (fix) | str (fix) | -| compute_utilization | % | % | % | ? | -| memory_utilization | - | % | % | ? | -| performance_level | | int | int | | -| num_cores | int | int | - | | -| num_compute_units | - | int | int | ? 
| TODO -| num_threads | int | - | - | - | -| threads_per_core | int | - | - | - | -| cores_per_socket | int | - | - | - | -| num_sockets | int | - | - | - | -| numa_nodes | int | | | | -| flags | str | | | | -| ipc | float | - | - | - | -| irq | int | - | - | - | -| smi | int | - | - | - | -| poll | int | - | - | - | -| poll_percent | % | - | - | - | -| persistence_mode | | bool | | | -| standby_mode | | | | str | -| num_threads_per_eu | | | | int | -| eu_simd_width | | | | int | +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:--------------------|:-----:|:-----------:|:---------:|:-------------:| +| architecture | str | str | str | ? | +| byte_order | str | str (fix) | str (fix) | str (fix) | +| num_cores | int | int | - | | +| num_threads | int | - | - | - | +| threads_per_core | int | - | - | - | +| cores_per_socket | int | - | - | - | +| num_sockets | int | - | - | - | +| numa_nodes | int | - | - | | +| vendor_id | str | str (fix) | str | str (PCIe ID) | +| name | str | str | str | str | +| flags | str | - | - | | +| persistence_mode | - | bool | - | | +| compute_utilization | % | % | % | ? | +| memory_utilization | - | % | % | ? 
| +| ipc | float | - | - | - | +| irq | int | - | - | - | +| smi | int | - | - | - | +| poll | int | - | - | - | +| poll_percent | % | - | - | - | +| performance_level | - | int | int | | +| standby_mode | | | | str | +| num_threads_per_eu | | | | int | +| eu_simd_width | | | | int | ### clock-related samples diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index 03de73a..af5228b 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -43,14 +43,14 @@ class rocm_smi_general_samples { */ [[nodiscard]] std::string generate_yaml_string() const; + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the architecture name of the device HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the architecture name of the device HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization) // the GPU compute utilization in percent HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization) // the GPU memory utilization in percent - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level) // the performance level: one of rsmi_dev_perf_level_t + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, performance_level) // the performance level: one of rsmi_dev_perf_level_t }; /** diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp index 59e19b5..f17db78 100644 --- a/include/hardware_sampling/gpu_amd/utility.hpp +++ b/include/hardware_sampling/gpu_amd/utility.hpp @@ -52,6 +52,38 @@ namespace hws { #define HWS_HIP_ERROR_CHECK(hip_func) hip_func; #endif +// TODO: move to cpp file + +/** + * @brief Convert 
the performance level value (rsmi_dev_perf_level_t) to a string. + * @param[in] clocks_event_reasons the bitmask to convert to a string + * @return all event throttle reasons + */ +[[nodiscard]] inline std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) { + switch (perf_level) { + case RSMI_DEV_PERF_LEVEL_AUTO: + return "\"auto\""; + case RSMI_DEV_PERF_LEVEL_LOW: + return "\"low\""; + case RSMI_DEV_PERF_LEVEL_HIGH: + return "\"high\""; + case RSMI_DEV_PERF_LEVEL_MANUAL: + return "\"manual\""; + case RSMI_DEV_PERF_LEVEL_STABLE_STD: + return "\"stable_std\""; + case RSMI_DEV_PERF_LEVEL_STABLE_PEAK: + return "\"stable_peak\""; + case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK: + return "\"stable_min_mclk\""; + case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK: + return "\"stable_min_sclk\""; + case RSMI_DEV_PERF_LEVEL_DETERMINISM: + return "\"determinism\""; + case RSMI_DEV_PERF_LEVEL_UNKNOWN: + return "\"unknown\""; + } +} + } // namespace hws #endif // HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index b85c7a8..ed6504b 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -45,10 +45,10 @@ class nvml_general_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the architecture name of the device HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the number of CUDA cores HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode) // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the 
number of CUDA cores HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, compute_utilization) // the GPU compute utilization in percent HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, memory_utilization) // the GPU memory utilization in percent diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp index 272126b..7b1266d 100644 --- a/include/hardware_sampling/gpu_nvidia/utility.hpp +++ b/include/hardware_sampling/gpu_nvidia/utility.hpp @@ -39,6 +39,8 @@ namespace hws::detail { #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func; #endif +// TODO: move to cpp file + /** * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|". * @param[in] clocks_event_reasons the bitmask to convert to a string diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index f8a8253..2d344ef 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -110,7 +110,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { // queried samples -> retrieved every iteration if available rsmi_dev_perf_level_t pstate{}; if (rsmi_dev_perf_level_get(device_id_, &pstate) == RSMI_STATUS_SUCCESS) { - general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ static_cast(pstate) }; + general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ performance_level_to_string(pstate) }; } decltype(general_samples_.compute_utilization_)::value_type::value_type utilization_gpu{}; @@ -468,7 +468,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { if (general_samples_.performance_level_.has_value()) { rsmi_dev_perf_level_t pstate{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate)); - 
general_samples_.performance_level_->push_back(static_cast(pstate)); + general_samples_.performance_level_->push_back(performance_level_to_string(pstate)); } if (general_samples_.compute_utilization_.has_value()) { diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index 0bb7eeb..6f0c3fc 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -71,7 +71,7 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { // performance state if (this->performance_level_.has_value()) { str += fmt::format(" performance_state:\n" - " unit: \"int - see rsmi_dev_perf_level_t\"\n" + " unit: \"string\"\n" " values: [{}]\n", fmt::join(this->performance_level_.value(), ", ")); } @@ -89,7 +89,7 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samp "name [string]: {}\n" "compute_utilization [%]: [{}]\n" "memory_utilization [%]: [{}]\n" - "performance_level [int]: [{}]", + "performance_level [string]: [{}]", detail::value_or_default(samples.get_architecture()), detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_vendor_id()), diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 0ee319a..b3254ad 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -99,19 +99,19 @@ std::string nvml_general_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) { return out << fmt::format("architecture [string]: {}\n" "byte_order [string]: {}\n" + "num_cores [int]: {}\n" "vendor_id [string]: {}\n" "name [string]: {}\n" "persistence_mode [bool]: {}\n" - "num_cores [int]: {}\n" "compute_utilization [%]: [{}]\n" "memory_utilization [%]: [{}]\n" "performance_level [int]: [{}]", 
detail::value_or_default(samples.get_architecture()), detail::value_or_default(samples.get_byte_order()), + detail::value_or_default(samples.get_num_cores()), detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), detail::value_or_default(samples.get_persistence_mode()), - detail::value_or_default(samples.get_num_cores()), fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "), fmt::join(detail::value_or_default(samples.get_memory_utilization()), ", "), fmt::join(detail::value_or_default(samples.get_performance_level()), ", ")); From ece190c1d1a503895e777139e085b3a9fcb63e23 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 14:58:53 +0200 Subject: [PATCH 22/69] Prefix YAML entry to make its meaning clearer. --- src/hardware_sampling/gpu_nvidia/nvml_samples.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index b3254ad..43e9dd5 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -172,7 +172,7 @@ std::string nvml_clock_samples::generate_yaml_string() const { " unit: \"MHz\"\n" " values:\n"); for (const auto &[key, value] : this->available_clock_frequencies_.value()) { - str += fmt::format(" {}: [{}]\n", key, fmt::join(value, ", ")); + str += fmt::format(" memory_clock_frequency_{}: [{}]\n", key, fmt::join(value, ", ")); } } // the available memory clock frequencies From c8357eee24a7859d1b6c8e826d4dea635bcc0bd3 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 15:29:16 +0200 Subject: [PATCH 23/69] Consistent quoting of string-like values in the YAML file (and only in the YAML file). 
--- include/hardware_sampling/gpu_amd/utility.hpp | 20 +++++++++---------- .../hardware_sampling/gpu_nvidia/utility.hpp | 2 +- include/hardware_sampling/utility.hpp | 20 +++++++++++++++++++ src/hardware_sampling/cpu/cpu_samples.cpp | 4 ++-- .../gpu_amd/rocm_smi_samples.cpp | 8 ++++---- .../gpu_nvidia/nvml_samples.cpp | 4 ++-- src/hardware_sampling/hardware_sampler.cpp | 10 ++++++---- 7 files changed, 45 insertions(+), 23 deletions(-) diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp index f17db78..00d98bc 100644 --- a/include/hardware_sampling/gpu_amd/utility.hpp +++ b/include/hardware_sampling/gpu_amd/utility.hpp @@ -62,25 +62,25 @@ namespace hws { [[nodiscard]] inline std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) { switch (perf_level) { case RSMI_DEV_PERF_LEVEL_AUTO: - return "\"auto\""; + return "auto"; case RSMI_DEV_PERF_LEVEL_LOW: - return "\"low\""; + return "low"; case RSMI_DEV_PERF_LEVEL_HIGH: - return "\"high\""; + return "high"; case RSMI_DEV_PERF_LEVEL_MANUAL: - return "\"manual\""; + return "manual"; case RSMI_DEV_PERF_LEVEL_STABLE_STD: - return "\"stable_std\""; + return "stable_std"; case RSMI_DEV_PERF_LEVEL_STABLE_PEAK: - return "\"stable_peak\""; + return "stable_peak"; case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK: - return "\"stable_min_mclk\""; + return "stable_min_mclk"; case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK: - return "\"stable_min_sclk\""; + return "stable_min_sclk"; case RSMI_DEV_PERF_LEVEL_DETERMINISM: - return "\"determinism\""; + return "determinism"; case RSMI_DEV_PERF_LEVEL_UNKNOWN: - return "\"unknown\""; + return "unknown"; } } diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp index 7b1266d..05560b1 100644 --- a/include/hardware_sampling/gpu_nvidia/utility.hpp +++ b/include/hardware_sampling/gpu_nvidia/utility.hpp @@ -78,7 +78,7 @@ namespace hws::detail { if ((clocks_event_reasons & 
nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) { reasons.emplace_back("HwThermalSlowdown"); } - return fmt::format("\"{}\"", fmt::join(reasons, "|")); + return fmt::format("{}", fmt::join(reasons, "|")); } } diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp index 91836c4..2eb7451 100644 --- a/include/hardware_sampling/utility.hpp +++ b/include/hardware_sampling/utility.hpp @@ -252,6 +252,26 @@ template return ""; } +/** + * @brief Quote all @p values and return a vector of strings. + * @details Example: calling this function with `{ 1, 2, 3, 4 }` would return a vector of strings containing `{ "1", "2", "3", "4" }`. + * @tparam T the type of the values to quote + * @param[in] values the values to quote + * @return the quoted values (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline std::vector quote(const std::vector &values) { + std::vector quoted{}; + quoted.reserve(values.size()); + + // quote all values + for (const T &val : values) { + quoted.push_back(fmt::format("\"{}\"", val)); + } + + return quoted; +} + } // namespace hws::detail #endif // HARDWARE_SAMPLING_UTILITY_HPP_ diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index 7b5054c..0242e9a 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -7,7 +7,7 @@ #include "hardware_sampling/cpu/cpu_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::value_or_default +#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, quote} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join @@ -104,7 +104,7 @@ std::string cpu_general_samples::generate_yaml_string() const { str += fmt::format(" flags:\n" " unit: \"string\"\n" " values: [{}]\n", - fmt::join(this->flags_.value(), ", ")); + fmt::join(detail::quote(this->flags_.value()), ", ")); } // the percent the CPU was busy diff --git 
a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index 6f0c3fc..ba06efe 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -7,7 +7,7 @@ #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default,} +#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, quote} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join @@ -73,7 +73,7 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { str += fmt::format(" performance_state:\n" " unit: \"string\"\n" " values: [{}]\n", - fmt::join(this->performance_level_.value(), ", ")); + fmt::join(detail::quote(this->performance_level_.value()), ", ")); } // remove last newline @@ -267,7 +267,7 @@ std::string rocm_smi_power_samples::generate_yaml_string() const { str += fmt::format(" available_power_profiles:\n" " unit: \"string\"\n" " values: [{}]\n", - fmt::join(this->available_power_profiles_.value(), ", ")); + fmt::join(detail::quote(this->available_power_profiles_.value()), ", ")); } // current power usage @@ -289,7 +289,7 @@ std::string rocm_smi_power_samples::generate_yaml_string() const { str += fmt::format(" power_profile:\n" " unit: \"string\"\n" " values: [{}]\n", - fmt::join(this->power_profile_.value(), ", ")); + fmt::join(detail::quote(this->power_profile_.value()), ", ")); } // remove last newline diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 43e9dd5..94bf97b 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -7,7 +7,7 @@ #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, map_entry_to_string} +#include "hardware_sampling/utility.hpp" 
// hws::detail::{value_or_default, map_entry_to_string, quote} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join @@ -209,7 +209,7 @@ std::string nvml_clock_samples::generate_yaml_string() const { str += fmt::format(" throttle_reason:\n" " unit: \"string\"\n" " values: [{}]\n", - fmt::join(this->throttle_reason_.value(), ", ")); + fmt::join(detail::quote(this->throttle_reason_.value()), ", ")); } // clock is auto-boosted if (this->auto_boosted_clock_.has_value()) { diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index 1ac8f81..3511f7f 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -129,7 +129,7 @@ void hardware_sampler::dump_yaml(const char *filename) { file << "---\n\n"; // set the device identification - file << fmt::format("device_identification: {}\n\n", this->device_identification()); + file << fmt::format("device_identification: \"{}\"\n\n", this->device_identification()); // output the start date time of this hardware sampling file << fmt::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", start_date_time_); @@ -139,7 +139,7 @@ void hardware_sampler::dump_yaml(const char *filename) { std::vector event_names{}; for (const auto &[time_point, name] : events_) { event_time_points.push_back(time_point); - event_names.push_back(name); + event_names.push_back(fmt::format("\"{}\"", name)); } file << fmt::format("events:\n" " time_points:\n" @@ -150,12 +150,14 @@ void hardware_sampler::dump_yaml(const char *filename) { fmt::join(event_names, ", ")); // output the sampling information - file << fmt::format("sampling_interval: {}\n" + file << fmt::format("sampling_interval:\n" + " unit: \"ms\"\n" + " values: {}\n" "time_points:\n" " unit: \"s\"\n" " values: [{}]\n" "{}\n\n", - this->sampling_interval(), + this->sampling_interval().count(), fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), 
this->get_event(0).time_point), ", "), this->generate_yaml_string()); } From eaa4e4e8ca4e016fc874b719babb5575bba27148 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 15:39:39 +0200 Subject: [PATCH 24/69] Move implementation to cpp file. --- CMakeLists.txt | 6 +- include/hardware_sampling/gpu_amd/utility.hpp | 34 ++---------- .../hardware_sampling/gpu_nvidia/utility.hpp | 40 +------------- src/hardware_sampling/gpu_amd/utility.cpp | 41 ++++++++++++++ src/hardware_sampling/gpu_nvidia/utility.cpp | 55 +++++++++++++++++++ 5 files changed, 106 insertions(+), 70 deletions(-) create mode 100644 src/hardware_sampling/gpu_amd/utility.cpp create mode 100644 src/hardware_sampling/gpu_nvidia/utility.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 48ed48e..74cc828 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -202,7 +202,8 @@ if (CUDAToolkit_FOUND) target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definition @@ -228,7 +229,8 @@ if (rocm_smi_FOUND) target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definition diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp index 00d98bc..b0786f9 100644 --- a/include/hardware_sampling/gpu_amd/utility.hpp +++ b/include/hardware_sampling/gpu_amd/utility.hpp @@ -16,8 +16,9 @@ #include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions #include // std::runtime_error +#include // std::string -namespace hws { +namespace hws::detail { /** * @def HWS_ROCM_SMI_ERROR_CHECK @@ -52,37 +53,12 @@ namespace hws { #define HWS_HIP_ERROR_CHECK(hip_func) hip_func; #endif -// TODO: move to cpp file - /** - * @brief Convert the performance level value (rsmi_dev_perf_level_t) to a string. + * @brief Convert the performance level value (`rsmi_dev_perf_level_t`) to a string. 
* @param[in] clocks_event_reasons the bitmask to convert to a string - * @return all event throttle reasons + * @return all event throttle reasons (`[[nodiscard]]`) */ -[[nodiscard]] inline std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) { - switch (perf_level) { - case RSMI_DEV_PERF_LEVEL_AUTO: - return "auto"; - case RSMI_DEV_PERF_LEVEL_LOW: - return "low"; - case RSMI_DEV_PERF_LEVEL_HIGH: - return "high"; - case RSMI_DEV_PERF_LEVEL_MANUAL: - return "manual"; - case RSMI_DEV_PERF_LEVEL_STABLE_STD: - return "stable_std"; - case RSMI_DEV_PERF_LEVEL_STABLE_PEAK: - return "stable_peak"; - case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK: - return "stable_min_mclk"; - case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK: - return "stable_min_sclk"; - case RSMI_DEV_PERF_LEVEL_DETERMINISM: - return "determinism"; - case RSMI_DEV_PERF_LEVEL_UNKNOWN: - return "unknown"; - } -} +[[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level); } // namespace hws diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp index 05560b1..b347f0a 100644 --- a/include/hardware_sampling/gpu_nvidia/utility.hpp +++ b/include/hardware_sampling/gpu_nvidia/utility.hpp @@ -13,12 +13,10 @@ #pragma once #include "fmt/format.h" // fmt::format -#include "fmt/ranges.h" // fmt::join #include "nvml.h" // NVML runtime functions #include // std::runtime_error #include // std::string -#include // std::vector namespace hws::detail { @@ -39,48 +37,12 @@ namespace hws::detail { #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func; #endif -// TODO: move to cpp file - /** * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|". 
* @param[in] clocks_event_reasons the bitmask to convert to a string * @return all event throttle reasons */ -[[nodiscard]] inline std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) { - if (clocks_event_reasons == 0ull) { - return "None"; - } else { - std::vector reasons{}; - if ((clocks_event_reasons & nvmlClocksEventReasonApplicationsClocksSetting) != 0ull) { - reasons.emplace_back("ApplicationsClocksSetting"); - } - if ((clocks_event_reasons & nvmlClocksEventReasonDisplayClockSetting) != 0ull) { - reasons.emplace_back("DisplayClockSetting"); - } - if ((clocks_event_reasons & nvmlClocksEventReasonGpuIdle) != 0ull) { - reasons.emplace_back("GpuIdle"); - } - if ((clocks_event_reasons & nvmlClocksEventReasonSwPowerCap) != 0ull) { - reasons.emplace_back("SwPowerCap"); - } - if ((clocks_event_reasons & nvmlClocksEventReasonSwThermalSlowdown) != 0ull) { - reasons.emplace_back("SwThermalSlowdown"); - } - if ((clocks_event_reasons & nvmlClocksEventReasonSyncBoost) != 0ull) { - reasons.emplace_back("SyncBoost"); - } - if ((clocks_event_reasons & nvmlClocksThrottleReasonHwPowerBrakeSlowdown) != 0ull) { - reasons.emplace_back("HwPowerBrakeSlowdown"); - } - if ((clocks_event_reasons & nvmlClocksThrottleReasonHwSlowdown) != 0ull) { - reasons.emplace_back("HwSlowdown"); - } - if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) { - reasons.emplace_back("HwThermalSlowdown"); - } - return fmt::format("{}", fmt::join(reasons, "|")); - } -} +[[nodiscard]] std::string throttle_event_reason_to_string(unsigned long long clocks_event_reasons); } // namespace hws::detail diff --git a/src/hardware_sampling/gpu_amd/utility.cpp b/src/hardware_sampling/gpu_amd/utility.cpp new file mode 100644 index 0000000..3164c18 --- /dev/null +++ b/src/hardware_sampling/gpu_amd/utility.cpp @@ -0,0 +1,41 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT 
license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "hardware_sampling/gpu_amd/utility.hpp" + +#include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions + +#include // std::string + +namespace hws::detail { + +std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) { + switch (perf_level) { + case RSMI_DEV_PERF_LEVEL_AUTO: + return "auto"; + case RSMI_DEV_PERF_LEVEL_LOW: + return "low"; + case RSMI_DEV_PERF_LEVEL_HIGH: + return "high"; + case RSMI_DEV_PERF_LEVEL_MANUAL: + return "manual"; + case RSMI_DEV_PERF_LEVEL_STABLE_STD: + return "stable_std"; + case RSMI_DEV_PERF_LEVEL_STABLE_PEAK: + return "stable_peak"; + case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK: + return "stable_min_mclk"; + case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK: + return "stable_min_sclk"; + case RSMI_DEV_PERF_LEVEL_DETERMINISM: + return "determinism"; + default: // RSMI_DEV_PERF_LEVEL_UNKNOWN and any value outside the enumeration: never fall off the end of the function + return "unknown"; + } +} + +} // namespace hws::detail diff --git a/src/hardware_sampling/gpu_nvidia/utility.cpp b/src/hardware_sampling/gpu_nvidia/utility.cpp new file mode 100644 index 0000000..70883e6 --- /dev/null +++ b/src/hardware_sampling/gpu_nvidia/utility.cpp @@ -0,0 +1,55 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information.
+ */ + +#include "hardware_sampling/gpu_nvidia/utility.hpp" + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "nvml.h" // NVML runtime functions + +#include // std::string +#include // std::vector + +namespace hws::detail { + +std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) { + if (clocks_event_reasons == 0ull) { + return "None"; + } else { + std::vector reasons{}; + if ((clocks_event_reasons & nvmlClocksEventReasonApplicationsClocksSetting) != 0ull) { + reasons.emplace_back("ApplicationsClocksSetting"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonDisplayClockSetting) != 0ull) { + reasons.emplace_back("DisplayClockSetting"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonGpuIdle) != 0ull) { + reasons.emplace_back("GpuIdle"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonSwPowerCap) != 0ull) { + reasons.emplace_back("SwPowerCap"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonSwThermalSlowdown) != 0ull) { + reasons.emplace_back("SwThermalSlowdown"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonSyncBoost) != 0ull) { + reasons.emplace_back("SyncBoost"); + } + if ((clocks_event_reasons & nvmlClocksThrottleReasonHwPowerBrakeSlowdown) != 0ull) { + reasons.emplace_back("HwPowerBrakeSlowdown"); + } + if ((clocks_event_reasons & nvmlClocksThrottleReasonHwSlowdown) != 0ull) { + reasons.emplace_back("HwSlowdown"); + } + if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) { + reasons.emplace_back("HwThermalSlowdown"); + } + return fmt::format("{}", fmt::join(reasons, "|")); + } +} + +} // namespace hws::detail From 527c63530be926ef96aec1b55a6f4d0fe464b581 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 16:03:32 +0200 Subject: [PATCH 25/69] Clean-up utility header. 
--- include/hardware_sampling/utility.hpp | 156 +++++++++++++------------- src/hardware_sampling/utility.cpp | 4 + 2 files changed, 85 insertions(+), 75 deletions(-) diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp index 2eb7451..ff7d3f6 100644 --- a/include/hardware_sampling/utility.hpp +++ b/include/hardware_sampling/utility.hpp @@ -16,17 +16,15 @@ #include "fmt/ranges.h" // fmt::join #include // std::from_chars -#include // std::chrono::{milliseconds, duration_cast} +#include // std::chrono::duration #include // std::trunc #include // std::size_t -#include // std::back_inserter, std::next, std::prev #include // std::optional -#include // std::basic_stringstream #include // std::runtime_error #include // std::string, std::stof, std::stod, std::stold -#include // std::string_view, std::basic_string_view +#include // std::string_view #include // std::errc -#include // std::is_same_v, std::remove_cv_t, std::remove_reference_t +#include // std::is_same_v, std::is_floating_point_v, std::remove_cv_t, std::remove_reference_t, std::true_type, std::false_type #include // std::vector namespace hws::detail { @@ -56,69 +54,33 @@ namespace hws::detail { private: \ std::optional> sample_name##_{}; -// TODO: clean-up - -/** - * @brief Checks whether the string @p sv starts with the substring @p start - * @param[in] sv the full string - * @param[in] start the substring - * @return `true` if @p sv starts with @p start, otherwise `false` - */ -[[nodiscard]] inline bool starts_with(const std::string_view sv, const std::string_view start) { - return sv.substr(0, start.size()) == start; -} +/*****************************************************************************************************/ +/** type_traits **/ +/*****************************************************************************************************/ template using remove_cvref_t = std::remove_cv_t>; -/** - * @brief Convert all time points to their duration in seconds (using 
double) truncated to three decimal places passed since the @p reference time point. - * @tparam TimePoint the type if the time points - * @param[in] time_points the time points - * @param[in] reference the reference time point - * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`) - */ -template -[[nodiscard]] inline std::vector durations_from_reference_time(const std::vector &time_points, const TimePoint &reference) { - std::vector durations(time_points.size()); - - for (std::size_t i = 0; i < durations.size(); ++i) { - durations[i] = std::trunc(std::chrono::duration(time_points[i] - reference).count() * 1000.0) / 1000.0; - } +template +struct is_vector : std::false_type { }; - return durations; -} +template +struct is_vector> : std::true_type { }; -/** - * @brief Convert all time points to their duration since the epoch start. - * @tparam TimePoint the type of the time points - * @param[in] time_points the time points - * @return the duration passed since the respective @p TimePoint epoch start (`[[nodiscard]]`) - */ -template -[[nodiscard]] inline std::vector time_points_to_epoch(const std::vector &time_points) { - std::vector times(time_points.size()); +template +constexpr bool is_vector_v = is_vector::value; - for (std::size_t i = 0; i < times.size(); ++i) { - times[i] = time_points[i].time_since_epoch(); - } - return times; -} +/*****************************************************************************************************/ +/** string manipulation **/ +/*****************************************************************************************************/ /** - * @brief Return the value encapsulated by the std::optional @p opt if it contains a value, otherwise a default constructed @p T is returned. 
- * @tparam T the type of the value stored in the std::optional - * @param[in] opt the std::optional to check - * @return the value of the std::optional or a default constructed @p T (`[[nodiscard]]`) + * @brief Checks whether the string @p sv starts with the substring @p start + * @param[in] sv the full string + * @param[in] start the substring + * @return `true` if @p sv starts with @p start, otherwise `false` */ -template -[[nodiscard]] inline T value_or_default(const std::optional &opt) { - if (opt.has_value()) { - return opt.value(); - } else { - return T{}; - } -} +[[nodiscard]] bool starts_with(std::string_view sv, std::string_view start) noexcept; /** * @brief Trim the @p str, i.e., remove all leading and trailing whitespace characters. @@ -134,6 +96,14 @@ template */ [[nodiscard]] std::string to_lower_case(std::string_view str); +/** + * @brief Split the @p str at the delimiters @p delim. + * @param[in] str the string to split + * @param[in] delim the used delimiter + * @return a vector containing all split tokens (`[[nodiscard]]`) + */ +[[nodiscard]] std::vector split(std::string_view str, char delim = ' '); + /** * @brief Convert the @p str to a value of type @p T. * @tparam T the type to convert the string to @@ -212,23 +182,6 @@ template return split_str; } -/** - * @brief Split the @p str at the delimiters @p delim. - * @param[in] str the string to split - * @param[in] delim the used delimiter - * @return a vector containing all split tokens (`[[nodiscard]]`) - */ -[[nodiscard]] std::vector split(std::string_view str, char delim = ' '); - -template -struct is_vector : std::false_type { }; - -template -struct is_vector> : std::true_type { }; - -template -constexpr bool is_vector_v = is_vector::value; - /** * @brief Convert all entries in the map to a single dict-like string. * @details The resulting string is of form "{KEY, VALUE}" or "{KEY, [VALUES]}". 
@@ -272,6 +225,59 @@ template return quoted; } +/*****************************************************************************************************/ +/** other free functions **/ +/*****************************************************************************************************/ + +/** + * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point. + * @tparam TimePoint the type if the time points + * @param[in] time_points the time points + * @param[in] reference the reference time point + * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline std::vector durations_from_reference_time(const std::vector &time_points, const TimePoint &reference) { + std::vector durations(time_points.size()); + + for (std::size_t i = 0; i < durations.size(); ++i) { + durations[i] = std::trunc(std::chrono::duration(time_points[i] - reference).count() * 1000.0) / 1000.0; + } + + return durations; +} + +/** + * @brief Convert all time points to their duration since the epoch start. + * @tparam TimePoint the type of the time points + * @param[in] time_points the time points + * @return the duration passed since the respective @p TimePoint epoch start (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline std::vector time_points_to_epoch(const std::vector &time_points) { + std::vector times(time_points.size()); + + for (std::size_t i = 0; i < times.size(); ++i) { + times[i] = time_points[i].time_since_epoch(); + } + return times; +} + +/** + * @brief Return the value encapsulated by the std::optional @p opt if it contains a value, otherwise a default constructed @p T is returned. 
+ * @tparam T the type of the value stored in the std::optional + * @param[in] opt the std::optional to check + * @return the value of the std::optional or a default constructed @p T (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline T value_or_default(const std::optional &opt) { + if (opt.has_value()) { + return opt.value(); + } else { + return T{}; + } +} + } // namespace hws::detail #endif // HARDWARE_SAMPLING_UTILITY_HPP_ diff --git a/src/hardware_sampling/utility.cpp b/src/hardware_sampling/utility.cpp index 58e604c..9e2dbc2 100644 --- a/src/hardware_sampling/utility.cpp +++ b/src/hardware_sampling/utility.cpp @@ -15,6 +15,10 @@ namespace hws::detail { +bool starts_with(const std::string_view sv, const std::string_view start) noexcept { + return sv.substr(0, start.size()) == start; +} + std::string_view trim(std::string_view str) noexcept { // trim right { From 131de00e7dc67e6c49dfa153cda08e1973b9340a Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 16:49:31 +0200 Subject: [PATCH 26/69] Update Python bindings. 
--- README.md | 96 ++++++++++---------- bindings/cpu_hardware_sampler.cpp | 43 ++++----- bindings/event.cpp | 5 +- bindings/gpu_amd_hardware_sampler.cpp | 106 ++++++++++++----------- bindings/gpu_intel_hardware_sampler.cpp | 14 +-- bindings/gpu_nvidia_hardware_sampler.cpp | 78 +++++++++-------- bindings/hardware_sampler.cpp | 17 ++-- bindings/main.cpp | 28 +++--- 8 files changed, 204 insertions(+), 183 deletions(-) diff --git a/README.md b/README.md index f74d6b9..bb02f24 100644 --- a/README.md +++ b/README.md @@ -122,22 +122,22 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ### power-related samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:----------------------------|:---------------------------------:|:-----------:|:-----------:|:----------:| -| power_management_limit | - | W | W | | -| power_enforced_limit | - | W | W | | -| power_measurement_type | str (fix) | str | str | | -| power_management_mode | - | bool | - | | -| available_power_profiles | - | list of int | list of str | | -| power_usage | W | W | W | | -| core_watt | W | - | - | - | -| dram_watt | W | - | - | - | -| package_rapl_throttling | % | - | - | - | -| dram_rapl_throttling | % | - | - | - | -| power_total_energy_consumed | J
(calculated via power_usage) | J | J | J | -| power_profile | - | int | str | | -| energy_threshold_enabled | | | | bool | -| energy_threshold | | | | J | +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-------------------------------|:---------------------------------:|:-----------:|:-----------:|:----------:| +| power_management_limit | - | W | W | | +| power_enforced_limit | - | W | W | | +| power_measurement_type | str (fix) | str | str | | +| power_management_mode | - | bool | - | | +| available_power_profiles | - | list of int | list of str | | +| power_usage | W | W | W | | +| core_watt | W | - | - | - | +| dram_watt | W | - | - | - | +| package_rapl_throttling | % | - | - | - | +| dram_rapl_throttling | % | - | - | - | +| power_total_energy_consumption | J
(calculated via power_usage) | J | J | J | +| power_profile | - | int | str | | +| energy_threshold_enabled | | | | bool | +| energy_threshold | | | | J | ### memory-related samples @@ -175,38 +175,38 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ### temperature-related samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:-------------------------|:----:|:-----------:|:--------:|:----------:| -| num_fans | - | int | int | | -| fan_speed_min | - | % | - | | -| fan_speed_max | - | % | RPM | | -| temperature_min | - | - | °C | | -| temperature_max | - | °C | °C | | -| memory_temperature_min | - | - | °C | | -| memory_temperature_max | - | °C | °C | | -| hotspot_temperature_min | - | - | °C | | -| hotspot_temperature_max | - | - | °C | | -| hbm_0_temperature_min | - | - | °C | | -| hbm_0_temperature_max | - | - | °C | | -| hbm_1_temperature_min | - | - | °C | | -| hbm_1_temperature_max | - | - | °C | | -| hbm_2_temperature_min | - | - | °C | | -| hbm_2_temperature_max | - | - | °C | | -| hbm_3_temperature_min | - | - | °C | | -| hbm_3_temperature_max | - | - | °C | | -| fan_speed_percentage | - | % | % | | -| temperature | °C | °C | °C | | -| memory_temperature | - | - | °C | | -| hotspot_temperature | - | - | °C | | -| hbm_0_temperature | - | - | °C | | -| hbm_1_temperature | - | - | °C | | -| hbm_2_temperature | - | - | °C | | -| hbm_3_temperature | - | - | °C | | -| temperature_{}_max | | | | | -| temperature_psu | | | | | -| temperature_{} | | | | | -| core_temperature | °C | - | - | - | -| core_throttle_percentage | % | - | - | - | +| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:------------------------|:----:|:-----------:|:--------:|:----------:| +| num_fans | - | int | int | | +| fan_speed_min | - | % | - | | +| fan_speed_max | - | % | RPM | | +| temperature_min | - | - | °C | | +| temperature_max | - | °C | °C | | +| memory_temperature_min | - | - | °C | | +| memory_temperature_max | - | °C | °C | | +| 
hotspot_temperature_min | - | - | °C | | +| hotspot_temperature_max | - | - | °C | | +| hbm_0_temperature_min | - | - | °C | | +| hbm_0_temperature_max | - | - | °C | | +| hbm_1_temperature_min | - | - | °C | | +| hbm_1_temperature_max | - | - | °C | | +| hbm_2_temperature_min | - | - | °C | | +| hbm_2_temperature_max | - | - | °C | | +| hbm_3_temperature_min | - | - | °C | | +| hbm_3_temperature_max | - | - | °C | | +| fan_speed_percentage | - | % | % | | +| temperature | °C | °C | °C | | +| memory_temperature | - | - | °C | | +| hotspot_temperature | - | - | °C | | +| hbm_0_temperature | - | - | °C | | +| hbm_1_temperature | - | - | °C | | +| hbm_2_temperature | - | - | °C | | +| hbm_3_temperature | - | - | °C | | +| temperature_{}_max | | | | | +| temperature_psu | | | | | +| temperature_{} | | | | | +| core_temperature | °C | - | - | - | +| core_throttle_percent | % | - | - | - | ### gfx-related (iGPU) samples diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp index 75f0a01..9ed204d 100644 --- a/bindings/cpu_hardware_sampler.cpp +++ b/bindings/cpu_hardware_sampler.cpp @@ -9,12 +9,12 @@ #include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds #include "pybind11/pybind11.h" // py::module_ #include "pybind11/stl.h" // bind STL types #include // std::chrono::milliseconds -#include // std::format namespace py = pybind11; @@ -23,6 +23,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { py::class_(m, "CpuGeneralSamples") .def("get_architecture", &hws::cpu_general_samples::get_architecture, "the CPU architecture (e.g., x86_64)") .def("get_byte_order", &hws::cpu_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") + .def("get_num_cores", &hws::cpu_general_samples::get_num_cores, "the total 
number of cores of the CPU(s)") .def("get_num_threads", &hws::cpu_general_samples::get_num_threads, "the number of threads of the CPU(s) including potential hyper-threads") .def("get_threads_per_core", &hws::cpu_general_samples::get_threads_per_core, "the number of hyper-threads per core") .def("get_cores_per_socket", &hws::cpu_general_samples::get_cores_per_socket, "the number of physical cores per socket") @@ -31,45 +32,47 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("get_vendor_id", &hws::cpu_general_samples::get_vendor_id, "the vendor ID (e.g. GenuineIntel)") .def("get_name", &hws::cpu_general_samples::get_name, "the name of the CPU") .def("get_flags", &hws::cpu_general_samples::get_flags, "potential CPU flags (e.g., sse4_1, avx, avx, etc)") - .def("get_busy_percent", &hws::cpu_general_samples::get_busy_percent, "the percent the CPU was busy doing work") + .def("get_compute_utilization", &hws::cpu_general_samples::get_compute_utilization, "the percent the CPU was busy doing work") .def("get_ipc", &hws::cpu_general_samples::get_ipc, "the instructions-per-cycle count") .def("get_irq", &hws::cpu_general_samples::get_irq, "the number of interrupts") .def("get_smi", &hws::cpu_general_samples::get_smi, "the number of system management interrupts") .def("get_poll", &hws::cpu_general_samples::get_poll, "the number of times the CPU was in the polling state") .def("get_poll_percent", &hws::cpu_general_samples::get_poll_percent, "the percent of the CPU was in the polling state") .def("__repr__", [](const hws::cpu_general_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the clock samples py::class_(m, "CpuClockSamples") - .def("get_frequency_boost", &hws::cpu_clock_samples::get_frequency_boost, "true if frequency boosting is enabled") - .def("get_min_frequency", &hws::cpu_clock_samples::get_min_frequency, "the minimum possible CPU frequency in MHz") - .def("get_max_frequency", 
&hws::cpu_clock_samples::get_max_frequency, "the maximum possible CPU frequency in MHz") - .def("get_average_frequency", &hws::cpu_clock_samples::get_average_frequency, "the average CPU frequency in MHz including idle cores") + .def("get_auto_boosted_clock_enabled", &hws::cpu_clock_samples::get_auto_boosted_clock_enabled, "true if frequency boosting is enabled") + .def("get_clock_frequency_min", &hws::cpu_clock_samples::get_clock_frequency_min, "the minimum possible CPU frequency in MHz") + .def("get_clock_frequency_max", &hws::cpu_clock_samples::get_clock_frequency_max, "the maximum possible CPU frequency in MHz") + .def("get_clock_frequency", &hws::cpu_clock_samples::get_clock_frequency, "the average CPU frequency in MHz including idle cores") .def("get_average_non_idle_frequency", &hws::cpu_clock_samples::get_average_non_idle_frequency, "the average CPU frequency in MHz excluding idle cores") .def("get_time_stamp_counter", &hws::cpu_clock_samples::get_time_stamp_counter, "the time stamp counter") .def("__repr__", [](const hws::cpu_clock_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the power samples py::class_(m, "CpuPowerSamples") - .def("get_package_watt", &hws::cpu_power_samples::get_package_watt, "the currently consumed power of the package of the CPU in W") + .def("get_power_measurement_type", &hws::cpu_power_samples::get_power_measurement_type, "the type of the power readings: always \"instant/current\"") + .def("get_power_usage", &hws::cpu_power_samples::get_power_usage, "the currently consumed power of the package of the CPU in W") + .def("get_power_total_energy_consumption", &hws::cpu_power_samples::get_power_total_energy_consumption, "the total energy consumption in J") .def("get_core_watt", &hws::cpu_power_samples::get_core_watt, "the currently consumed power of the core part of the CPU in W") .def("get_ram_watt", &hws::cpu_power_samples::get_ram_watt, "the currently consumed power of the RAM part of the CPU
in W") .def("get_package_rapl_throttle_percent", &hws::cpu_power_samples::get_package_rapl_throttle_percent, "the percent of time the package throttled due to RAPL limiters") .def("get_dram_rapl_throttle_percent", &hws::cpu_power_samples::get_dram_rapl_throttle_percent, "the percent of time the DRAM throttled due to RAPL limiters") .def("__repr__", [](const hws::cpu_power_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the memory samples py::class_(m, "CpuMemorySamples") - .def("get_l1d_cache", &hws::cpu_memory_samples::get_l1d_cache, "the size of the L1 data cache") - .def("get_l1i_cache", &hws::cpu_memory_samples::get_l1i_cache, "the size of the L1 instruction cache") - .def("get_l2_cache", &hws::cpu_memory_samples::get_l2_cache, "the size of the L2 cache") - .def("get_l3_cache", &hws::cpu_memory_samples::get_l3_cache, "the size of the L2 cache") + .def("get_cache_size_L1d", &hws::cpu_memory_samples::get_cache_size_L1d, "the size of the L1 data cache") + .def("get_cache_size_L1i", &hws::cpu_memory_samples::get_cache_size_L1i, "the size of the L1 instruction cache") + .def("get_cache_size_L2", &hws::cpu_memory_samples::get_cache_size_L2, "the size of the L2 cache") + .def("get_cache_size_L3", &hws::cpu_memory_samples::get_cache_size_L3, "the size of the L3 cache") .def("get_memory_total", &hws::cpu_memory_samples::get_memory_total, "the total available memory in Byte") .def("get_swap_memory_total", &hws::cpu_memory_samples::get_swap_memory_total, "the total available swap memory in Byte") .def("get_memory_used", &hws::cpu_memory_samples::get_memory_used, "the currently used memory in Byte") @@ -77,16 +80,16 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("get_swap_memory_used", &hws::cpu_memory_samples::get_swap_memory_used, "the currently used swap memory in Byte") .def("get_swap_memory_free", &hws::cpu_memory_samples::get_swap_memory_free, "the currently free swap memory in Byte") .def("__repr__", [](const
hws::cpu_memory_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the temperature samples py::class_(m, "CpuTemperatureSamples") + .def("get_temperature", &hws::cpu_temperature_samples::get_temperature, "the current temperature of the whole package in °C") .def("get_core_temperature", &hws::cpu_temperature_samples::get_core_temperature, "the current temperature of the core part of the CPU in °C") .def("get_core_throttle_percent", &hws::cpu_temperature_samples::get_core_throttle_percent, "the percent of time the CPU has throttled") - .def("get_package_temperature", &hws::cpu_temperature_samples::get_package_temperature, "the current temperature of the whole package in °C") .def("__repr__", [](const hws::cpu_temperature_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the gfx samples @@ -98,7 +101,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("get_cpu_works_for_gpu_percent", &hws::cpu_gfx_samples::get_cpu_works_for_gpu_percent, "the percent of time the CPU was doing work for the iGPU") .def("get_gfx_watt", &hws::cpu_gfx_samples::get_gfx_watt, "the currently consumed power of the iGPU of the CPU in W") .def("__repr__", [](const hws::cpu_gfx_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the idle state samples @@ -110,7 +113,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("get_system_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_system_low_power_idle_state_percent, "the percent of time the CPU was in the system low power idle state") .def("get_package_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_package_low_power_idle_state_percent, "the percent of time the CPU was in the package low power idle state") - .def("__repr__", [](const hws::cpu_gfx_samples &self) { - return std::format("", self); + .def("__repr__", [](const hws::cpu_idle_states_samples &self) { + return fmt::format("", self); }); // bind the CPU hardware sampler class @@ -125,6
+128,6 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("gfx_samples", &hws::cpu_hardware_sampler::gfx_samples, "get all gfx (iGPU) related samples") .def("idle_state_samples", &hws::cpu_hardware_sampler::idle_state_samples, "get all idle state related samples") .def("__repr__", [](const hws::cpu_hardware_sampler &self) { - return std::format("", self); + return fmt::format("", self); }); } diff --git a/bindings/event.cpp b/bindings/event.cpp index ba463ad..8a9696a 100644 --- a/bindings/event.cpp +++ b/bindings/event.cpp @@ -7,12 +7,11 @@ #include "hardware_sampling/event.hpp" // hws::event +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // bind std::chrono types #include "pybind11/pybind11.h" // py::module_ #include "pybind11/stl.h" // bind STL types -#include // std::format - namespace py = pybind11; void init_event(py::module_ &m) { @@ -22,6 +21,6 @@ void init_event(py::module_ &m) { .def_readonly("time_point", &hws::event::time_point, "read the time point associated to this event") .def_readonly("name", &hws::event::name, "read the name associated to this event") .def("__repr__", [](const hws::event &self) { - return std::format("", self.time_point.time_since_epoch(), self.name); + return fmt::format("", self.time_point.time_since_epoch(), self.name); }); } diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp index f43c741..9ffd042 100644 --- a/bindings/gpu_amd_hardware_sampler.cpp +++ b/bindings/gpu_amd_hardware_sampler.cpp @@ -9,98 +9,106 @@ #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds #include "pybind11/pybind11.h" // py::module_ #include 
"pybind11/stl.h" // bind STL types #include // std::chrono::milliseconds #include // std::size_t -#include // std::format namespace py = pybind11; void init_gpu_amd_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "RocmSmiGeneralSamples") + .def("get_architecture", &hws::rocm_smi_general_samples::get_name, "the architecture name of the device") + .def("get_byte_order", &hws::rocm_smi_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") + .def("get_vendor_id", &hws::rocm_smi_general_samples::get_vendor_id, "the vendor ID") .def("get_name", &hws::rocm_smi_general_samples::get_name, "the name of the device") + .def("get_compute_utilization", &hws::rocm_smi_general_samples::get_compute_utilization, "the GPU compute utilization in percent") + .def("get_memory_utilization", &hws::rocm_smi_general_samples::get_memory_utilization, "the GPU memory utilization in percent") .def("get_performance_level", &hws::rocm_smi_general_samples::get_performance_level, "the performance level: one of rsmi_dev_perf_level_t") - .def("get_utilization_gpu", &hws::rocm_smi_general_samples::get_utilization_gpu, "the GPU compute utilization in percent") - .def("get_utilization_mem", &hws::rocm_smi_general_samples::get_utilization_mem, "the GPU memory utilization in percent") .def("__repr__", [](const hws::rocm_smi_general_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the clock samples py::class_(m, "RocmSmiClockSamples") - .def("get_clock_system_min", &hws::rocm_smi_clock_samples::get_clock_system_min, "the minimum possible system clock frequency in Hz") - .def("get_clock_system_max", &hws::rocm_smi_clock_samples::get_clock_system_max, "the maximum possible system clock frequency in Hz") - .def("get_clock_socket_min", &hws::rocm_smi_clock_samples::get_clock_socket_min, "the minimum possible socket clock frequency in Hz") - .def("get_clock_socket_max", 
&hws::rocm_smi_clock_samples::get_clock_socket_max, "the maximum possible socket clock frequency in Hz") - .def("get_clock_memory_min", &hws::rocm_smi_clock_samples::get_clock_memory_min, "the minimum possible memory clock frequency in Hz") - .def("get_clock_memory_max", &hws::rocm_smi_clock_samples::get_clock_memory_max, "the maximum possible memory clock frequency in Hz") - .def("get_clock_system", &hws::rocm_smi_clock_samples::get_clock_system, "the current system clock frequency in Hz") - .def("get_clock_socket", &hws::rocm_smi_clock_samples::get_clock_socket, "the current socket clock frequency in Hz") - .def("get_clock_memory", &hws::rocm_smi_clock_samples::get_clock_memory, "the current memory clock frequency in Hz") + .def("get_clock_frequency_min", &hws::rocm_smi_clock_samples::get_clock_frequency_min, "the minimum possible system clock frequency in MHz") + .def("get_clock_frequency_max", &hws::rocm_smi_clock_samples::get_clock_frequency_max, "the maximum possible system clock frequency in MHz") + .def("get_memory_clock_frequency_min", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") + .def("get_memory_clock_frequency_max", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz") + .def("get_socket_clock_frequency_min", &hws::rocm_smi_clock_samples::get_socket_clock_frequency_min, "the minimum possible socket clock frequency in MHz") + .def("get_socket_clock_frequency_max", &hws::rocm_smi_clock_samples::get_socket_clock_frequency_max, "the maximum possible socket clock frequency in MHz") + .def("get_available_clock_frequencies", &hws::rocm_smi_clock_samples::get_available_clock_frequencies, "the available clock frequencies in MHz (slowest to fastest)") + .def("get_available_memory_clock_frequencies", &hws::rocm_smi_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to 
fastest)") + .def("get_clock_frequency", &hws::rocm_smi_clock_samples::get_clock_frequency, "the current system clock frequency in MHz") + .def("get_memory_clock_frequency", &hws::rocm_smi_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz") + .def("get_socket_clock_frequency", &hws::rocm_smi_clock_samples::get_socket_clock_frequency, "the current socket clock frequency in MHz") .def("get_overdrive_level", &hws::rocm_smi_clock_samples::get_overdrive_level, "the GPU overdrive percentage") .def("get_memory_overdrive_level", &hws::rocm_smi_clock_samples::get_memory_overdrive_level, "the GPU's memory overdrive percentage") .def("__repr__", [](const hws::rocm_smi_clock_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the power samples py::class_(m, "RocmSmiPowerSamples") - .def("get_power_default_cap", &hws::rocm_smi_power_samples::get_power_default_cap, "the default power cap, may be different from power cap") - .def("get_power_cap", &hws::rocm_smi_power_samples::get_power_cap, "if the GPU draws more power (μW) than the power cap, the GPU may throttle") - .def("get_power_type", &hws::rocm_smi_power_samples::get_power_type, "the type of the power management: either current power draw or average power draw") + .def("get_power_management_limit", &hws::rocm_smi_power_samples::get_power_management_limit, "the default power cap (W), may be different from power cap") + .def("get_power_enforced_limit", &hws::rocm_smi_power_samples::get_power_enforced_limit, "if the GPU draws more power (W) than the power cap, the GPU may throttle") + .def("get_power_measurement_type", &hws::rocm_smi_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw") .def("get_available_power_profiles", &hws::rocm_smi_power_samples::get_available_power_profiles, "a list of the available power profiles") - .def("get_power_usage", 
&hws::rocm_smi_power_samples::get_power_usage, "the current GPU socket power draw in μW") - .def("get_power_total_energy_consumption", &hws::rocm_smi_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in μJ") + .def("get_power_usage", &hws::rocm_smi_power_samples::get_power_usage, "the current GPU socket power draw in W") + .def("get_power_total_energy_consumption", &hws::rocm_smi_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in J") .def("get_power_profile", &hws::rocm_smi_power_samples::get_power_profile, "the current active power profile; one of 'available_power_profiles'") .def("__repr__", [](const hws::rocm_smi_power_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the memory samples py::class_(m, "RocmSmiMemorySamples") .def("get_memory_total", &hws::rocm_smi_memory_samples::get_memory_total, "the total available memory in Byte") .def("get_visible_memory_total", &hws::rocm_smi_memory_samples::get_visible_memory_total, "the total visible available memory in Byte, may be smaller than the total memory") - .def("get_min_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_min_num_pcie_lanes, "the minimum number of used PCIe lanes") - .def("get_max_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_max_num_pcie_lanes, "the maximum number of used PCIe lanes") + .def("get_num_pcie_lanes_min", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_min, "the minimum number of used PCIe lanes") + .def("get_num_pcie_lanes_max", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_max, "the maximum number of used PCIe lanes") + .def("get_pcie_link_transfer_rate_min", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate_min, "the minimum PCIe link transfer rate in MT/s") + .def("get_pcie_link_transfer_rate_max", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate_max, "the maximum PCIe link transfer rate 
in MT/s") .def("get_memory_used", &hws::rocm_smi_memory_samples::get_memory_used, "the currently used memory in Byte") - .def("get_pcie_transfer_rate", &hws::rocm_smi_memory_samples::get_pcie_transfer_rate, "the current PCIe transfer rate in T/s") + .def("get_memory_free", &hws::rocm_smi_memory_samples::get_memory_free, "the currently free memory in Byte") .def("get_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_num_pcie_lanes, "the number of currently used PCIe lanes") + .def("get_pcie_link_transfer_rate", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate, "the current PCIe transfer rate in T/s") .def("__repr__", [](const hws::rocm_smi_memory_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the temperature samples py::class_(m, "RocmSmiTemperatureSamples") .def("get_num_fans", &hws::rocm_smi_temperature_samples::get_num_fans, "the number of fans (if any)") - .def("get_max_fan_speed", &hws::rocm_smi_temperature_samples::get_max_fan_speed, "the maximum fan speed") - .def("get_temperature_edge_min", &hws::rocm_smi_temperature_samples::get_temperature_edge_min, "the minimum temperature on the GPU's edge temperature sensor in m°C") - .def("get_temperature_edge_max", &hws::rocm_smi_temperature_samples::get_temperature_edge_max, "the maximum temperature on the GPU's edge temperature sensor in m°C") - .def("get_temperature_hotspot_min", &hws::rocm_smi_temperature_samples::get_temperature_hotspot_min, "the minimum temperature on the GPU's hotspot temperature sensor in m°C") - .def("get_temperature_hotspot_max", &hws::rocm_smi_temperature_samples::get_temperature_hotspot_max, "the maximum temperature on the GPU's hotspot temperature sensor in m°C") - .def("get_temperature_memory_min", &hws::rocm_smi_temperature_samples::get_temperature_memory_min, "the minimum temperature on the GPU's memory temperature sensor in m°C") - .def("get_temperature_memory_max", 
&hws::rocm_smi_temperature_samples::get_temperature_memory_max, "the maximum temperature on the GPU's memory temperature sensor in m°C") - .def("get_temperature_hbm_0_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0_min, "the minimum temperature on the GPU's HBM0 temperature sensor in m°C") - .def("get_temperature_hbm_0_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0_max, "the maximum temperature on the GPU's HBM0 temperature sensor in m°C") - .def("get_temperature_hbm_1_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1_min, "the minimum temperature on the GPU's HBM1 temperature sensor in m°C") - .def("get_temperature_hbm_1_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1_max, "the maximum temperature on the GPU's HBM1 temperature sensor in m°C") - .def("get_temperature_hbm_2_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2_min, "the minimum temperature on the GPU's HBM2 temperature sensor in m°C") - .def("get_temperature_hbm_2_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2_max, "the maximum temperature on the GPU's HBM2 temperature sensor in m°C") - .def("get_temperature_hbm_3_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3_min, "the minimum temperature on the GPU's HBM3 temperature sensor in m°C") - .def("get_temperature_hbm_3_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3_max, "the maximum temperature on the GPU's HBM3 temperature sensor in m°C") - .def("get_fan_speed", &hws::rocm_smi_temperature_samples::get_fan_speed, "the current fan speed in %") - .def("get_temperature_edge", &hws::rocm_smi_temperature_samples::get_temperature_edge, "the current temperature on the GPU's edge temperature sensor in m°C") - .def("get_temperature_hotspot", &hws::rocm_smi_temperature_samples::get_temperature_hotspot, "the current temperature on the GPU's hotspot temperature sensor in m°C") - .def("get_temperature_memory", 
&hws::rocm_smi_temperature_samples::get_temperature_memory, "the current temperature on the GPU's memory temperature sensor in m°C") - .def("get_temperature_hbm_0", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0, "the current temperature on the GPU's HBM0 temperature sensor in m°C") - .def("get_temperature_hbm_1", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1, "the current temperature on the GPU's HBM1 temperature sensor in m°C") - .def("get_temperature_hbm_2", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2, "the current temperature on the GPU's HBM2 temperature sensor in m°C") - .def("get_temperature_hbm_3", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3, "the current temperature on the GPU's HBM3 temperature sensor in m°C") + .def("get_fan_speed_max", &hws::rocm_smi_temperature_samples::get_fan_speed_max, "the maximum fan speed in RPM") + .def("get_temperature_min", &hws::rocm_smi_temperature_samples::get_temperature_min, "the minimum temperature on the GPU's edge temperature sensor in °C") + .def("get_temperature_max", &hws::rocm_smi_temperature_samples::get_temperature_max, "the maximum temperature on the GPU's edge temperature sensor in °C") + .def("get_memory_temperature_min", &hws::rocm_smi_temperature_samples::get_memory_temperature_min, "the minimum temperature on the GPU's memory temperature sensor in °C") + .def("get_memory_temperature_max", &hws::rocm_smi_temperature_samples::get_memory_temperature_max, "the maximum temperature on the GPU's memory temperature sensor in °C") + .def("get_hotspot_temperature_min", &hws::rocm_smi_temperature_samples::get_hotspot_temperature_min, "the minimum temperature on the GPU's hotspot temperature sensor in °C") + .def("get_hotspot_temperature_max", &hws::rocm_smi_temperature_samples::get_hotspot_temperature_max, "the maximum temperature on the GPU's hotspot temperature sensor in °C") + .def("get_hbm_0_temperature_min", 
&hws::rocm_smi_temperature_samples::get_hbm_0_temperature_min, "the minimum temperature on the GPU's HBM0 temperature sensor in °C") + .def("get_hbm_0_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature_max, "the maximum temperature on the GPU's HBM0 temperature sensor in °C") + .def("get_hbm_1_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature_min, "the minimum temperature on the GPU's HBM1 temperature sensor in °C") + .def("get_hbm_1_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature_max, "the maximum temperature on the GPU's HBM1 temperature sensor in °C") + .def("get_hbm_2_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature_min, "the minimum temperature on the GPU's HBM2 temperature sensor in °C") + .def("get_hbm_2_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature_max, "the maximum temperature on the GPU's HBM2 temperature sensor in °C") + .def("get_hbm_3_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature_min, "the minimum temperature on the GPU's HBM3 temperature sensor in °C") + .def("get_hbm_3_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature_max, "the maximum temperature on the GPU's HBM3 temperature sensor in °C") + .def("get_fan_speed_percentage", &hws::rocm_smi_temperature_samples::get_fan_speed_percentage, "the current fan speed in %") + .def("get_temperature", &hws::rocm_smi_temperature_samples::get_temperature, "the current temperature on the GPU's edge temperature sensor in °C") + .def("get_hotspot_temperature", &hws::rocm_smi_temperature_samples::get_hotspot_temperature, "the current temperature on the GPU's hotspot temperature sensor in °C") + .def("get_memory_temperature", &hws::rocm_smi_temperature_samples::get_memory_temperature, "the current temperature on the GPU's memory temperature sensor in °C") + .def("get_hbm_0_temperature", 
&hws::rocm_smi_temperature_samples::get_hbm_0_temperature, "the current temperature on the GPU's HBM0 temperature sensor in °C") + .def("get_hbm_1_temperature", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature, "the current temperature on the GPU's HBM1 temperature sensor in °C") + .def("get_hbm_2_temperature", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature, "the current temperature on the GPU's HBM2 temperature sensor in °C") + .def("get_hbm_3_temperature", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature, "the current temperature on the GPU's HBM3 temperature sensor in °C") .def("__repr__", [](const hws::rocm_smi_temperature_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the GPU AMD hardware sampler class @@ -115,6 +123,6 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) { .def("memory_samples", &hws::gpu_amd_hardware_sampler::memory_samples, "get all memory related samples") .def("temperature_samples", &hws::gpu_amd_hardware_sampler::temperature_samples, "get all temperature related samples") .def("__repr__", [](const hws::gpu_amd_hardware_sampler &self) { - return std::format("", self); + return fmt::format("", self); }); } diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp index 39b346a..0b05a55 100644 --- a/bindings/gpu_intel_hardware_sampler.cpp +++ b/bindings/gpu_intel_hardware_sampler.cpp @@ -9,13 +9,13 @@ #include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds #include "pybind11/pybind11.h" // py::module_ #include "pybind11/stl.h" // bind STL types #include // std::chrono::milliseconds 
#include // std::size_t -#include // std::format namespace py = pybind11; @@ -27,7 +27,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { .def("get_num_threads_per_eu", &hws::level_zero_general_samples::get_num_threads_per_eu, "the number of threads per EU unit") .def("get_eu_simd_width", &hws::level_zero_general_samples::get_eu_simd_width, "the physical EU unit SIMD width") .def("__repr__", [](const hws::level_zero_general_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the clock samples @@ -45,7 +45,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { .def("get_clock_mem", &hws::level_zero_clock_samples::get_clock_mem, "the current memory frequency in MHz") .def("get_throttle_reason_mem", &hws::level_zero_clock_samples::get_throttle_reason_mem, "the current memory frequency throttle reason") .def("__repr__", [](const hws::level_zero_clock_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the power samples @@ -54,7 +54,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { .def("get_energy_threshold", &hws::level_zero_power_samples::get_energy_threshold, "the energy threshold in J") .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ") .def("__repr__", [](const hws::level_zero_power_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the memory samples @@ -72,7 +72,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { .def("get_pcie_link_width", &hws::level_zero_memory_samples::get_pcie_link_width, "the current PCIe lane width") .def("get_pcie_link_generation", &hws::level_zero_memory_samples::get_pcie_link_generation, "the current PCIe generation") .def("__repr__", [](const hws::level_zero_memory_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind 
the temperature samples @@ -81,7 +81,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { .def("get_temperature_psu", &hws::level_zero_temperature_samples::get_temperature_psu, "the temperature of the PSU in °C") .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current temperature for the sensor in °C") .def("__repr__", [](const hws::level_zero_temperature_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the GPU Intel hardware sampler class @@ -96,6 +96,6 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { .def("memory_samples", &hws::gpu_intel_hardware_sampler::memory_samples, "get all memory related samples") .def("temperature_samples", &hws::gpu_intel_hardware_sampler::temperature_samples, "get all temperature related samples") .def("__repr__", [](const hws::gpu_intel_hardware_sampler &self) { - return std::format("", self); + return fmt::format("", self); }); } diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp index 21130ae..1550c07 100644 --- a/bindings/gpu_nvidia_hardware_sampler.cpp +++ b/bindings/gpu_nvidia_hardware_sampler.cpp @@ -9,84 +9,92 @@ #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds #include "pybind11/pybind11.h" // py::module_ #include "pybind11/stl.h" // bind STL types #include // std::chrono::milliseconds #include // std::size_t -#include // std::format namespace py = pybind11; void init_gpu_nvidia_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "NvmlGeneralSamples") + .def("get_architecture", &hws::nvml_general_samples::get_architecture, "the architecture name 
of the device") + .def("get_byte_order", &hws::nvml_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") + .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores") + .def("get_vendor_id", &hws::nvml_general_samples::get_vendor_id, "the vendor ID") .def("get_name", &hws::nvml_general_samples::get_name, "the name of the device") .def("get_persistence_mode", &hws::nvml_general_samples::get_persistence_mode, "the persistence mode: if true, the driver is always loaded reducing the latency for the first API call") - .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores") - .def("get_performance_state", &hws::nvml_general_samples::get_performance_state, "the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance") - .def("get_utilization_gpu", &hws::nvml_general_samples::get_utilization_gpu, "the GPU compute utilization in percent") - .def("get_utilization_mem", &hws::nvml_general_samples::get_utilization_mem, "the GPU memory utilization in percent") + .def("get_compute_utilization", &hws::nvml_general_samples::get_compute_utilization, "the GPU compute utilization in percent") + .def("get_memory_utilization", &hws::nvml_general_samples::get_memory_utilization, "the GPU memory utilization in percent") + .def("get_performance_level", &hws::nvml_general_samples::get_performance_level, "the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance") .def("__repr__", [](const hws::nvml_general_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the clock samples py::class_(m, "NvmlClockSamples") - .def("get_adaptive_clock_status", &hws::nvml_clock_samples::get_adaptive_clock_status, "true if clock boosting is currently enabled") - .def("get_clock_graph_min", &hws::nvml_clock_samples::get_clock_graph_min, "the minimum possible graphics clock frequency in MHz") - 
.def("get_clock_graph_max", &hws::nvml_clock_samples::get_clock_graph_max, "the maximum possible graphics clock frequency in MHz") - .def("get_clock_sm_max", &hws::nvml_clock_samples::get_clock_sm_max, "the maximum possible SM clock frequency in MHz") - .def("get_clock_mem_min", &hws::nvml_clock_samples::get_clock_mem_min, "the minimum possible memory clock frequency in MHz") - .def("get_clock_mem_max", &hws::nvml_clock_samples::get_clock_mem_max, "the maximum possible memory clock frequency in MHz") - .def("get_clock_graph", &hws::nvml_clock_samples::get_clock_graph, "the current graphics clock frequency in MHz") - .def("get_clock_sm", &hws::nvml_clock_samples::get_clock_sm, "the current SM clock frequency in Mhz") - .def("get_clock_mem", &hws::nvml_clock_samples::get_clock_mem, "the current memory clock frequency in MHz") - .def("get_clock_throttle_reason", &hws::nvml_clock_samples::get_clock_throttle_reason, "the reason the GPU clock throttled (bitmask)") - .def("get_auto_boosted_clocks", &hws::nvml_clock_samples::get_auto_boosted_clocks, "true if the clocks are currently auto boosted") + .def("get_auto_boosted_clock_enabled", &hws::nvml_clock_samples::get_auto_boosted_clock_enabled, "true if clock boosting is currently enabled") + .def("get_clock_frequency_min", &hws::nvml_clock_samples::get_clock_frequency_min, "the minimum possible graphics clock frequency in MHz") + .def("get_clock_frequency_max", &hws::nvml_clock_samples::get_clock_frequency_max, "the maximum possible graphics clock frequency in MHz") + .def("get_memory_clock_frequency_min", &hws::nvml_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") + .def("get_memory_clock_frequency_max", &hws::nvml_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz") + .def("get_sm_clock_frequency_max", &hws::nvml_clock_samples::get_sm_clock_frequency_max, "the maximum possible SM clock frequency in MHz") + 
.def("get_clock_frequency", &hws::nvml_clock_samples::get_clock_frequency, "the current graphics clock frequency in MHz") + .def("get_available_clock_frequencies", &hws::nvml_clock_samples::get_available_clock_frequencies, "the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest)") + .def("get_available_memory_clock_frequencies", &hws::nvml_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)") + .def("get_memory_clock_frequency", &hws::nvml_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz") + .def("get_sm_clock_frequency", &hws::nvml_clock_samples::get_sm_clock_frequency, "the current SM clock frequency in Mhz") + .def("get_throttle_reason", &hws::nvml_clock_samples::get_throttle_reason, "the reason the GPU clock throttled") + .def("get_auto_boosted_clock", &hws::nvml_clock_samples::get_auto_boosted_clock, "true if the clocks are currently auto boosted") .def("__repr__", [](const hws::nvml_clock_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the power samples py::class_(m, "NvmlPowerSamples") - .def("get_power_management_mode", &hws::nvml_power_samples::get_power_management_mode, "true if power management algorithms are supported and active") .def("get_power_management_limit", &hws::nvml_power_samples::get_power_management_limit, "if the GPU draws more power (mW) than the power management limit, the GPU may throttle") .def("get_power_enforced_limit", &hws::nvml_power_samples::get_power_enforced_limit, "the actually enforced power limit, may be different from power management limit if external limiters are set") - .def("get_power_state", &hws::nvml_power_samples::get_power_state, "the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power") + .def("get_power_measurement_type", &hws::nvml_power_samples::get_power_measurement_type, "the type of the 
power readings: either current power draw or average power draw") + .def("get_power_management_mode", &hws::nvml_power_samples::get_power_management_mode, "true if power management algorithms are supported and active") + .def("get_available_power_profiles", &hws::nvml_power_samples::get_available_power_profiles, "a list of the available power profiles") .def("get_power_usage", &hws::nvml_power_samples::get_power_usage, "the current power draw of the GPU and its related circuity (e.g., memory) in mW") .def("get_power_total_energy_consumption", &hws::nvml_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ") + .def("get_power_profile", &hws::nvml_power_samples::get_power_profile, "the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power") .def("__repr__", [](const hws::nvml_power_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the memory samples py::class_(m, "NvmlMemorySamples") .def("get_memory_total", &hws::nvml_memory_samples::get_memory_total, "the total available memory in Byte") - .def("get_pcie_link_max_speed", &hws::nvml_memory_samples::get_pcie_link_max_speed, "the maximum PCIe link speed in MBPS") + .def("get_num_pcie_lanes_max", &hws::nvml_memory_samples::get_num_pcie_lanes_max, "the maximum number of PCIe lanes") + .def("get_pcie_link_generation_max", &hws::nvml_memory_samples::get_pcie_link_generation_max, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)") + .def("get_pcie_link_speed_max", &hws::nvml_memory_samples::get_pcie_link_speed_max, "the maximum PCIe link speed in MBPS") .def("get_memory_bus_width", &hws::nvml_memory_samples::get_memory_bus_width, "the memory bus with in Bit") - .def("get_max_pcie_link_generation", &hws::nvml_memory_samples::get_max_pcie_link_generation, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)") - .def("get_memory_free", 
&hws::nvml_memory_samples::get_memory_free, "the currently free memory in Byte") .def("get_memory_used", &hws::nvml_memory_samples::get_memory_used, "the currently used memory in Byte") - .def("get_pcie_link_speed", &hws::nvml_memory_samples::get_pcie_link_speed, "the current PCIe link speed in MBPS") - .def("get_pcie_link_width", &hws::nvml_memory_samples::get_pcie_link_width, "the current PCIe link width (e.g., x16, x8, x4, etc)") + .def("get_memory_free", &hws::nvml_memory_samples::get_memory_free, "the currently free memory in Byte") + .def("get_num_pcie_lanes", &hws::nvml_memory_samples::get_num_pcie_lanes, "the current PCIe link width (e.g., x16, x8, x4, etc)") .def("get_pcie_link_generation", &hws::nvml_memory_samples::get_pcie_link_generation, "the current PCIe link generation (may change during runtime to save energy)") + .def("get_pcie_link_speed", &hws::nvml_memory_samples::get_pcie_link_speed, "the current PCIe link speed in MBPS") .def("__repr__", [](const hws::nvml_memory_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the temperature samples py::class_(m, "NvmlTemperatureSamples") .def("get_num_fans", &hws::nvml_temperature_samples::get_num_fans, "the number of fans (if any)") - .def("get_min_fan_speed", &hws::nvml_temperature_samples::get_min_fan_speed, "the minimum fan speed the user can set in %") - .def("get_max_fan_speed", &hws::nvml_temperature_samples::get_max_fan_speed, "the maximum fan speed the user can set in %") - .def("get_temperature_threshold_gpu_max", &hws::nvml_temperature_samples::get_temperature_threshold_gpu_max, "the maximum graphics temperature threshold in °C") - .def("get_temperature_threshold_mem_max", &hws::nvml_temperature_samples::get_temperature_threshold_mem_max, "the maximum memory temperature threshold in °C") - .def("get_fan_speed", &hws::nvml_temperature_samples::get_fan_speed, "the current intended fan speed in %") - .def("get_temperature_gpu", 
&hws::nvml_temperature_samples::get_temperature_gpu, "the current GPU temperature in °C") + .def("get_fan_speed_min", &hws::nvml_temperature_samples::get_fan_speed_min, "the minimum fan speed the user can set in %") + .def("get_fan_speed_max", &hws::nvml_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in %") + .def("get_temperature_max", &hws::nvml_temperature_samples::get_temperature_max, "the maximum graphics temperature threshold in °C") + .def("get_memory_temperature_max", &hws::nvml_temperature_samples::get_memory_temperature_max, "the maximum memory temperature threshold in °C") + .def("get_fan_speed_percentage", &hws::nvml_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %") + .def("get_temperature", &hws::nvml_temperature_samples::get_temperature, "the current GPU temperature in °C") .def("__repr__", [](const hws::nvml_temperature_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the GPU NVIDIA hardware sampler class @@ -101,6 +109,6 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) { .def("memory_samples", &hws::gpu_nvidia_hardware_sampler::memory_samples, "get all memory related samples") .def("temperature_samples", &hws::gpu_nvidia_hardware_sampler::temperature_samples, "get all temperature related samples") .def("__repr__", [](const hws::gpu_nvidia_hardware_sampler &self) { - return std::format("", self); + return fmt::format("", self); }); } diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index 5d45f74..e46dbc8 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -12,21 +12,22 @@ #if defined(HWS_FOR_CPUS_ENABLED) #include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler #endif +#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) + #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler +#endif #if defined(HWS_FOR_AMD_GPUS_ENABLED) 
#include "hardware_sampling/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler #endif #if defined(HWS_FOR_INTEL_GPUS_ENABLED) #include "hardware_sampling/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler #endif -#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) - #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler -#endif +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // bind std::chrono types #include "pybind11/pybind11.h" // py::module_, py::class_ #include "pybind11/stl.h" // bind STL types -#include // std::format +#include // std::string namespace py = pybind11; @@ -54,22 +55,22 @@ void init_hardware_sampler(py::module_ &m) { .def("__repr__", [](const hws::hardware_sampler &self) { #if defined(HWS_FOR_CPUS_ENABLED) if (dynamic_cast(&self)) { - return std::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) if (dynamic_cast(&self)) { - return std::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif #if defined(HWS_FOR_AMD_GPUS_ENABLED) if (dynamic_cast(&self)) { - return std::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif #if defined(HWS_FOR_INTEL_GPUS_ENABLED) if (dynamic_cast(&self)) { - return std::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif return std::string{ "unknown" }; diff --git a/bindings/main.cpp b/bindings/main.cpp index 5a4c01c..2b6f507 100644 --- a/bindings/main.cpp +++ b/bindings/main.cpp @@ -7,6 +7,11 @@ #include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_ +#include // std::string_view + +#define HWS_IS_DEFINED_HELPER(x) #x +#define HWS_IS_DEFINED(x) (std::string_view{ #x } != std::string_view{ HWS_IS_DEFINED_HELPER(x) }) + namespace py = pybind11; // forward declare binding functions @@ -23,30 +28,27 @@ PYBIND11_MODULE(HardwareSampling, m) { init_event(m); 
init_hardware_sampler(m); + // CPU sampling #if defined(HWS_FOR_CPUS_ENABLED) init_cpu_hardware_sampler(m); - m.def("has_cpu_hardware_sampler", []{return true;} ); -#else - m.def("has_cpu_hardware_sampler", []{return false;} ); #endif + m.def("has_cpu_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_CPUS_ENABLED); }); + + // NVIDIA GPU sampling #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) init_gpu_nvidia_hardware_sampler(m); - m.def("has_gpu_nvidia_hardware_sampler", []{return true;} ); -#else - m.def("has_gpu_nvidia_hardware_sampler", []{return false;} ); #endif + m.def("has_gpu_nvidia_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_NVIDIA_GPUS_ENABLED); }); + + // AMD GPU sampling #if defined(HWS_FOR_AMD_GPUS_ENABLED) init_gpu_amd_hardware_sampler(m); - m.def("has_gpu_amd_hardware_sampler", []{return true;} ); -#else - m.def("has_gpu_amd_hardware_sampler", []{return false;} ); #endif + m.def("has_gpu_amd_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_AMD_GPUS_ENABLED); }); + // Intel GPU sampling #if defined(HWS_FOR_INTEL_GPUS_ENABLED) init_gpu_intel_hardware_sampler(m); - m.def("has_gpu_intel_hardware_sampler", []{return true;} ); -#else - m.def("has_gpu_intel_hardware_sampler", []{return false;} ); #endif - + m.def("has_gpu_intel_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_INTEL_GPUS_ENABLED); }); } From 6f4079577e71056a777286fb6b8d1a63a1f3de3d Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 17:06:20 +0200 Subject: [PATCH 27/69] Update README tables. 
--- README.md | 297 +++++++++++++++++++++++++++--------------------------- 1 file changed, 148 insertions(+), 149 deletions(-) diff --git a/README.md b/README.md index bb02f24..c805dbb 100644 --- a/README.md +++ b/README.md @@ -64,175 +64,174 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ## Available samples +The sampling type `fixed` denotes samples that are gathered once per hardware samples like maximum clock frequencies or temperatures or the total available memory. +The sampling type `sampled` denotes samples that are gathered during the whole hardware sampling process like the current clock frequencies, temperatures, or memory consumption. + ### General samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:--------------------|:-----:|:-----------:|:---------:|:-------------:| -| architecture | str | str | str | ? | -| byte_order | str | str (fix) | str (fix) | str (fix) | -| num_cores | int | int | - | | -| num_threads | int | - | - | - | -| threads_per_core | int | - | - | - | -| cores_per_socket | int | - | - | - | -| num_sockets | int | - | - | - | -| numa_nodes | int | - | - | | -| vendor_id | str | str (fix) | str | str (PCIe ID) | -| name | str | str | str | str | -| flags | str | - | - | | -| persistence_mode | - | bool | - | | -| compute_utilization | % | % | % | ? | -| memory_utilization | - | % | % | ? | -| ipc | float | - | - | - | -| irq | int | - | - | - | -| smi | int | - | - | - | -| poll | int | - | - | - | -| poll_percent | % | - | - | - | -| performance_level | - | int | int | | -| standby_mode | | | | str | -| num_threads_per_eu | | | | int | -| eu_simd_width | | | | int | +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:--------------------|:-----------:|:-----------:|:-----------:|:---------:|:-------------:| +| architecture | fixed | str | str | str | ? 
| +| byte_order | fixed | str | str (fix) | str (fix) | str (fix) | +| num_cores | fixed | int | int | - | | +| num_threads | fixed | int | - | - | - | +| threads_per_core | fixed | int | - | - | - | +| cores_per_socket | fixed | int | - | - | - | +| num_sockets | fixed | int | - | - | - | +| numa_nodes | fixed | int | - | - | | +| vendor_id | fixed | str | str (fix) | str | str (PCIe ID) | +| name | fixed | str | str | str | str | +| flags | fixed | list of str | - | - | | +| persistence_mode | fixed | - | bool | - | | +| compute_utilization | sampled | % | % | % | ? | +| memory_utilization | sampled | - | % | % | ? | +| ipc | sampled | float | - | - | - | +| irq | sampled | int | - | - | - | +| smi | sampled | int | - | - | - | +| poll | sampled | int | - | - | - | +| poll_percent | sampled | % | - | - | - | +| performance_level | sampled | - | int | str | | +| standby_mode | | | | | str | +| num_threads_per_eu | | | | | int | +| eu_simd_width | | | | | int | ### clock-related samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:-----------------------------------|:----:|:----------------:|:--------:|:----------:| -| auto_boosted_clock_enabled | bool | bool | - | | -| clock_frequency_min | MHz | MHz | MHz | | -| clock_frequency_max | MHz | MHz | MHz | | -| memory_clock_frequency_min | - | MHz | MHz | | -| memory_clock_frequency_max | - | MHz | MHz | | -| socket_clock_frequency_min | - | - | MHz | - | -| socket_clock_frequency_min | - | - | MHz | - | -| sm_clock_frequency_max | - | MHz | - | - | -| available_clock_frequencies | - | MHz | MHz | | -| available_memory_clock_frequencies | - | MHz | MHz | | -| clock_frequency | MHz | MHz | MHz | | -| average_non_idle_frequency | MHz | - | - | - | -| time_stamp_counter | MHz | - | - | - | -| memory_clock_frequency | - | MHz | MHz | | -| socket_clock_frequency | - | - | MHz | - | -| sm_clock_frequency | - | MHz | - | - | -| overdrive_level | - | - | % | - | -| memory_overdrive_level | - | - | % | - | -| 
throttle_reason | - | string (bitmask) | - | | -| memory_throttle_reason | - | - | - | | -| auto_boosted_clock | - | bool | - | - | -| tdp_frequency_limit | - | - | - | | -| memory_tdp_frequency_limit | - | - | - | | +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-----------------------------------|:-----------:|:----:|:----------------:|:-----------:|:----------:| +| auto_boosted_clock_enabled | fixed | bool | bool | - | | +| clock_frequency_min | fixed | MHz | MHz | MHz | | +| clock_frequency_max | fixed | MHz | MHz | MHz | | +| memory_clock_frequency_min | fixed | - | MHz | MHz | | +| memory_clock_frequency_max | fixed | - | MHz | MHz | | +| socket_clock_frequency_min | fixed | - | - | MHz | - | +| socket_clock_frequency_min | fixed | - | - | MHz | - | +| sm_clock_frequency_max | fixed | - | MHz | - | - | +| available_clock_frequencies | fixed | - | map of MHz | list of MHz | | +| available_memory_clock_frequencies | fixed | - | list of MHz | list of MHz | | +| clock_frequency | sampled | MHz | MHz | MHz | | +| average_non_idle_frequency | sampled | MHz | - | - | - | +| time_stamp_counter | sampled | MHz | - | - | - | +| memory_clock_frequency | sampled | - | MHz | MHz | | +| socket_clock_frequency | sampled | - | - | MHz | - | +| sm_clock_frequency | sampled | - | MHz | - | - | +| overdrive_level | sampled | - | - | % | - | +| memory_overdrive_level | sampled | - | - | % | - | +| throttle_reason | sampled | - | string (bitmask) | - | | +| memory_throttle_reason | | - | - | - | | +| auto_boosted_clock | sampled | - | bool | - | - | +| tdp_frequency_limit | | - | - | - | | +| memory_tdp_frequency_limit | | - | - | - | | ### power-related samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:-------------------------------|:---------------------------------:|:-----------:|:-----------:|:----------:| -| power_management_limit | - | W | W | | -| power_enforced_limit | - | W | W | | -| power_measurement_type | str (fix) | str 
| str | | -| power_management_mode | - | bool | - | | -| available_power_profiles | - | list of int | list of str | | -| power_usage | W | W | W | | -| core_watt | W | - | - | - | -| dram_watt | W | - | - | - | -| package_rapl_throttling | % | - | - | - | -| dram_rapl_throttling | % | - | - | - | -| power_total_energy_consumption | J
(calculated via power_usage) | J | J | J | -| power_profile | - | int | str | | -| energy_threshold_enabled | | | | bool | -| energy_threshold | | | | J | +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-------------------------------|:-----------:|:---------------------------------:|:-----------:|:-----------:|:----------:| +| power_management_limit | fixed | - | W | W | | +| power_enforced_limit | fixed | - | W | W | | +| power_measurement_type | fixed | str (fix) | str | str | | +| power_management_mode | fixed | - | bool | - | | +| available_power_profiles | fixed | - | list of int | list of str | | +| power_usage | sampled | W | W | W | | +| core_watt | sampled | W | - | - | - | +| dram_watt | sampled | W | - | - | - | +| package_rapl_throttling | sampled | % | - | - | - | +| dram_rapl_throttling | sampled | % | - | - | - | +| power_total_energy_consumption | sampled | J
(calculated via power_usage) | J | J | J | +| power_profile | sampled | - | int | str | | +| energy_threshold_enabled | | | | | bool | +| energy_threshold | | | | | J | ### memory-related samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:----------------------------|:----:|:-----------:|:--------:|:----------:| -| cache_size_L1d | str | - | - | - | -| cache_size_L1i | str | - | - | - | -| cache_size_L2 | str | - | - | - | -| cache_size_L3 | str | - | - | - | -| memory_total | B | B | B | | -| visible_memory_total | - | - | B | - | -| swap_memory_total | B | - | - | - | -| memory_total_{} | - | | | B | -| allocatable_memory_total_{} | - | | | B | -| num_pcie_lanes_min | - | - | int | | -| num_pcie_lanes_max | - | int | int | | -| pcie_link_generation_max | - | int | - | int | -| pcie_link_speed_max | - | MBPS | - | BPS | -| pcie_link_transfer_rate_min | - | - | MT/s | | -| pcie_link_transfer_rate_max | - | - | MT/s | | -| memory_bus_width | - | Bit | - | | -| memory_used | B | B | B | | -| memory_free | B | B | B | | -| swap_memory_used | B | - | - | - | -| swap_memory_free | B | - | - | - | -| num_pcie_lanes | - | int | int | | -| pcie_link_generation | - | int | - | int | -| pcie_link_speed | - | MBPS | - | MBPS | -| pcie_link_transfer_rate | - | - | T/s | - | -| memory_used_{} | | | | B | -| memory_free_{} | | | | B | -| memory_bus_width_{} | | | | Bit | -| memory_num_channels_{} | | | | int | -| memory_location_{} | | | | str | +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:----------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:| +| cache_size_L1d | fixed | str | - | - | - | +| cache_size_L1i | fixed | str | - | - | - | +| cache_size_L2 | fixed | str | - | - | - | +| cache_size_L3 | fixed | str | - | - | - | +| memory_total | fixed | B | B | B | | +| visible_memory_total | fixed | - | - | B | - | +| swap_memory_total | fixed | B | - | - | - | +| memory_total_{} | | - | | | B | +| 
allocatable_memory_total_{} | | - | | | B | +| num_pcie_lanes_min | fixed | - | - | int | | +| num_pcie_lanes_max | fixed | - | int | int | | +| pcie_link_generation_max | fixed | - | int | - | int | +| pcie_link_speed_max | fixed | - | MBPS | - | BPS | +| pcie_link_transfer_rate_min | fixed | - | - | MT/s | | +| pcie_link_transfer_rate_max | fixed | - | - | MT/s | | +| memory_bus_width | fixed | - | Bit | - | | +| memory_used | sampled | B | B | B | | +| memory_free | sampled | B | B | B | | +| swap_memory_used | sampled | B | - | - | - | +| swap_memory_free | sampled | B | - | - | - | +| num_pcie_lanes | sampled | - | int | int | | +| pcie_link_generation | sampled | - | int | - | int | +| pcie_link_speed | sampled | - | MBPS | - | MBPS | +| pcie_link_transfer_rate | sampled | - | - | T/s | - | +| memory_used_{} | | | | | B | +| memory_free_{} | | | | | B | +| memory_bus_width_{} | | | | | Bit | +| memory_num_channels_{} | | | | | int | +| memory_location_{} | | | | | str | ### temperature-related samples -| sample | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:------------------------|:----:|:-----------:|:--------:|:----------:| -| num_fans | - | int | int | | -| fan_speed_min | - | % | - | | -| fan_speed_max | - | % | RPM | | -| temperature_min | - | - | °C | | -| temperature_max | - | °C | °C | | -| memory_temperature_min | - | - | °C | | -| memory_temperature_max | - | °C | °C | | -| hotspot_temperature_min | - | - | °C | | -| hotspot_temperature_max | - | - | °C | | -| hbm_0_temperature_min | - | - | °C | | -| hbm_0_temperature_max | - | - | °C | | -| hbm_1_temperature_min | - | - | °C | | -| hbm_1_temperature_max | - | - | °C | | -| hbm_2_temperature_min | - | - | °C | | -| hbm_2_temperature_max | - | - | °C | | -| hbm_3_temperature_min | - | - | °C | | -| hbm_3_temperature_max | - | - | °C | | -| fan_speed_percentage | - | % | % | | -| temperature | °C | °C | °C | | -| memory_temperature | - | - | °C | | -| hotspot_temperature | - | - | °C | | -| 
hbm_0_temperature | - | - | °C | | -| hbm_1_temperature | - | - | °C | | -| hbm_2_temperature | - | - | °C | | -| hbm_3_temperature | - | - | °C | | -| temperature_{}_max | | | | | -| temperature_psu | | | | | -| temperature_{} | | | | | -| core_temperature | °C | - | - | - | -| core_throttle_percent | % | - | - | - | +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:| +| num_fans | fixed | - | int | int | | +| fan_speed_min | fixed | - | % | - | | +| fan_speed_max | fixed | - | % | RPM | | +| temperature_min | fixed | - | - | °C | | +| temperature_max | fixed | - | °C | °C | | +| memory_temperature_min | fixed | - | - | °C | | +| memory_temperature_max | fixed | - | °C | °C | | +| hotspot_temperature_min | fixed | - | - | °C | | +| hotspot_temperature_max | fixed | - | - | °C | | +| hbm_0_temperature_min | fixed | - | - | °C | | +| hbm_0_temperature_max | fixed | - | - | °C | | +| hbm_1_temperature_min | fixed | - | - | °C | | +| hbm_1_temperature_max | fixed | - | - | °C | | +| hbm_2_temperature_min | fixed | - | - | °C | | +| hbm_2_temperature_max | fixed | - | - | °C | | +| hbm_3_temperature_min | fixed | - | - | °C | | +| hbm_3_temperature_max | fixed | - | - | °C | | +| fan_speed_percentage | sampled | - | % | % | | +| temperature | sampled | °C | °C | °C | | +| memory_temperature | sampled | - | - | °C | | +| hotspot_temperature | sampled | - | - | °C | | +| hbm_0_temperature | sampled | - | - | °C | | +| hbm_1_temperature | sampled | - | - | °C | | +| hbm_2_temperature | sampled | - | - | °C | | +| hbm_3_temperature | sampled | - | - | °C | | +| temperature_{}_max | | | | | | +| temperature_psu | | | | | | +| temperature_{} | | | | | | +| core_temperature | sampled | °C | - | - | - | +| core_throttle_percent | sampled | % | - | - | - | ### gfx-related (iGPU) samples -| sample | CPUs | -|:---------------------------|:----:| -| graphics_render_state | % 
| -| graphics_frequency | MHz | -| average_graphics_frequency | MHz | -| gpu_state_c0 | % | -| cpu_works_for_gpu | % | -| graphics_power | W | +| sample | sample type | CPUs | +|:--------------------------|:-----------:|:----:| +| gfx_render_state_percent | sampled | % | +| gfx_frequency | sampled | MHz | +| average_gfx_frequency | sampled | MHz | +| gfx_state_c0_percent | sampled | % | +| cpu_works_for_gpu_percent | sampled | % | +| gfx_watt | sampled | W | ### "idle states"-related samples -| sample | CPUs | -|:---------------------------------|:----:| -| all_cpus_state_c0 | % | -| any_cpu_state_c0 | % | -| lower_power_idle_state | % | -| system_lower_power_idle_state | % | -| package_lower_power_idle_state | % | -| cpu_idle_state_{}_percentage | % | -| package_idle_state_{}_percentage | % | -| package_idle_state_{}_percentage | % | -| idle_state_{}_percentage | % | -| idle_state_{} | int | +| sample | sample type | CPUs | +|:-------------------------------------|:-----------:|:-------------:| +| idle_states | fixed | map of values | +| all_cpus_state_c0_percent | sampled | % | +| any_cpu_state_c0_percent | sampled | % | +| low_power_idle_state_percent | sampled | % | +| system_low_power_idle_state_percent | sampled | % | +| package_low_power_idle_state_percent | sampled | % | From e1a0da58580851963b7e6b77df74e43082da7bac Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 17:35:34 +0200 Subject: [PATCH 28/69] Update sample name. 
--- README.md | 2 +- bindings/cpu_hardware_sampler.cpp | 2 +- include/hardware_sampling/cpu/cpu_samples.hpp | 6 +++--- src/hardware_sampling/cpu/cpu_samples.cpp | 10 +++++----- src/hardware_sampling/cpu/hardware_sampler.cpp | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index c805dbb..91dba28 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ The sampling type `sampled` denotes samples that are gathered during the whole h | available_clock_frequencies | fixed | - | map of MHz | list of MHz | | | available_memory_clock_frequencies | fixed | - | list of MHz | list of MHz | | | clock_frequency | sampled | MHz | MHz | MHz | | -| average_non_idle_frequency | sampled | MHz | - | - | - | +| average_non_idle_clock_frequency | sampled | MHz | - | - | - | | time_stamp_counter | sampled | MHz | - | - | - | | memory_clock_frequency | sampled | - | MHz | MHz | | | socket_clock_frequency | sampled | - | - | MHz | - | diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp index 9ed204d..354c585 100644 --- a/bindings/cpu_hardware_sampler.cpp +++ b/bindings/cpu_hardware_sampler.cpp @@ -48,7 +48,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("get_clock_frequency_min", &hws::cpu_clock_samples::get_clock_frequency_min, "the minimum possible CPU frequency in MHz") .def("get_clock_frequency_max", &hws::cpu_clock_samples::get_clock_frequency_max, "the maximum possible CPU frequency in MHz") .def("get_clock_frequency", &hws::cpu_clock_samples::get_clock_frequency, "the average CPU frequency in MHz including idle cores") - .def("get_average_non_idle_frequency", &hws::cpu_clock_samples::get_average_non_idle_frequency, "the average CPU frequency in MHz excluding idle cores") + .def("get_average_non_idle_clock_frequency", &hws::cpu_clock_samples::get_average_non_idle_clock_frequency, "the average CPU frequency in MHz excluding idle cores") .def("get_time_stamp_counter", 
&hws::cpu_clock_samples::get_time_stamp_counter, "the time stamp counter") .def("__repr__", [](const hws::cpu_clock_samples &self) { return fmt::format("", self); diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index 98a88f2..f92ba0d 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -95,9 +95,9 @@ class cpu_clock_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min) // the minimum possible CPU frequency in MHz HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max) // the maximum possible CPU frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_frequency) // the average CPU frequency in MHz including idle cores - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_frequency) // the average CPU frequency in MHz excluding idle cores - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter) // the time stamp counter + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_frequency) // the average CPU frequency in MHz including idle cores + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_clock_frequency) // the average CPU frequency in MHz excluding idle cores + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter) // the time stamp counter }; /** diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index 0242e9a..cc99d76 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -242,12 +242,12 @@ std::string cpu_clock_samples::generate_yaml_string() const { fmt::join(this->clock_frequency_.value(), ", ")); } // the average CPU frequency excluding idle time - if (this->average_non_idle_frequency_.has_value()) { - str += fmt::format(" average_non_idle_frequency:\n" + if (this->average_non_idle_clock_frequency_.has_value()) { + str += fmt::format(" 
average_non_idle_clock_frequency:\n" " turbostat_name: \"Bzy_MHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - fmt::join(this->average_non_idle_frequency_.value(), ", ")); + fmt::join(this->average_non_idle_clock_frequency_.value(), ", ")); } // the time stamp counter if (this->time_stamp_counter_.has_value()) { @@ -269,13 +269,13 @@ std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) { "clock_frequency_min [MHz]: {}\n" "clock_frequency_max [MHz]: {}\n" "clock_frequency [MHz]: [{}]\n" - "average_non_idle_frequency [MHz]: [{}]\n" + "average_non_idle_clock_frequency [MHz]: [{}]\n" "time_stamp_counter [MHz]: [{}]", detail::value_or_default(samples.get_auto_boosted_clock_enabled()), detail::value_or_default(samples.get_clock_frequency_min()), detail::value_or_default(samples.get_clock_frequency_max()), fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), - fmt::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_average_non_idle_clock_frequency()), ", "), fmt::join(detail::value_or_default(samples.get_time_stamp_counter()), ", ")); } diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 6ad38ff..492c17a 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -171,8 +171,8 @@ void cpu_hardware_sampler::sampling_loop() { using vector_type = decltype(general_samples_.compute_utilization_)::value_type; general_samples_.compute_utilization_ = vector_type{ detail::convert_to(values[i]) }; } else if (header[i] == "Bzy_MHz") { - using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type; - clock_samples_.average_non_idle_frequency_ = vector_type{ detail::convert_to(values[i]) }; + using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; + 
clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; } else if (header[i] == "TSC_MHz") { using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to(values[i]) }; From d719e8f0b4fb5d929ddef786f6cd549f6c835966 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 17:36:07 +0200 Subject: [PATCH 29/69] Add new function returning relative time points (relative to the first event) as "normal" number. --- README.md | 10 +++++----- bindings/hardware_sampler.cpp | 7 ++++--- src/hardware_sampling/cpu/hardware_sampler.cpp | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 91dba28..25e2f73 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,7 @@ The sampling type `sampled` denotes samples that are gathered during the whole h import HardwareSampling import numpy as np import matplotlib.pyplot as plt +import matplotlib.dates as mdates import datetime sampler = HardwareSampling.CpuHardwareSampler() @@ -259,16 +260,15 @@ sampler.stop() sampler.dump_yaml("track.yaml") # plot the results -time_points = sampler.time_points() -relative_time_points = [(t - time_points[0]) / datetime.timedelta(milliseconds=1) for t in time_points] +time_points = sampler.relative_time_points() -plt.plot(relative_time_points, sampler.clock_samples().get_average_frequency(), label="average") -plt.plot(relative_time_points, sampler.clock_samples().get_average_non_idle_frequency(), label="average non-idle") +plt.plot(time_points, sampler.clock_samples().get_clock_frequency(), label="average") +plt.plot(time_points, sampler.clock_samples().get_average_non_idle_clock_frequency(), label="average non-idle") axes = plt.gcf().axes[0] x_bounds = axes.get_xlim() for event in sampler.get_events()[1:-1]: - tp = (event.time_point - time_points[0]) / datetime.timedelta(milliseconds=1) + tp = (event.time_point - 
sampler.time_points()[0]) / datetime.timedelta(milliseconds=1000) axes.axvline(x=tp, color='r') axes.annotate(text=event.name, xy=(((tp - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270) diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index e46dbc8..12c0c01 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -7,7 +7,8 @@ #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/event.hpp" // hws::event +#include "hardware_sampling/event.hpp" // hws::event +#include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time #if defined(HWS_FOR_CPUS_ENABLED) #include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler @@ -50,6 +51,7 @@ void init_hardware_sampler(py::module_ &m) { .def("get_events", &hws::hardware_sampler::get_events, "get all events") .def("get_event", &hws::hardware_sampler::get_event, "get a specific event") .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples") + .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)") .def("dump_yaml", py::overload_cast(&hws::hardware_sampler::dump_yaml), "dump all hardware samples to the given YAML file") .def("__repr__", [](const hws::hardware_sampler &self) { @@ -73,6 +75,5 @@ void init_hardware_sampler(py::module_ &m) { return fmt::format("", dynamic_cast(self)); } #endif - return std::string{ "unknown" }; - }); + return std::string{ "unknown" }; }); } diff --git 
a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 492c17a..7e89eca 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -318,8 +318,8 @@ void cpu_hardware_sampler::sampling_loop() { using vector_type = decltype(general_samples_.compute_utilization_)::value_type; general_samples_.compute_utilization_->push_back(detail::convert_to(values[i])); } else if (header[i] == "Bzy_MHz") { - using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type; - clock_samples_.average_non_idle_frequency_->push_back(detail::convert_to(values[i])); + using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; + clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to(values[i])); } else if (header[i] == "TSC_MHz") { using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; clock_samples_.time_stamp_counter_->push_back(detail::convert_to(values[i])); From 4f411f390529400753d0e1747e81d8175f867255 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 16 Sep 2024 17:56:47 +0200 Subject: [PATCH 30/69] Add new python only functions that return a relative event, i.e., events where the "relative_time_point" member is the time duration since the first event occurred (and not an absolute time).
--- README.md | 8 +++--- bindings/hardware_sampler.cpp | 39 +++++++++++++++++++++++++++ include/hardware_sampling/utility.hpp | 14 +++++++++- 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 25e2f73..d7acad6 100644 --- a/README.md +++ b/README.md @@ -267,11 +267,9 @@ plt.plot(time_points, sampler.clock_samples().get_average_non_idle_clock_frequen axes = plt.gcf().axes[0] x_bounds = axes.get_xlim() -for event in sampler.get_events()[1:-1]: - tp = (event.time_point - sampler.time_points()[0]) / datetime.timedelta(milliseconds=1000) - - axes.axvline(x=tp, color='r') - axes.annotate(text=event.name, xy=(((tp - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270) +for event in sampler.get_relative_events()[1:-1]: + axes.axvline(x=event.relative_time_point, color='r') + axes.annotate(text=event.name, xy=(((event.relative_time_point - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270) plt.xlabel("runtime [ms]") plt.ylabel("clock frequency [MHz]") diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index 12c0c01..6619854 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -30,11 +30,43 @@ #include // std::string +namespace hws { + +/** + * @brief A struct encapsulating a single event with a relative time point. + */ +struct relative_event { + /** + * @brief Construct a new event given a relative time point and name. + * @param[in] relative_time_point_p the time when the event occurred relative to the first event + * @param[in] name_p the name of the event + */ + relative_event(const double relative_time_point_p, std::string name_p) : + relative_time_point{ relative_time_point_p }, + name{ std::move(name_p) } { } + + /// The relative time point this event occurred at. + double relative_time_point; + /// The name of this event.
+ std::string name; +}; + +} // namespace hws + namespace py = pybind11; void init_hardware_sampler(py::module_ &m) { const py::module_ pure_virtual_module = m.def_submodule("__pure_virtual"); + // a special python only struct encapsulating a relative event, i.e., an event where its "relative_time_point" member is the time passed since the first event + py::class_(m, "RelativeEvent") + .def(py::init(), "construct a new event using a time point and a name") + .def_readonly("relative_time_point", &hws::relative_event::relative_time_point, "read the relative time point associated to this event") + .def_readonly("name", &hws::relative_event::name, "read the name associated to this event") + .def("__repr__", [](const hws::relative_event &self) { + return fmt::format("", self.relative_time_point, self.name); + }); + // bind the pure virtual hardware sampler base class py::class_ pyhardware_sampler(pure_virtual_module, "__pure_virtual_base_HardwareSampler"); pyhardware_sampler.def("start", &hws::hardware_sampler::start_sampling, "start the current hardware sampling") @@ -49,7 +81,14 @@ void init_hardware_sampler(py::module_ &m) { .def("add_event", py::overload_cast(&hws::hardware_sampler::add_event), "add a new event using a name, the current time is used as time point") .def("num_events", &hws::hardware_sampler::num_events, "get the number of events") .def("get_events", &hws::hardware_sampler::get_events, "get all events") + .def("get_relative_events", [](const hws::hardware_sampler &self) { + std::vector relative_events{}; + for (const hws::event &e : self.get_events()) { + relative_events.emplace_back(hws::detail::duration_from_reference_time(e.time_point, self.get_event(0).time_point), e.name); + } + return relative_events; }, "get all relative events") .def("get_event", &hws::hardware_sampler::get_event, "get a specific event") + .def("get_relative_event", [](const hws::hardware_sampler &self, const std::size_t idx) { return hws::relative_event{ 
hws::detail::duration_from_reference_time(self.get_event(idx).time_point, self.get_event(0).time_point), self.get_event(idx).name }; }, "get a specific relative event") .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples") .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)") diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp index ff7d3f6..c70b4c2 100644 --- a/include/hardware_sampling/utility.hpp +++ b/include/hardware_sampling/utility.hpp @@ -229,6 +229,18 @@ template /** other free functions **/ /*****************************************************************************************************/ +/** + * @brief Convert the time point to its duration in seconds (using double) truncated to three decimal places passed since the @p reference time point. + * @tparam TimePoint the type if the time point + * @param[in] time_point the time point + * @param[in] reference the reference time point + * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline double duration_from_reference_time(const TimePoint &time_point, const TimePoint &reference) { + return std::trunc(std::chrono::duration(time_point - reference).count() * 1000.0) / 1000.0; +} + /** * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point. 
* @tparam TimePoint the type if the time points @@ -241,7 +253,7 @@ template std::vector durations(time_points.size()); for (std::size_t i = 0; i < durations.size(); ++i) { - durations[i] = std::trunc(std::chrono::duration(time_points[i] - reference).count() * 1000.0) / 1000.0; + durations[i] = duration_from_reference_time(time_points[i], reference); } return durations; From d6c69e9564aabf6d49e143850f5ce3c814651461 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 17 Sep 2024 10:59:25 +0200 Subject: [PATCH 31/69] Mark dump_yaml member functions as const. --- bindings/hardware_sampler.cpp | 2 +- include/hardware_sampling/hardware_sampler.hpp | 6 +++--- src/hardware_sampling/hardware_sampler.cpp | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index 6619854..6b3d465 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -92,7 +92,7 @@ void init_hardware_sampler(py::module_ &m) { .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples") .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)") - .def("dump_yaml", py::overload_cast(&hws::hardware_sampler::dump_yaml), "dump all hardware samples to the given YAML file") + .def("dump_yaml", py::overload_cast(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file") .def("__repr__", [](const hws::hardware_sampler &self) { #if defined(HWS_FOR_CPUS_ENABLED) if (dynamic_cast(&self)) { diff --git a/include/hardware_sampling/hardware_sampler.hpp 
b/include/hardware_sampling/hardware_sampler.hpp index ce7c6fb..9da2905 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hardware_sampling/hardware_sampler.hpp @@ -148,15 +148,15 @@ class hardware_sampler { * @brief Dump the hardware samples to the YAML file with @p filename. * @param[in] filename the YAML file to append the hardware samples to */ - void dump_yaml(const char *filename); + void dump_yaml(const char *filename) const; /** * @copydoc hws::hardware_sampler::dump_yaml(const char *) */ - void dump_yaml(const std::string &filename); + void dump_yaml(const std::string &filename) const; /** * @copydoc hws::hardware_sampler::dump_yaml(const char *) */ - void dump_yaml(const std::filesystem::path &filename); + void dump_yaml(const std::filesystem::path &filename) const; protected: /** diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index 3511f7f..b68cbd1 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -118,7 +118,7 @@ event hardware_sampler::get_event(const std::size_t idx) const { return events_[idx]; } -void hardware_sampler::dump_yaml(const char *filename) { +void hardware_sampler::dump_yaml(const char *filename) const { if (!this->has_sampling_stopped()) { throw std::runtime_error{ "Can dump samples to the YAML file only after the sampling has been stopped!" 
}; } @@ -162,11 +162,11 @@ void hardware_sampler::dump_yaml(const char *filename) { this->generate_yaml_string()); } -void hardware_sampler::dump_yaml(const std::string &filename) { +void hardware_sampler::dump_yaml(const std::string &filename) const { this->dump_yaml(filename.c_str()); } -void hardware_sampler::dump_yaml(const std::filesystem::path &filename) { +void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const { this->dump_yaml(filename.string().c_str()); } From cbbac19e67cc9cf128e0186d25c03675ea29b811 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 17 Sep 2024 11:00:07 +0200 Subject: [PATCH 32/69] Implement HWS_CUDA_ERROR_CHECK macro for also checking cuda error codes (previously only NVML functions could be checked). --- include/hardware_sampling/gpu_nvidia/utility.hpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp index b347f0a..0352915 100644 --- a/include/hardware_sampling/gpu_nvidia/utility.hpp +++ b/include/hardware_sampling/gpu_nvidia/utility.hpp @@ -12,8 +12,9 @@ #define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ #pragma once -#include "fmt/format.h" // fmt::format -#include "nvml.h" // NVML runtime functions +#include "cuda_runtime_api.h" // CUDA runtime functions +#include "fmt/format.h" // fmt::format +#include "nvml.h" // NVML runtime functions #include // std::runtime_error #include // std::string @@ -33,14 +34,23 @@ namespace hws::detail { throw std::runtime_error{ fmt::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast(errc)) }; \ } \ } + + #define HWS_CUDA_ERROR_CHECK(cuda_func) \ + { \ + const cudaError_t errc = cuda_func; \ + if (errc != cudaSuccess) { \ + throw std::runtime_error{ fmt::format("Error in CUDA function call \"{}\": {} ({})", #cuda_func, cudaGetErrorName(errc), cudaGetErrorString(errc)) }; \ + } \ + } #else 
#define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func; + #define HWS_CUDA_ERROR_CHECK(cuda_func) cuda_func; #endif /** * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|". * @param[in] clocks_event_reasons the bitmask to convert to a string - * @return all event throttle reasons + * @return all event throttle reasons (`[[nodiscard]]`) */ [[nodiscard]] std::string throttle_event_reason_to_string(unsigned long long clocks_event_reasons); From 5fc03c2cfbd10ae6316578bb729608093dade14e Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 17 Sep 2024 11:29:02 +0200 Subject: [PATCH 33/69] Fix errors in documentation. --- include/hardware_sampling/hardware_sampler.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp index 9da2905..64eb833 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hardware_sampling/hardware_sampler.hpp @@ -86,7 +86,7 @@ class hardware_sampler { */ [[nodiscard]] bool has_sampling_started() const noexcept; /** - * @brief Check whether this hardware sampler has currently sampling. + * @brief Check whether this hardware sampler is currently sampling. * @return `true` if the hardware sampler is currently sampling, `false` otherwise (`[[nodiscard]]`) */ [[nodiscard]] bool is_sampling() const noexcept; @@ -121,7 +121,7 @@ class hardware_sampler { /** * @brief Return the number of recorded events. - * @return the number of events (`[[nodiscard]]`) + * @return the events (`[[nodiscard]]`) */ [[nodiscard]] const std::vector &get_events() const noexcept { return events_; } From acb982668cf84b0d082931734cfa87c2a167f9b8 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 17 Sep 2024 11:51:15 +0200 Subject: [PATCH 34/69] Fix errors in documentation wrongly using PLSSVM. 
--- bindings/CMakeLists.txt | 2 +- bindings/hardware_sampler.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index 95f6a2b..f6f8c5e 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ -4,7 +4,7 @@ ## See the LICENSE.md file in the project root for full license information. ######################################################################################################################## -message(STATUS "Building Python language bindings for PLSSVM.") +message(STATUS "Building Python language bindings.") find_package(Python COMPONENTS Interpreter Development) diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index 6b3d465..76dcbcc 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -96,22 +96,22 @@ void init_hardware_sampler(py::module_ &m) { .def("__repr__", [](const hws::hardware_sampler &self) { #if defined(HWS_FOR_CPUS_ENABLED) if (dynamic_cast(&self)) { - return fmt::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) if (dynamic_cast(&self)) { - return fmt::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif #if defined(HWS_FOR_AMD_GPUS_ENABLED) if (dynamic_cast(&self)) { - return fmt::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif #if defined(HWS_FOR_INTEL_GPUS_ENABLED) if (dynamic_cast(&self)) { - return fmt::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif return std::string{ "unknown" }; }); From bd4d98790070a9681e6fcfa7adb508f0a3a1232d Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 17 Sep 2024 12:44:08 +0200 Subject: [PATCH 35/69] Add a new system_hardware_sampler that automatically samples all available hardware (e.g., CPU and GPUs). 
--- CMakeLists.txt | 21 ++- bindings/CMakeLists.txt | 2 + bindings/hardware_sampler.cpp | 39 +--- bindings/main.cpp | 4 + bindings/relative_event.cpp | 26 +++ bindings/relative_event.hpp | 40 ++++ bindings/system_hardware_sampler.cpp | 67 +++++++ include/hardware_sampling/core.hpp | 1 + .../system_hardware_sampler.hpp | 177 ++++++++++++++++++ .../system_hardware_sampler.cpp | 177 ++++++++++++++++++ 10 files changed, 518 insertions(+), 36 deletions(-) create mode 100644 bindings/relative_event.cpp create mode 100644 bindings/relative_event.hpp create mode 100644 bindings/system_hardware_sampler.cpp create mode 100644 include/hardware_sampling/system_hardware_sampler.hpp create mode 100644 src/hardware_sampling/system_hardware_sampler.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 74cc828..1080a26 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ project("HWS - Hardware Sampling for GPUs and CPUs" set(HWS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/system_hardware_sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/utility.cpp ) @@ -87,6 +88,24 @@ else () endif () target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt) +#set(HWS_ryml_VERSION v0.7.2) +#find_package(ryml QUIET) +#if (fmt_FOUND) +# message(STATUS "Found package ryml (rapidyaml).") +#else () +# message(STATUS "Couldn't find package ryml (rapidyaml). 
Building version ${HWS_ryml_VERSION} from source.") +# # fetch yaml library ryml +# FetchContent_Declare(ryml +# GIT_REPOSITORY https://github.com/biojppm/rapidyaml +# GIT_TAG ${HWS_ryml_VERSION} +# GIT_SHALLOW FALSE +# QUIET +# ) +# FetchContent_MakeAvailable(ryml) +# add_dependencies(${HWS_LIBRARY_NAME} ryml) +#endif () +#target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC ryml::ryml) + #################################################################################################################### ## CPU measurements ## #################################################################################################################### @@ -194,7 +213,7 @@ endif () # find libraries necessary for NVML and link against them find_package(CUDAToolkit QUIET) if (CUDAToolkit_FOUND) - target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE CUDA::nvml) + target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE CUDA::nvml CUDA::cudart) message(STATUS "Enable sampling of NVIDIA GPU information using NVML.") diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index f6f8c5e..fb00d29 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ -32,7 +32,9 @@ endif () # set source files that are always used set(HWS_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/relative_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/system_hardware_sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ) diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index 76dcbcc..5f86f96 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -28,45 +28,14 @@ #include "pybind11/pybind11.h" // py::module_, py::class_ #include "pybind11/stl.h" // bind STL types -#include // std::string - -namespace hws { - -/** - * @brief A struct encapsulating a single event with a relative time point. 
- */ -struct relative_event { - /** - * @brief Construct a new event given a time point and name. - * @param[in] time_point_p the time when the event occurred relative to the first event - * @param[in] name_p the name of the event - */ - relative_event(const double relative_time_point_p, std::string name_p) : - relative_time_point{ relative_time_point_p }, - name{ std::move(name_p) } { } - - /// The relative time point this event occurred at. - double relative_time_point; - /// The name of this event. - std::string name; -}; - -} // namespace hws +#include "relative_event.hpp" // hws::detail::relative_event +#include // std::string namespace py = pybind11; void init_hardware_sampler(py::module_ &m) { const py::module_ pure_virtual_module = m.def_submodule("__pure_virtual"); - // a special python only struct encapsulating a relative event, i.e., an event where its "relative_time_point" member is the time passed since the first event - py::class_(m, "RelativeEvent") - .def(py::init(), "construct a new event using a time point and a name") - .def_readonly("relative_time_point", &hws::relative_event::relative_time_point, "read the relative time point associated to this event") - .def_readonly("name", &hws::relative_event::name, "read the name associated to this event") - .def("__repr__", [](const hws::relative_event &self) { - return fmt::format("", self.relative_time_point, self.name); - }); - // bind the pure virtual hardware sampler base class py::class_ pyhardware_sampler(pure_virtual_module, "__pure_virtual_base_HardwareSampler"); pyhardware_sampler.def("start", &hws::hardware_sampler::start_sampling, "start the current hardware sampling") @@ -82,13 +51,13 @@ void init_hardware_sampler(py::module_ &m) { .def("num_events", &hws::hardware_sampler::num_events, "get the number of events") .def("get_events", &hws::hardware_sampler::get_events, "get all events") .def("get_relative_events", [](const hws::hardware_sampler &self) { - std::vector relative_events{}; + 
std::vector relative_events{}; for (const hws::event &e : self.get_events()) { relative_events.emplace_back(hws::detail::duration_from_reference_time(e.time_point, self.get_event(0).time_point), e.name); } return relative_events; }, "get all relative events") .def("get_event", &hws::hardware_sampler::get_event, "get a specific event") - .def("get_relative_event", [](const hws::hardware_sampler &self, const std::size_t idx) { return hws::relative_event{ hws::detail::duration_from_reference_time(self.get_event(idx).time_point, self.get_event(0).time_point), self.get_event(idx).name }; }, "get a specific relative event") + .def("get_relative_event", [](const hws::hardware_sampler &self, const std::size_t idx) { return hws::detail::relative_event{ hws::detail::duration_from_reference_time(self.get_event(idx).time_point, self.get_event(0).time_point), self.get_event(idx).name }; }, "get a specific relative event") .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples") .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)") diff --git a/bindings/main.cpp b/bindings/main.cpp index 2b6f507..11dbf33 100644 --- a/bindings/main.cpp +++ b/bindings/main.cpp @@ -16,7 +16,9 @@ namespace py = pybind11; // forward declare binding functions void init_event(py::module_ &); +void init_relative_event(py::module_ &); void init_hardware_sampler(py::module_ &); +void init_system_hardware_sampler(py::module_ &); void init_cpu_hardware_sampler(py::module_ &); void init_gpu_nvidia_hardware_sampler(py::module_ &); void init_gpu_amd_hardware_sampler(py::module_ &); @@ -26,7 
+28,9 @@ PYBIND11_MODULE(HardwareSampling, m) { m.doc() = "Hardware Sampling for CPUs and GPUs"; init_event(m); + init_relative_event(m); init_hardware_sampler(m); + init_system_hardware_sampler(m); // CPU sampling #if defined(HWS_FOR_CPUS_ENABLED) diff --git a/bindings/relative_event.cpp b/bindings/relative_event.cpp new file mode 100644 index 0000000..c0cb611 --- /dev/null +++ b/bindings/relative_event.cpp @@ -0,0 +1,26 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "relative_event.hpp" // hws::detail::relative_event + +#include "fmt/format.h" // fmt::format +#include "pybind11/chrono.h" // bind std::chrono types +#include "pybind11/pybind11.h" // py::module_ +#include "pybind11/stl.h" // bind STL types + +namespace py = pybind11; + +void init_relative_event(py::module_ &m) { + // a special python only struct encapsulating a relative event, i.e., an event where its "relative_time_point" member is the time passed since the first event + py::class_(m, "RelativeEvent") + .def(py::init(), "construct a new event using a time point and a name") + .def_readonly("relative_time_point", &hws::detail::relative_event::relative_time_point, "read the relative time point associated to this event") + .def_readonly("name", &hws::detail::relative_event::name, "read the name associated to this event") + .def("__repr__", [](const hws::detail::relative_event &self) { + return fmt::format("", self.relative_time_point, self.name); + }); +} diff --git a/bindings/relative_event.hpp b/bindings/relative_event.hpp new file mode 100644 index 0000000..2033f12 --- /dev/null +++ b/bindings/relative_event.hpp @@ -0,0 +1,40 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. 
+ * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a struct encapsulating a single event with a relative time point. + */ + +#ifndef HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_ +#define HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_ + +#include // std::string +#include // std::move + +namespace hws::detail { + +/** + * @brief A struct encapsulating a single event with a relative time point. + */ +struct relative_event { + /** + * @brief Construct a new event given a time point and name. + * @param[in] relative_time_point_p the time when the event occurred relative to the first event + * @param[in] name_p the name of the event + */ + relative_event(const double relative_time_point_p, std::string name_p) : + relative_time_point{ relative_time_point_p }, + name{ std::move(name_p) } { } + + /// The relative time point this event occurred at. + double relative_time_point; + /// The name of this event. + std::string name; +}; + +} // namespace hws::detail + +#endif // HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_ diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp new file mode 100644 index 0000000..3c24ad3 --- /dev/null +++ b/bindings/system_hardware_sampler.cpp @@ -0,0 +1,67 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information.
+ */ + +#include "hardware_sampling/system_hardware_sampler.hpp" // hws::system_hardware_sampler + +#include "hardware_sampling/event.hpp" // hws::event +#include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time + +#include "fmt/format.h" // fmt::format +#include "pybind11/chrono.h" // bind std::chrono types +#include "pybind11/pybind11.h" // py::module_, py::class_ +#include "pybind11/stl.h" // bind STL types + +#include "relative_event.hpp" // hws::detail::relative_event +#include // std::string + +namespace py = pybind11; + +void init_system_hardware_sampler(py::module_ &m) { + // bind the pure virtual hardware sampler base class + py::class_(m, "SystemHardwareSampler") + .def(py::init<>(), "construct a new system hardware sampler with the default sampling interval") + .def(py::init(), "construct a new system hardware sampler for with the specified sampling interval") + .def("start", &hws::system_hardware_sampler::start_sampling, "start hardware sampling for all available hardware samplers") + .def("stop", &hws::system_hardware_sampler::stop_sampling, "stop hardware sampling for all available hardware samplers") + .def("pause", &hws::system_hardware_sampler::pause_sampling, "pause hardware sampling for all available hardware samplers") + .def("resume", &hws::system_hardware_sampler::resume_sampling, "resume hardware sampling for all available hardware samplers") + .def("has_started", &hws::system_hardware_sampler::has_sampling_started, "check whether hardware sampling has already been started for all hardware samplers") + .def("is_sampling", &hws::system_hardware_sampler::is_sampling, "check whether the hardware sampling is currently active for all hardware samplers") + .def("has_stopped", &hws::system_hardware_sampler::has_sampling_stopped, "check whether hardware sampling has already been stopped for all hardware samplers") + .def("add_event", py::overload_cast(&hws::system_hardware_sampler::add_event), "add a new event to all 
hardware samplers") + .def("add_event", py::overload_cast(&hws::system_hardware_sampler::add_event), "add a new event using a time point and a name to all hardware samplers") + .def("add_event", py::overload_cast(&hws::system_hardware_sampler::add_event), "add a new event using a name, the current time is used as time point to all hardware samplers") + .def("num_events", &hws::system_hardware_sampler::num_events, "get the number of events separately for each hardware sampler") + .def("get_events", &hws::system_hardware_sampler::get_events, "get all events separately for each hardware sampler") + .def("get_relative_events", [](const hws::system_hardware_sampler &self) { + std::vector> relative_events{}; + for (const std::vector &events : self.get_events()) { + relative_events.emplace_back(); + for (const hws::event &e : events) { + relative_events.back().emplace_back(hws::detail::duration_from_reference_time(e.time_point, events[0].time_point), e.name); + } + } + return relative_events; }, "get all relative events separately for each hardware sampler") + .def("time_points", &hws::system_hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples separately for each hardware sampler") + .def("relative_time_points", [](const hws::system_hardware_sampler &self) { + std::vector> relative_time_points{}; + for (std::size_t s = 0; s < self.num_samplers(); ++s) { + relative_time_points.emplace_back(hws::detail::durations_from_reference_time(self.sampling_time_points()[s], self.get_events()[s][0].time_point)); + } + return relative_time_points; }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") + .def("sampling_interval", &hws::system_hardware_sampler::sampling_interval, "get the sampling interval separately for each hardware sampler (in ms)") + .def("num_samplers", &hws::system_hardware_sampler::num_samplers, "get the number of hardware samplers available for the whole system") + 
.def("samplers", [](hws::system_hardware_sampler &self) { + std::vector out{}; + for (auto &ptr : self.samplers()) { + out.push_back(ptr.get()); + } + return out; }, "get the hardware samplers available for the whole system") + .def("sampler", [](hws::system_hardware_sampler &self, const std::size_t idx) { return self.sampler(idx).get(); }, "get the i-th hardware sampler available for the whole system") + .def("dump_yaml", py::overload_cast(&hws::system_hardware_sampler::dump_yaml, py::const_), "dump all hardware samples for all hardware samplers to the given YAML file") + .def("__repr__", [](const hws::system_hardware_sampler &self) { return fmt::format("", self.num_samplers()); }); +} diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp index 3c986a5..15d65df 100644 --- a/include/hardware_sampling/core.hpp +++ b/include/hardware_sampling/core.hpp @@ -14,6 +14,7 @@ #include "hardware_sampling/event.hpp" #include "hardware_sampling/hardware_sampler.hpp" +#include "hardware_sampling/system_hardware_sampler.hpp" #if defined(HWS_FOR_CPUS_ENABLED) #include "hardware_sampling/cpu/cpu_samples.hpp" diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp new file mode 100644 index 0000000..c585a3f --- /dev/null +++ b/include/hardware_sampling/system_hardware_sampler.hpp @@ -0,0 +1,177 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a hardware sampler for the whole system, i.e., automatically creates CPU and GPU hardware samples if the respective sampler and hardware are available. 
+ */ + +#ifndef HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_ +#define HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_ + +#include "hardware_sampling/event.hpp" // hws::event +#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler + +#include // std::chrono::{milliseconds, steady_clock::time_point} +#include // std::size_t +#include // std::filesystem::path +#include // std::unique_ptr +#include // std::string +#include // std::vector + +namespace hws { + +/** + * @brief A hardware sampler for the whole system. + * @details Enables hardware samplers for which hardware is available and the CMake configuration found the respective dependencies. + */ +class system_hardware_sampler { + public: + /** + * @brief Construct hardware samplers with the default sampling interval. + */ + system_hardware_sampler(); + /** + * @brief Construct hardware samplers with the provided @p sampling_interval. + * @param[in] sampling_interval the used sampling interval + */ + explicit system_hardware_sampler(std::chrono::milliseconds sampling_interval); + + /** + * @brief Delete the copy-constructor. + */ + system_hardware_sampler(const system_hardware_sampler &) = delete; + /** + * @brief Delete the move-constructor. + */ + system_hardware_sampler(system_hardware_sampler &&) noexcept = delete; + /** + * @brief Delete the copy-assignment operator. + */ + system_hardware_sampler &operator=(const system_hardware_sampler &) = delete; + /** + * @brief Delete the move-assignment operator. + */ + system_hardware_sampler &operator=(system_hardware_sampler &&) noexcept = delete; + + /** + * @brief Start hardware sampling for all wrapped hardware samplers. + */ + void start_sampling(); + /** + * @brief Stop hardware sampling for all wrapped hardware samplers. + */ + void stop_sampling(); + /** + * @brief Pause hardware sampling for all wrapped hardware samplers. + */ + void pause_sampling(); + /** + * @brief Resume hardware sampling for all wrapped hardware samplers. 
+ */ + void resume_sampling(); + + /** + * @brief Check whether the hardware samplers have already started sampling. + * @return `true` if **all** hardware samplers have already started sampling, `false` otherwise (`[[nodiscard]]`) + */ + [[nodiscard]] bool has_sampling_started() const noexcept; + /** + * @brief Check whether the hardware samplers are currently sampling. + * @return `true` if **all** hardware samplers are currently sampling, `false` otherwise (`[[nodiscard]]`) + */ + [[nodiscard]] bool is_sampling() const noexcept; + /** + * @brief Check whether the hardware samplers have already stopped sampling. + * @return `true` if **all** hardware samplers have already stopped sampling, `false` otherwise (`[[nodiscard]]`) + */ + [[nodiscard]] bool has_sampling_stopped() const noexcept; + + /** + * @brief Add a new event to all hardware samplers. + * @param e the event + */ + void add_event(event e); + /** + * @brief Add a new event to all hardware samplers. + * @param[in] time_point the time point when the event occurred + * @param[in] name the name of the event + */ + void add_event(decltype(event::time_point) time_point, decltype(event::name) name); + /** + * @brief Add a new event to all hardware samplers. The time_point will be the current time. + * @param[in] name the name of the event + */ + void add_event(decltype(event::name) name); + + /** + * @brief Return the number of recorded events separately for each hardware sampler. + * @return the number of events per hardware sampler (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector num_events() const; + /** + * @brief Return the recorded events separately for each hardware sampler. + * @return the events per hardware sampler (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector> get_events() const; + /** + * @brief Return the time points of the samples separately for each hardware sampler.
+ * @return the time points per hardware sampler (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector> sampling_time_points() const; + /** + * @brief Return the sampling interval separately for each hardware sampler. + * @return the sampling interval in milliseconds per hardware sampler (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector sampling_interval() const; + + /** + * @brief The number of hardware samplers available for the whole system. + * @return the number of hardware samplers (`[[nodiscard]]`) + */ + [[nodiscard]] std::size_t num_samplers() const noexcept; + /** + * @brief The hardware samplers available for the whole system. + * @return all available hardware samplers (`[[nodiscard]]`) + */ + [[nodiscard]] const std::vector> &samplers() const noexcept; + /** + * @copydoc hws::system_hardware_sampler::samplers() const + */ + [[nodiscard]] std::vector> &samplers() noexcept; + /** + * @brief Return the hardware sampler at index @p idx. + * @param[in] idx the index of the hardware sampler + * @throws std::out_of_range if @p idx is out-of-range + * @return the hardware sampler at index @p idx (`[[nodiscard]]`) + */ + [[nodiscard]] const std::unique_ptr &sampler(std::size_t idx) const; + /** + * @copydoc hws::system_hardware_sampler::sampler(std::size_t idx) const + */ + [[nodiscard]] std::unique_ptr &sampler(std::size_t idx); + + /** + * @brief Dump the hardware samples of all hardware samplers to the YAML file with @p filename. + * @param[in] filename the YAML file to append the hardware samples to + */ + void dump_yaml(const char *filename) const; + /** + * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) + */ + void dump_yaml(const std::string &filename) const; + /** + * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) + */ + void dump_yaml(const std::filesystem::path &filename) const; + + private: + /// The different hardware samplers for the current system.
+ std::vector> samplers_; +}; + +} // namespace hws + +#endif // HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_ diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp new file mode 100644 index 0000000..d2d63a9 --- /dev/null +++ b/src/hardware_sampling/system_hardware_sampler.cpp @@ -0,0 +1,177 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "hardware_sampling/system_hardware_sampler.hpp" + +#include "hardware_sampling/event.hpp" // hws::event + +#if defined(HWS_FOR_CPUS_ENABLED) + #include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler +#endif +#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) + #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler + #include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_CUDA_ERROR_CHECK, hws::detail:: +#endif +#if defined(HWS_FOR_AMD_GPUS_ENABLED) + #include "hardware_sampling/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler + #include "hardware_sampling/gpu_amd/utility.hpp" // HWS_HIP_ERROR_CHECK + + #include "hip/hip_runtime.h" // hipGetDeviceCount +#endif +#if defined(HWS_FOR_INTEL_GPUS_ENABLED) + #include "hardware_sampling/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler + #include "hardware_sampling/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK +#endif + +#include "fmt/format.h" // fmt::format + +#include // std::for_each, std::all_of +#include // std::chrono::milliseconds +#include // std::unique_ptr, std::make_unique +#include // std::out_of_range +#include // std::vector + +namespace hws { + +system_hardware_sampler::system_hardware_sampler() : + system_hardware_sampler{ HWS_SAMPLING_INTERVAL } { } + +system_hardware_sampler::system_hardware_sampler(const 
std::chrono::milliseconds sampling_interval) { + // create the hardware samplers based on the available hardware +#if defined(HWS_FOR_CPUS_ENABLED) + { + samplers_.push_back(std::make_unique(sampling_interval)); + } +#endif +#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) + { + int device_count{}; + HWS_CUDA_ERROR_CHECK(cudaGetDeviceCount(&device_count)); + for (int device = 0; device < device_count; ++device) { + samplers_.push_back(std::make_unique(static_cast(device), sampling_interval)); + } + } +#endif +#if defined(HWS_FOR_AMD_GPUS_ENABLED) + { + int device_count{}; + HWS_HIP_ERROR_CHECK(hipGetDeviceCount(&device_count)); + for (int device = 0; device < device_count; ++device) { + samplers_.push_back(std::make_unique(static_cast(device), sampling_interval)); + } + } +#endif +#if defined(HWS_FOR_INTEL_GPUS_ENABLED) + { + // TODO: implement + } +#endif +} + +void system_hardware_sampler::start_sampling() { + std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->start_sampling(); }); +} + +void system_hardware_sampler::stop_sampling() { + std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->stop_sampling(); }); +} + +void system_hardware_sampler::pause_sampling() { + std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->pause_sampling(); }); +} + +void system_hardware_sampler::resume_sampling() { + std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->resume_sampling(); }); +} + +bool system_hardware_sampler::has_sampling_started() const noexcept { + return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->has_sampling_started(); }); +} + +bool system_hardware_sampler::is_sampling() const noexcept { + return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->is_sampling(); }); +} + +bool system_hardware_sampler::has_sampling_stopped() const noexcept { + return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { 
return ptr->has_sampling_stopped(); }); +} + +void system_hardware_sampler::add_event(event e) { + std::for_each(samplers_.begin(), samplers_.end(), [&e](auto &ptr) { ptr->add_event(e); }); +} + +void system_hardware_sampler::add_event(decltype(event::time_point) time_point, decltype(event::name) name) { + std::for_each(samplers_.begin(), samplers_.end(), [&time_point, &name](auto &ptr) { ptr->add_event(time_point, name); }); +} + +void system_hardware_sampler::add_event(decltype(event::name) name) { + std::for_each(samplers_.begin(), samplers_.end(), [&name](auto &ptr) { ptr->add_event(name); }); +} + +std::vector system_hardware_sampler::num_events() const { + std::vector num_events_per_sampler(this->num_samplers()); + std::transform(samplers_.cbegin(), samplers_.cend(), num_events_per_sampler.begin(), [](const auto &ptr) { return ptr->num_events(); }); + return num_events_per_sampler; +} + +std::vector> system_hardware_sampler::get_events() const { + std::vector> events_per_sampler(this->num_samplers()); + std::transform(samplers_.cbegin(), samplers_.cend(), events_per_sampler.begin(), [](const auto &ptr) { return ptr->get_events(); }); + return events_per_sampler; +} + +std::vector> system_hardware_sampler::sampling_time_points() const { + std::vector> sampling_time_points_per_sampler(this->num_samplers()); + std::transform(samplers_.cbegin(), samplers_.cend(), sampling_time_points_per_sampler.begin(), [](const auto &ptr) { return ptr->sampling_time_points(); }); + return sampling_time_points_per_sampler; +} + +std::vector system_hardware_sampler::sampling_interval() const { + std::vector sampling_interval_per_sampler(this->num_samplers()); + std::transform(samplers_.cbegin(), samplers_.cend(), sampling_interval_per_sampler.begin(), [](const auto &ptr) { return ptr->sampling_interval(); }); + return sampling_interval_per_sampler; +} + +std::size_t system_hardware_sampler::num_samplers() const noexcept { + return samplers_.size(); +} + +std::vector> 
&system_hardware_sampler::samplers() noexcept { + return samplers_; +} + +const std::vector> &system_hardware_sampler::samplers() const noexcept { + return samplers_; +} + +std::unique_ptr &system_hardware_sampler::sampler(const std::size_t idx) { + if (idx >= samplers_.size()) { + throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) }; + } + return samplers_[idx]; +} + +const std::unique_ptr &system_hardware_sampler::sampler(const std::size_t idx) const { + if (idx >= samplers_.size()) { + throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) }; + } + return samplers_[idx]; +} + +void system_hardware_sampler::dump_yaml(const char *filename) const { + std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); +} + +void system_hardware_sampler::dump_yaml(const std::string &filename) const { + std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); +} + +void system_hardware_sampler::dump_yaml(const std::filesystem::path &filename) const { + std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); +} + +} // namespace hws From 7c96f02681cfad7229b815b2f687d65f2d7598eb Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 17 Sep 2024 13:04:53 +0200 Subject: [PATCH 36/69] Fix clang-tidy warnings. 
--- .../cpu/hardware_sampler.hpp | 4 +- .../gpu_amd/hardware_sampler.hpp | 4 +- .../gpu_nvidia/hardware_sampler.hpp | 4 +- .../gpu_nvidia/nvml_device_handle_impl.hpp | 2 +- .../gpu_nvidia/nvml_samples.hpp | 2 +- .../system_hardware_sampler.hpp | 5 ++ src/hardware_sampling/cpu/utility.cpp | 6 +-- .../gpu_amd/hardware_sampler.cpp | 50 +++++++++---------- .../gpu_amd/rocm_smi_samples.cpp | 5 +- .../gpu_nvidia/hardware_sampler.cpp | 38 +++++++------- src/hardware_sampling/hardware_sampler.cpp | 2 +- 11 files changed, 63 insertions(+), 59 deletions(-) diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp index 18b489f..4e65338 100644 --- a/include/hardware_sampling/cpu/hardware_sampler.hpp +++ b/include/hardware_sampling/cpu/hardware_sampler.hpp @@ -113,12 +113,12 @@ class cpu_hardware_sampler : public hardware_sampler { /** * @copydoc hws::hardware_sampler::device_identification */ - std::string device_identification() const final; + [[nodiscard]] std::string device_identification() const final; /** * @copydoc hws::hardware_sampler::generate_yaml_string */ - std::string generate_yaml_string() const final; + [[nodiscard]] std::string generate_yaml_string() const final; /// The general CPU samples. 
cpu_general_samples general_samples_{}; diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp index 80a7dbe..65e6ca3 100644 --- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp @@ -120,12 +120,12 @@ class gpu_amd_hardware_sampler : public hardware_sampler { /** * @copydoc hws::hardware_sampler::device_identification */ - std::string device_identification() const final; + [[nodiscard]] std::string device_identification() const final; /** * @copydoc hws::hardware_sampler::generate_yaml_string */ - std::string generate_yaml_string() const final; + [[nodiscard]] std::string generate_yaml_string() const final; /// The ID of the device to sample. std::uint32_t device_id_{}; diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp index 60ed693..562348a 100644 --- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp @@ -121,12 +121,12 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler { /** * @copydoc hws::hardware_sampler::device_identification */ - std::string device_identification() const final; + [[nodiscard]] std::string device_identification() const final; /** * @copydoc hws::hardware_sampler::generate_yaml_string */ - std::string generate_yaml_string() const final; + [[nodiscard]] std::string generate_yaml_string() const final; /// The device handle for the device to sample. 
detail::nvml_device_handle device_{}; diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp b/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp index 9247f29..df6147c 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp @@ -32,7 +32,7 @@ struct nvml_device_handle::nvml_device_handle_impl { * @param[in] device_id the device to get the handle for */ explicit nvml_device_handle_impl(const std::size_t device_id) { - HWS_NVML_ERROR_CHECK(nvmlDeviceGetHandleByIndex(static_cast(device_id), &device)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetHandleByIndex(static_cast(device_id), &device)) } /// The wrapped NVML device handle. diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index ed6504b..c6e7ad9 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -170,7 +170,7 @@ class nvml_memory_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long, memory_total) // the total available memory in Byte HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_pcie_lanes_max) // the maximum number of PCIe lanes - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_generation_max) // the maximum PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_generation_max) // the maximum PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc.) 
HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_speed_max) // the maximum PCIe link speed in MBPS HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, memory_bus_width) // the memory bus with in Bit diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp index c585a3f..394a0c6 100644 --- a/include/hardware_sampling/system_hardware_sampler.hpp +++ b/include/hardware_sampling/system_hardware_sampler.hpp @@ -56,6 +56,11 @@ class system_hardware_sampler { */ system_hardware_sampler &operator=(system_hardware_sampler &&) noexcept = delete; + /** + * @brief Explicitly use the default destructor. + */ + ~system_hardware_sampler() = default; + /** * @brief Start hardware sampling for all wrapped hardware samplers. */ diff --git a/src/hardware_sampling/cpu/utility.cpp b/src/hardware_sampling/cpu/utility.cpp index 2b0080f..3a17995 100644 --- a/src/hardware_sampling/cpu/utility.cpp +++ b/src/hardware_sampling/cpu/utility.cpp @@ -36,10 +36,10 @@ std::string run_subprocess(const std::string_view cmd_line) { // create subprocess subprocess_s proc{}; - HWS_SUBPROCESS_ERROR_CHECK(subprocess_create(cmd_ptr_split.data(), options, &proc)); + HWS_SUBPROCESS_ERROR_CHECK(subprocess_create(cmd_ptr_split.data(), options, &proc)) // wait until process has finished int return_code{}; - HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code)); + HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code)) if (return_code != 0) { throw std::runtime_error{ fmt::format("Error: \"{}\" returned with {}!", cmd_line, return_code) }; } @@ -50,7 +50,7 @@ std::string run_subprocess(const std::string_view cmd_line) { const std::size_t bytes_read = std::fread(buffer.data(), sizeof(typename decltype(buffer)::value_type), buffer.size(), out_handle); // destroy subprocess - HWS_SUBPROCESS_ERROR_CHECK(subprocess_destroy(&proc)); + HWS_SUBPROCESS_ERROR_CHECK(subprocess_destroy(&proc)) // create output return buffer.substr(0, 
bytes_read); diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 2d344ef..96380b3 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -47,7 +47,7 @@ gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, device_id_{ static_cast(device_id) } { // make sure that rsmi_init is only called once for all instances if (instances_++ == 0) { - HWS_ROCM_SMI_ERROR_CHECK(rsmi_init(std::uint64_t{ 0 })); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_init(std::uint64_t{ 0 })) // notify that initialization has been finished init_finished_ = true; } else { @@ -66,7 +66,7 @@ gpu_amd_hardware_sampler::~gpu_amd_hardware_sampler() { // the last instance must shut down the ROCm SMI runtime // make sure that rsmi_shut_down is only called once if (--instances_ == 0) { - HWS_ROCM_SMI_ERROR_CHECK(rsmi_shut_down()); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_shut_down()) // reset init_finished flag init_finished_ = false; } @@ -92,8 +92,8 @@ void gpu_amd_hardware_sampler::sampling_loop() { general_samples_.byte_order_ = "Little Endian"; hipDeviceProp_t prop{}; - if (hipGetDeviceProperties(&prop, device_id_) == hipSuccess) { - std::string architecture{ prop.gcnArchName }; + if (hipGetDeviceProperties(&prop, static_cast(device_id_)) == hipSuccess) { + const std::string architecture{ prop.gcnArchName }; general_samples_.architecture_ = architecture.substr(0, architecture.find_first_of('\0')); } @@ -467,19 +467,19 @@ void gpu_amd_hardware_sampler::sampling_loop() { { if (general_samples_.performance_level_.has_value()) { rsmi_dev_perf_level_t pstate{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate)) general_samples_.performance_level_->push_back(performance_level_to_string(pstate)); } if (general_samples_.compute_utilization_.has_value()) { 
decltype(general_samples_.compute_utilization_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value)) general_samples_.compute_utilization_->push_back(value); } if (general_samples_.memory_utilization_.has_value()) { decltype(general_samples_.memory_utilization_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value)) general_samples_.memory_utilization_->push_back(value); } } @@ -488,7 +488,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { { if (clock_samples_.clock_frequency_.has_value()) { rsmi_frequencies_t frequency_info{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info)) if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { clock_samples_.clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); } else { @@ -499,7 +499,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { if (clock_samples_.socket_clock_frequency_.has_value()) { rsmi_frequencies_t frequency_info{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info)) if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { clock_samples_.socket_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); } else { @@ -510,7 +510,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { if (clock_samples_.memory_clock_frequency_.has_value()) { rsmi_frequencies_t frequency_info{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, 
RSMI_CLK_TYPE_MEM, &frequency_info)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info)) if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { clock_samples_.memory_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); } else { @@ -521,13 +521,13 @@ void gpu_amd_hardware_sampler::sampling_loop() { if (clock_samples_.overdrive_level_.has_value()) { decltype(clock_samples_.overdrive_level_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_overdrive_level_get(device_id_, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_overdrive_level_get(device_id_, &value)) clock_samples_.overdrive_level_->push_back(value); } if (clock_samples_.memory_overdrive_level_.has_value()) { decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_mem_overdrive_level_get(device_id_, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_mem_overdrive_level_get(device_id_, &value)) clock_samples_.memory_overdrive_level_->push_back(value); } } @@ -537,7 +537,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { if (power_samples_.power_usage_.has_value()) { [[maybe_unused]] RSMI_POWER_TYPE power_type{}; std::uint64_t value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type)) power_samples_.power_usage_->push_back(static_cast(value - initial_power_usage) / 1000.0 / 1000.0); } @@ -545,14 +545,14 @@ void gpu_amd_hardware_sampler::sampling_loop() { [[maybe_unused]] std::uint64_t timestamp{}; float resolution{}; std::uint64_t value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp)) const auto scaled_value = static_cast(value) * static_cast(resolution); 
power_samples_.power_total_energy_consumption_->push_back(scaled_value / 1000.0); } if (power_samples_.power_profile_.has_value()) { rsmi_power_profile_status_t power_profile{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile)) switch (power_profile.current) { case RSMI_PWR_PROF_PRST_CUSTOM_MASK: power_samples_.power_profile_->emplace_back("CUSTOM"); @@ -586,7 +586,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { { if (memory_samples_.memory_used_.has_value()) { decltype(memory_samples_.memory_used_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value)) memory_samples_.memory_used_->push_back(value); if (memory_samples_.memory_free_.has_value()) { memory_samples_.memory_free_->push_back(memory_samples_.memory_total_.value() - value); @@ -595,7 +595,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { if (memory_samples_.pcie_link_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) { rsmi_pcie_bandwidth_t bandwidth_info{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info)) if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) { memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000); memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]); @@ -611,50 +611,50 @@ void gpu_amd_hardware_sampler::sampling_loop() { { if (temperature_samples_.fan_speed_percentage_.has_value()) { std::int64_t value{}; - 
HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value)) temperature_samples_.fan_speed_percentage_->push_back(static_cast(value) / static_cast(RSMI_MAX_FAN_SPEED)); } if (temperature_samples_.temperature_.has_value()) { std::int64_t value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value)) temperature_samples_.temperature_->push_back(static_cast(value) / 1000.0); } if (temperature_samples_.memory_temperature_.has_value()) { std::int64_t value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value)) temperature_samples_.memory_temperature_->push_back(static_cast(value) / 1000.0); } if (temperature_samples_.hotspot_temperature_.has_value()) { std::int64_t value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value)) temperature_samples_.hotspot_temperature_->push_back(static_cast(value) / 1000.0); } if (temperature_samples_.hbm_0_temperature_.has_value()) { std::int64_t value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value)) temperature_samples_.hbm_0_temperature_->push_back(static_cast(value) / 1000.0); } if (temperature_samples_.hbm_1_temperature_.has_value()) { std::int64_t value{}; - 
HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value)) temperature_samples_.hbm_1_temperature_->push_back(static_cast(value) / 1000.0); } if (temperature_samples_.hbm_2_temperature_.has_value()) { std::int64_t value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value)) temperature_samples_.hbm_2_temperature_->push_back(static_cast(value) / 1000.0); } if (temperature_samples_.hbm_3_temperature_.has_value()) { std::int64_t value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value)); + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value)) temperature_samples_.hbm_3_temperature_->push_back(static_cast(value) / 1000.0); } } diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index ba06efe..641ca29 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -9,9 +9,8 @@ #include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, quote} -#include "fmt/format.h" // fmt::format -#include "fmt/ranges.h" // fmt::join -#include "rocm_smi/rocm_smi.h" // RSMI_MAX_FAN_SPEED +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include // std::ostream #include // std::string diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 7af2a2a..20c9918 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -46,7 
+46,7 @@ gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t devic hardware_sampler{ sampling_interval } { // make sure that nvmlInit is only called once for all instances if (instances_++ == 0) { - HWS_NVML_ERROR_CHECK(nvmlInit()); + HWS_NVML_ERROR_CHECK(nvmlInit()) // notify that initialization has been finished init_finished_ = true; } else { @@ -68,7 +68,7 @@ gpu_nvidia_hardware_sampler::~gpu_nvidia_hardware_sampler() { // the last instance must shut down the NVML runtime // make sure that nvmlShutdown is only called once if (--instances_ == 0) { - HWS_NVML_ERROR_CHECK(nvmlShutdown()); + HWS_NVML_ERROR_CHECK(nvmlShutdown()) // reset init_finished flag init_finished_ = false; } @@ -227,7 +227,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { { unsigned int clock_count{ 128 }; std::vector supported_clocks(clock_count); - if (clock_samples_.memory_clock_frequency_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, clock_samples_.memory_clock_frequency_min_.value(), &clock_count, supported_clocks.data()) == NVML_SUCCESS) { + if (clock_samples_.memory_clock_frequency_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, static_cast(clock_samples_.memory_clock_frequency_min_.value()), &clock_count, supported_clocks.data()) == NVML_SUCCESS) { clock_samples_.clock_frequency_min_ = static_cast(*std::min_element(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count)); } @@ -427,13 +427,13 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { { if (general_samples_.performance_level_.has_value()) { nvmlPstates_t pstate{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate)) general_samples_.performance_level_->push_back(static_cast(pstate)); } if (general_samples_.compute_utilization_.has_value() && general_samples_.memory_utilization_.has_value()) { nvmlUtilization_t util{}; - 
HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util)) general_samples_.compute_utilization_->push_back(util.gpu); general_samples_.memory_utilization_->push_back(util.memory); } @@ -443,32 +443,32 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { { if (clock_samples_.clock_frequency_.has_value()) { unsigned int value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value)) clock_samples_.clock_frequency_->push_back(static_cast(value)); } if (clock_samples_.sm_clock_frequency_.has_value()) { unsigned int value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value)) clock_samples_.sm_clock_frequency_->push_back(static_cast(value)); } if (clock_samples_.memory_clock_frequency_.has_value()) { unsigned int value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value)) clock_samples_.memory_clock_frequency_->push_back(static_cast(value)); } if (clock_samples_.throttle_reason_.has_value()) { unsigned long long value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value)) clock_samples_.throttle_reason_->push_back(detail::throttle_event_reason_to_string(value)); } if (clock_samples_.auto_boosted_clock_.has_value()) { nvmlEnableState_t mode{}; nvmlEnableState_t default_mode{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode)) clock_samples_.auto_boosted_clock_->push_back(mode == NVML_FEATURE_ENABLED); } } @@ -477,19 +477,19 @@ void 
gpu_nvidia_hardware_sampler::sampling_loop() { { if (power_samples_.power_profile_.has_value()) { nvmlPstates_t pstate{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate)) power_samples_.power_profile_->push_back(static_cast(pstate)); } if (power_samples_.power_usage_.has_value()) { unsigned int value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value)) power_samples_.power_usage_->push_back(static_cast(value - initial_power_usage) / 1000.0); } if (power_samples_.power_total_energy_consumption_.has_value()) { unsigned long long value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value)) power_samples_.power_total_energy_consumption_->push_back(static_cast(value) / 1000.0); } } @@ -498,20 +498,20 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { { if (memory_samples_.memory_free_.has_value() && memory_samples_.memory_used_.has_value()) { nvmlMemory_t memory_info{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info)) memory_samples_.memory_free_->push_back(memory_info.free); memory_samples_.memory_used_->push_back(memory_info.used); } if (memory_samples_.num_pcie_lanes_.has_value()) { decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value)) memory_samples_.num_pcie_lanes_->push_back(value); } if (memory_samples_.pcie_link_generation_.has_value()) { decltype(memory_samples_.pcie_link_generation_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkGeneration(device, &value)); + 
HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkGeneration(device, &value)) memory_samples_.pcie_link_generation_->push_back(value); } } @@ -520,13 +520,13 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { { if (temperature_samples_.fan_speed_percentage_.has_value()) { unsigned int value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value)) temperature_samples_.fan_speed_percentage_->push_back(static_cast(value)); } if (temperature_samples_.temperature_.has_value()) { unsigned int value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value)) temperature_samples_.temperature_->push_back(static_cast(value)); } } @@ -539,7 +539,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { std::string gpu_nvidia_hardware_sampler::device_identification() const { nvmlPciInfo_st pcie_info{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info)) return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device); } diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index b68cbd1..5d27972 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -12,7 +12,7 @@ #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include "fmt/chrono.h" // fmt::localtime, direct formatting of std::chrono types +#include "fmt/chrono.h" // direct formatting of std::chrono types #include // std::chrono::{system_clock, steady_clock, duration_cast, milliseconds} #include // std::size_t From 9782e96425ecf7bf9f71ba5d650584012fd17d96 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 23 Sep 2024 12:07:43 +0200 Subject: [PATCH 37/69] Remove unused fetch 
content. --- CMakeLists.txt | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1080a26..1fbf7a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,24 +88,6 @@ else () endif () target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt) -#set(HWS_ryml_VERSION v0.7.2) -#find_package(ryml QUIET) -#if (fmt_FOUND) -# message(STATUS "Found package ryml (rapidyaml).") -#else () -# message(STATUS "Couldn't find package ryml (rapidyaml). Building version ${HWS_ryml_VERSION} from source.") -# # fetch yaml library ryml -# FetchContent_Declare(ryml -# GIT_REPOSITORY https://github.com/biojppm/rapidyaml -# GIT_TAG ${HWS_ryml_VERSION} -# GIT_SHALLOW FALSE -# QUIET -# ) -# FetchContent_MakeAvailable(ryml) -# add_dependencies(${HWS_LIBRARY_NAME} ryml) -#endif () -#target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC ryml::ryml) - #################################################################################################################### ## CPU measurements ## #################################################################################################################### From 1dd98f2d7a858a85b169a6e9071641ff11a27dc2 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 10:52:11 +0200 Subject: [PATCH 38/69] Add missing detail namespace qualifier. 
--- src/hardware_sampling/gpu_amd/hardware_sampler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 96380b3..e535124 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -110,7 +110,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { // queried samples -> retrieved every iteration if available rsmi_dev_perf_level_t pstate{}; if (rsmi_dev_perf_level_get(device_id_, &pstate) == RSMI_STATUS_SUCCESS) { - general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ performance_level_to_string(pstate) }; + general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ detail::performance_level_to_string(pstate) }; } decltype(general_samples_.compute_utilization_)::value_type::value_type utilization_gpu{}; @@ -468,7 +468,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { if (general_samples_.performance_level_.has_value()) { rsmi_dev_perf_level_t pstate{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate)) - general_samples_.performance_level_->push_back(performance_level_to_string(pstate)); + general_samples_.performance_level_->push_back(detail::performance_level_to_string(pstate)); } if (general_samples_.compute_utilization_.has_value()) { From 28b52f4a6b5607b219793d24bdae71fbd28d9346 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 11:12:21 +0200 Subject: [PATCH 39/69] Fix an error where the power usage was calculated from a reference point instead of the total power consumption. 
--- src/hardware_sampling/gpu_amd/hardware_sampler.cpp | 14 ++++++++------ .../gpu_nvidia/hardware_sampler.cpp | 14 ++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index e535124..9182347 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -83,7 +83,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); - std::uint64_t initial_power_usage{}; + double initial_total_power_consumption{}; // initial total power consumption in J // retrieve initial general information { @@ -201,7 +201,8 @@ void gpu_amd_hardware_sampler::sampling_loop() { { RSMI_POWER_TYPE power_type{}; - if (rsmi_dev_power_get(device_id_, &initial_power_usage, &power_type) == RSMI_STATUS_SUCCESS) { + std::uint64_t power_usage{}; + if (rsmi_dev_power_get(device_id_, &power_usage, &power_type) == RSMI_STATUS_SUCCESS) { switch (power_type) { case RSMI_POWER_TYPE::RSMI_AVERAGE_POWER: power_samples_.power_measurement_type_ = "average"; @@ -214,7 +215,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { break; } // report power usage since the first sample - power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(0) }; + power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(power_usage) / 1000.0 / 1000.0 }; } } @@ -280,7 +281,8 @@ void gpu_amd_hardware_sampler::sampling_loop() { std::uint64_t power_total_energy_consumption{}; if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, ×tamp) == RSMI_STATUS_SUCCESS) { const auto scaled_value = static_cast(power_total_energy_consumption) * static_cast(resolution); - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ scaled_value / 
1000.0 / 1000.0 }; + initial_total_power_consumption = scaled_value / 1000.0 / 1000.0; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; } } @@ -538,7 +540,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { [[maybe_unused]] RSMI_POWER_TYPE power_type{}; std::uint64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type)) - power_samples_.power_usage_->push_back(static_cast(value - initial_power_usage) / 1000.0 / 1000.0); + power_samples_.power_usage_->push_back(static_cast(value) / 1000.0 / 1000.0); } if (power_samples_.power_total_energy_consumption_.has_value()) { @@ -547,7 +549,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { std::uint64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp)) const auto scaled_value = static_cast(value) * static_cast(resolution); - power_samples_.power_total_energy_consumption_->push_back(scaled_value / 1000.0); + power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000.0 / 1000.0) - initial_total_power_consumption); } if (power_samples_.power_profile_.has_value()) { diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 20c9918..d5a2e71 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -88,7 +88,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); - unsigned int initial_power_usage{}; + double initial_total_power_consumption{}; // initial total power consumption in J // retrieve initial general information { @@ -316,13 +316,15 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { power_samples_.available_power_profiles_ = power_states; // queried samples -> retrieved every iteration if available - if (nvmlDeviceGetPowerUsage(device, 
&initial_power_usage) == NVML_SUCCESS) { - power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(0) }; + unsigned int power_usage{}; + if (nvmlDeviceGetPowerUsage(device, &power_usage) == NVML_SUCCESS) { + power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(power_usage) / 1000.0 }; } unsigned long long power_total_energy_consumption{}; if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) { - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast(power_total_energy_consumption) / 1000.0 }; + initial_total_power_consumption = static_cast(power_total_energy_consumption) / 1000.0; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; } nvmlPstates_t pstate{}; @@ -484,13 +486,13 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { if (power_samples_.power_usage_.has_value()) { unsigned int value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value)) - power_samples_.power_usage_->push_back(static_cast(value - initial_power_usage) / 1000.0); + power_samples_.power_usage_->push_back(static_cast(value) / 1000.0); } if (power_samples_.power_total_energy_consumption_.has_value()) { unsigned long long value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value)) - power_samples_.power_total_energy_consumption_->push_back(static_cast(value) / 1000.0); + power_samples_.power_total_energy_consumption_->push_back((static_cast(value) / 1000.0) - initial_total_power_consumption); } } From 03e572e0927f007ff1caaecf46e060afc0003683 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 11:13:25 +0200 Subject: [PATCH 40/69] Change order of device ID and bus ID. 
--- src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index d5a2e71..1536237 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -542,7 +542,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { std::string gpu_nvidia_hardware_sampler::device_identification() const { nvmlPciInfo_st pcie_info{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info)) - return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device); + return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.device, pcie_info.bus); } std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const { From 6ae5c21621a6ca6dcda8f88e0f279c0f2cf7310b Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 11:17:01 +0200 Subject: [PATCH 41/69] Add newlines between the different categories to make the YAML output more clear. --- src/hardware_sampling/cpu/hardware_sampler.cpp | 12 ++++++------ src/hardware_sampling/gpu_amd/hardware_sampler.cpp | 8 ++++---- .../gpu_nvidia/hardware_sampler.cpp | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 7e89eca..d4a6754 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -428,12 +428,12 @@ std::string cpu_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" 
}; } - return fmt::format("{}\n" - "{}\n" - "{}\n" - "{}\n" - "{}\n" - "{}\n" + return fmt::format("{}\n\n" + "{}\n\n" + "{}\n\n" + "{}\n\n" + "{}\n\n" + "{}\n\n" "{}", general_samples_.generate_yaml_string(), clock_samples_.generate_yaml_string(), diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 9182347..84480bb 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -677,10 +677,10 @@ std::string gpu_amd_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return fmt::format("{}\n" - "{}\n" - "{}\n" - "{}\n" + return fmt::format("{}\n\n" + "{}\n\n" + "{}\n\n" + "{}\n\n" "{}", general_samples_.generate_yaml_string(), clock_samples_.generate_yaml_string(), diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 1536237..769f0a6 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -551,10 +551,10 @@ std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return fmt::format("{}\n" - "{}\n" - "{}\n" - "{}\n" + return fmt::format("{}\n\n" + "{}\n\n" + "{}\n\n" + "{}\n\n" "{}", general_samples_.generate_yaml_string(), clock_samples_.generate_yaml_string(), From d32f6bf14f14179f915649de17b7ab0e4b1dca5d Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 11:27:11 +0200 Subject: [PATCH 42/69] Fix some compilation warnings and linker errors. 
--- include/hardware_sampling/gpu_amd/utility.hpp | 7 +++++-- src/hardware_sampling/gpu_amd/hardware_sampler.cpp | 2 +- src/hardware_sampling/gpu_amd/utility.cpp | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp index b0786f9..a277e06 100644 --- a/include/hardware_sampling/gpu_amd/utility.hpp +++ b/include/hardware_sampling/gpu_amd/utility.hpp @@ -50,7 +50,10 @@ namespace hws::detail { #else #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) rocm_smi_func; - #define HWS_HIP_ERROR_CHECK(hip_func) hip_func; + #define HWS_HIP_ERROR_CHECK(hip_func) \ + { \ + [[maybe_unused]] hipError_t errc = hip_func; \ + } #endif /** @@ -60,6 +63,6 @@ namespace hws::detail { */ [[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level); -} // namespace hws +} // namespace hws::detail #endif // HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_ diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 84480bb..dae7fec 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -8,7 +8,7 @@ #include "hardware_sampling/gpu_amd/hardware_sampler.hpp" #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} -#include "hardware_sampling/gpu_amd/utility.hpp" // HWS_ROCM_SMI_ERROR_CHECK +#include "hardware_sampling/gpu_amd/utility.hpp" // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler #include "hardware_sampling/utility.hpp" // hws::detail::time_points_to_epoch diff --git a/src/hardware_sampling/gpu_amd/utility.cpp b/src/hardware_sampling/gpu_amd/utility.cpp index 3164c18..35d375c 100644 --- 
a/src/hardware_sampling/gpu_amd/utility.cpp +++ b/src/hardware_sampling/gpu_amd/utility.cpp @@ -11,7 +11,7 @@ #include <string> // std::string -namespace hws { +namespace hws::detail { std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) { switch (perf_level) { @@ -34,6 +34,7 @@ std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) case RSMI_DEV_PERF_LEVEL_DETERMINISM: return "determinism"; case RSMI_DEV_PERF_LEVEL_UNKNOWN: + default: return "unknown"; } } From 89129b2effb9b1835a60be4d7f5b24389893bfdd Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 11:29:32 +0200 Subject: [PATCH 43/69] Update README file. --- README.md | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d7acad6..32cd1bc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # hws - Hardware Sampling for CPUs and GPUs -The Hardware Sampling (hws) library can be used to track hardware performance like clock frequency, memory usage, temperatures, or power draw. +The Hardware Sampling (hws) library can be used to track hardware performance like clock frequency, memory usage, +temperatures, or power draw. It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel. ## Getting Started @@ -10,15 +11,23 @@ It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel. 
General dependencies: - a C++17 capable compiler -- [{fmt} > 11.0.2](https://github.com/fmtlib/fmt) for string formatting (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call) -- [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call) +- [{fmt} > 11.0.2](https://github.com/fmtlib/fmt) for string formatting (automatically build during the CMake + configuration if it couldn't be found using the respective `find_package` call) +- [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically build during + the CMake configuration if it couldn't be found using the respective `find_package` call) Dependencies based on the hardware to sample: -- if a CPU should be targeted: at least one of [`turbostat`](https://www.linux.org/docs/man8/turbostat.html) (may require root privileges), [`lscpu`](https://man7.org/linux/man-pages/man1/lscpu.1.html), or [`free`](https://man7.org/linux/man-pages/man1/free.1.html) and the [`subprocess.h`](https://github.com/sheredom/subprocess.h) library (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call) +- if a CPU should be targeted: at least one of [`turbostat`](https://www.linux.org/docs/man8/turbostat.html) (may + require root privileges), [`lscpu`](https://man7.org/linux/man-pages/man1/lscpu.1.html), or [ + `free`](https://man7.org/linux/man-pages/man1/free.1.html) and the [ + `subprocess.h`](https://github.com/sheredom/subprocess.h) library (automatically build during the CMake configuration + if it couldn't be found using the respective `find_package` call) - if an NVIDIA GPU should be targeted: NVIDIA's Management Library [`NVML`](https://docs.nvidia.com/deploy/nvml-api/) -- if an AMD GPU should be targeted: AMD's ROCm SMI 
library [`rocm_smi_lib`](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/modules.html) -- if an Intel GPU should be targeted: Intel's [`Level Zero library`](https://spec.oneapi.io/level-zero/latest/core/INTRO.html) +- if an AMD GPU should be targeted: AMD's ROCm SMI library [ + `rocm_smi_lib`](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/modules.html) +- if an Intel GPU should be targeted: Intel's [ + `Level Zero library`](https://spec.oneapi.io/level-zero/latest/core/INTRO.html) ### Building hws @@ -41,7 +50,8 @@ cmake --build . -j The `[optional_options]` can be one or multiple of: -- `HWS_ENABLE_ERROR_CHECKS=ON|OFF` (default: `OFF`): enable sanity checks during hardware sampling, may be problematic with smaller sample intervals +- `HWS_ENABLE_ERROR_CHECKS=ON|OFF` (default: `OFF`): enable sanity checks during hardware sampling, may be problematic + with smaller sample intervals - `HWS_SAMPLING_INTERVAL=100ms` (default: `100ms`): set the sampling interval in milliseconds - `HWS_ENABLE_PYTHON_BINDINGS=ON|OFF` (default: `ON`): enable Python bindings @@ -233,8 +243,6 @@ The sampling type `sampled` denotes samples that are gathered during the whole h | system_low_power_idle_state_percent | sampled | % | | package_low_power_idle_state_percent | sampled | % | - - ## Example Python usage ```python @@ -269,7 +277,9 @@ axes = plt.gcf().axes[0] x_bounds = axes.get_xlim() for event in sampler.get_relative_events()[1:-1]: axes.axvline(x=event.relative_time_point, color='r') - axes.annotate(text=event.name, xy=(((event.relative_time_point - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270) + axes.annotate(text=event.name, + xy=(((event.relative_time_point - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), + xycoords='axes fraction', rotation=270) plt.xlabel("runtime [ms]") plt.ylabel("clock frequency [MHz]") @@ -283,4 +293,5 @@ plt.show() ## License -The hws library is distributed 
under the [MIT license](https://github.com/SC-SGS/hardware_sampling/blob/main/LICENSE.md). \ No newline at end of file +The hws library is distributed under +the [MIT license](https://github.com/SC-SGS/hardware_sampling/blob/main/LICENSE.md). \ No newline at end of file From d26130afeeb099460d7b1e58bacf5de416f969c1 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 11:54:06 +0200 Subject: [PATCH 44/69] Interpolate total power consumption from the current power usage on AMD GPUs if the rsmi_dev_energy_count_get doesn't work (may happen on some older GPUs). --- .../gpu_amd/hardware_sampler.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index dae7fec..dbd2971 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -283,6 +283,9 @@ void gpu_amd_hardware_sampler::sampling_loop() { const auto scaled_value = static_cast(power_total_energy_consumption) * static_cast(resolution); initial_total_power_consumption = scaled_value / 1000.0 / 1000.0; power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; + } else if (power_samples_.power_usage_.has_value()) { + // if the total energy consumption cannot be retrieved, but the current power draw, approximate it + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; } } @@ -547,9 +550,16 @@ void gpu_amd_hardware_sampler::sampling_loop() { [[maybe_unused]] std::uint64_t timestamp{}; float resolution{}; std::uint64_t value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp)) - const auto scaled_value = static_cast(value) * static_cast(resolution); - power_samples_.power_total_energy_consumption_->push_back((scaled_value / 
1000.0 / 1000.0) - initial_total_power_consumption); + if (rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) { + const auto scaled_value = static_cast(value) * static_cast(resolution); + power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000.0 / 1000.0) - initial_total_power_consumption); + } else if (power_samples_.power_usage_.has_value()) { + // if the total energy consumption cannot be retrieved, but the current power draw, approximate it + const std::size_t num_time_points = this->sampling_time_points().size(); + const auto time_difference = std::chrono::duration(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count(); + const auto current = power_samples_.power_usage_->back() * time_difference; + power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current); + } } if (power_samples_.power_profile_.has_value()) { From d115e3117daaa82f9d923f89d41d53c2763a2b7a Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 12:09:24 +0200 Subject: [PATCH 45/69] Update Intel GPU Level Zero implementation (not tested yet since currently no access to Intel GPUs). 
--- CMakeLists.txt | 38 +- README.md | 220 ++++---- bindings/gpu_intel_hardware_sampler.cpp | 67 ++- .../gpu_intel/hardware_sampler.hpp | 8 +- .../level_zero_device_handle_impl.hpp | 14 +- .../gpu_intel/level_zero_samples.hpp | 93 ++-- .../hardware_sampling/gpu_intel/utility.hpp | 26 +- .../gpu_intel/hardware_sampler.cpp | 313 ++++++++---- .../gpu_intel/level_zero_samples.cpp | 481 ++++++++++-------- src/hardware_sampling/gpu_intel/utility.cpp | 65 ++- 10 files changed, 777 insertions(+), 548 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1fbf7a7..f3ba9df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -245,25 +245,25 @@ endif () ## Intel GPU sampling via Level Zero ## #################################################################################################################### # try finding Level Zero -#find_package(level_zero QUIET) -#if (level_zero_FOUND) -# target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE level_zero) -# -# message(STATUS "Enable sampling of Intel GPU information using Level Zero.") -# -# # add source file to source file list -# target_sources(${HWS_LIBRARY_NAME} PRIVATE -# $) -# -# # add compile definition -# target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_INTEL_GPUS_ENABLED) -#else () -# message(STATUS "Hardware sampling for Intel GPUs disabled!") -#endif () +find_package(level_zero QUIET) +if (level_zero_FOUND) + target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE level_zero) + + message(STATUS "Enable sampling of Intel GPU information using Level Zero.") + + # add source file to source file list + target_sources(${HWS_LIBRARY_NAME} PRIVATE + $) + + # add compile definition + target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_INTEL_GPUS_ENABLED) +else () + message(STATUS "Hardware sampling for Intel GPUs disabled!") +endif () #################################################################################################################### diff --git a/README.md b/README.md index 
32cd1bc..3207e22 100644 --- a/README.md +++ b/README.md @@ -74,150 +74,144 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ## Available samples -The sampling type `fixed` denotes samples that are gathered once per hardware samples like maximum clock frequencies or temperatures or the total available memory. -The sampling type `sampled` denotes samples that are gathered during the whole hardware sampling process like the current clock frequencies, temperatures, or memory consumption. +The sampling type `fixed` denotes samples that are gathered once per hardware samples like maximum clock frequencies or +temperatures or the total available memory. +The sampling type `sampled` denotes samples that are gathered during the whole hardware sampling process like the +current clock frequencies, temperatures, or memory consumption. ### General samples | sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | |:--------------------|:-----------:|:-----------:|:-----------:|:---------:|:-------------:| -| architecture | fixed | str | str | str | ? | +| architecture | fixed | str | str | str | - | | byte_order | fixed | str | str (fix) | str (fix) | str (fix) | -| num_cores | fixed | int | int | - | | +| num_cores | fixed | int | int | - | - | | num_threads | fixed | int | - | - | - | | threads_per_core | fixed | int | - | - | - | | cores_per_socket | fixed | int | - | - | - | | num_sockets | fixed | int | - | - | - | -| numa_nodes | fixed | int | - | - | | +| numa_nodes | fixed | int | - | - | - | | vendor_id | fixed | str | str (fix) | str | str (PCIe ID) | | name | fixed | str | str | str | str | -| flags | fixed | list of str | - | - | | -| persistence_mode | fixed | - | bool | - | | -| compute_utilization | sampled | % | % | % | ? | -| memory_utilization | sampled | - | % | % | ? 
| +| flags | fixed | list of str | - | - | list of str | +| persistence_mode | fixed | - | bool | - | - | +| standby_mode | fixed | - | - | - | str | +| num_threads_per_eu | fixed | - | - | - | int | +| eu_simd_width | fixed | - | - | - | int | +| compute_utilization | sampled | % | % | % | - | +| memory_utilization | sampled | - | % | % | - | | ipc | sampled | float | - | - | - | | irq | sampled | int | - | - | - | | smi | sampled | int | - | - | - | | poll | sampled | int | - | - | - | | poll_percent | sampled | % | - | - | - | -| performance_level | sampled | - | int | str | | -| standby_mode | | | | | str | -| num_threads_per_eu | | | | | int | -| eu_simd_width | | | | | int | +| performance_level | sampled | - | int | str | - | ### clock-related samples -| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:-----------------------------------|:-----------:|:----:|:----------------:|:-----------:|:----------:| -| auto_boosted_clock_enabled | fixed | bool | bool | - | | -| clock_frequency_min | fixed | MHz | MHz | MHz | | -| clock_frequency_max | fixed | MHz | MHz | MHz | | -| memory_clock_frequency_min | fixed | - | MHz | MHz | | -| memory_clock_frequency_max | fixed | - | MHz | MHz | | -| socket_clock_frequency_min | fixed | - | - | MHz | - | -| socket_clock_frequency_min | fixed | - | - | MHz | - | -| sm_clock_frequency_max | fixed | - | MHz | - | - | -| available_clock_frequencies | fixed | - | map of MHz | list of MHz | | -| available_memory_clock_frequencies | fixed | - | list of MHz | list of MHz | | -| clock_frequency | sampled | MHz | MHz | MHz | | -| average_non_idle_clock_frequency | sampled | MHz | - | - | - | -| time_stamp_counter | sampled | MHz | - | - | - | -| memory_clock_frequency | sampled | - | MHz | MHz | | -| socket_clock_frequency | sampled | - | - | MHz | - | -| sm_clock_frequency | sampled | - | MHz | - | - | -| overdrive_level | sampled | - | - | % | - | -| memory_overdrive_level | sampled | - | - | % | - | -| 
throttle_reason | sampled | - | string (bitmask) | - | | -| memory_throttle_reason | | - | - | - | | -| auto_boosted_clock | sampled | - | bool | - | - | -| tdp_frequency_limit | | - | - | - | | -| memory_tdp_frequency_limit | | - | - | - | | +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-----------------------------------|:-----------:|:----:|:----------------:|:-----------:|:----------------:| +| auto_boosted_clock_enabled | fixed | bool | bool | - | - | +| clock_frequency_min | fixed | MHz | MHz | MHz | MHz | +| clock_frequency_max | fixed | MHz | MHz | MHz | MHz | +| memory_clock_frequency_min | fixed | - | MHz | MHz | MHz | +| memory_clock_frequency_max | fixed | - | MHz | MHz | MHz | +| socket_clock_frequency_min | fixed | - | - | MHz | - | +| socket_clock_frequency_min | fixed | - | - | MHz | - | +| sm_clock_frequency_max | fixed | - | MHz | - | - | +| available_clock_frequencies | fixed | - | map of MHz | list of MHz | list of MHz | +| available_memory_clock_frequencies | fixed | - | list of MHz | list of MHz | list of MHz | +| clock_frequency | sampled | MHz | MHz | MHz | MHz | +| average_non_idle_clock_frequency | sampled | MHz | - | - | - | +| time_stamp_counter | sampled | MHz | - | - | - | +| memory_clock_frequency | sampled | - | MHz | MHz | MHz | +| socket_clock_frequency | sampled | - | - | MHz | - | +| sm_clock_frequency | sampled | - | MHz | - | - | +| overdrive_level | sampled | - | - | % | - | +| memory_overdrive_level | sampled | - | - | % | - | +| throttle_reason | sampled | - | string (bitmask) | - | string (bitmask) | +| memory_throttle_reason | sampled | - | - | - | string (bitmask) | +| auto_boosted_clock | sampled | - | bool | - | - | +| frequency_limit_tdp | sampled | - | - | - | MHz | +| memory_frequency_limit_tdp | sampled | - | - | - | MHz | ### power-related samples -| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | 
-|:-------------------------------|:-----------:|:---------------------------------:|:-----------:|:-----------:|:----------:| -| power_management_limit | fixed | - | W | W | | -| power_enforced_limit | fixed | - | W | W | | -| power_measurement_type | fixed | str (fix) | str | str | | -| power_management_mode | fixed | - | bool | - | | -| available_power_profiles | fixed | - | list of int | list of str | | -| power_usage | sampled | W | W | W | | -| core_watt | sampled | W | - | - | - | -| dram_watt | sampled | W | - | - | - | -| package_rapl_throttling | sampled | % | - | - | - | -| dram_rapl_throttling | sampled | % | - | - | - | -| power_total_energy_consumption | sampled | J
(calculated via power_usage) | J | J | J | -| power_profile | sampled | - | int | str | | -| energy_threshold_enabled | | | | | bool | -| energy_threshold | | | | | J | +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-------------------------------|:-----------:|:---------------------------------:|:-----------:|:--------------------------------------------------------------------------------------:|:----------------------------------------------------:| +| power_management_limit | fixed | - | W | W | - | +| power_enforced_limit | fixed | - | W | W | W | +| power_measurement_type | fixed | str (fix) | str | str | str | +| power_management_mode | fixed | - | bool | - | bool | +| available_power_profiles | fixed | - | list of int | list of str | - | +| power_usage | sampled | W | W | W | W
(calculated via power_total_energy_consumption) | +| core_watt | sampled | W | - | - | - | +| dram_watt | sampled | W | - | - | - | +| package_rapl_throttling | sampled | % | - | - | - | +| dram_rapl_throttling | sampled | % | - | - | - | +| power_total_energy_consumption | sampled | J
(calculated via power_usage) | J | J
(calculated via power_usage if
power_total_energy_consumption isn't available) | J | +| power_profile | sampled | - | int | str | - | ### memory-related samples -| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:----------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:| -| cache_size_L1d | fixed | str | - | - | - | -| cache_size_L1i | fixed | str | - | - | - | -| cache_size_L2 | fixed | str | - | - | - | -| cache_size_L3 | fixed | str | - | - | - | -| memory_total | fixed | B | B | B | | -| visible_memory_total | fixed | - | - | B | - | -| swap_memory_total | fixed | B | - | - | - | -| memory_total_{} | | - | | | B | -| allocatable_memory_total_{} | | - | | | B | -| num_pcie_lanes_min | fixed | - | - | int | | -| num_pcie_lanes_max | fixed | - | int | int | | -| pcie_link_generation_max | fixed | - | int | - | int | -| pcie_link_speed_max | fixed | - | MBPS | - | BPS | -| pcie_link_transfer_rate_min | fixed | - | - | MT/s | | -| pcie_link_transfer_rate_max | fixed | - | - | MT/s | | -| memory_bus_width | fixed | - | Bit | - | | -| memory_used | sampled | B | B | B | | -| memory_free | sampled | B | B | B | | -| swap_memory_used | sampled | B | - | - | - | -| swap_memory_free | sampled | B | - | - | - | -| num_pcie_lanes | sampled | - | int | int | | -| pcie_link_generation | sampled | - | int | - | int | -| pcie_link_speed | sampled | - | MBPS | - | MBPS | -| pcie_link_transfer_rate | sampled | - | - | T/s | - | -| memory_used_{} | | | | | B | -| memory_free_{} | | | | | B | -| memory_bus_width_{} | | | | | Bit | -| memory_num_channels_{} | | | | | int | -| memory_location_{} | | | | | str | +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:----------------------------|:-----------:|:----:|:-----------:|:--------:|:------------------------------:| +| cache_size_L1d | fixed | str | - | - | - | +| cache_size_L1i | fixed | str | - | - | - | +| cache_size_L2 | fixed | str | - | - | - | +| cache_size_L3 | fixed | str | - | - 
| - | +| memory_total | fixed | B | B | B | B
(map of memory modules) | +| visible_memory_total | fixed | - | - | B | B
(map of memory modules) | +| swap_memory_total | fixed | B | - | - | - | +| num_pcie_lanes_min | fixed | - | - | int | - | +| num_pcie_lanes_max | fixed | - | int | int | int | +| pcie_link_generation_max | fixed | - | int | - | int | +| pcie_link_speed_max | fixed | - | MBPS | - | MBPS | +| pcie_link_transfer_rate_min | fixed | - | - | MT/s | - | +| pcie_link_transfer_rate_max | fixed | - | - | MT/s | - | +| memory_bus_width | fixed | - | Bit | - | Bit
(map of memory modules) | +| memory_num_channels | fixed | - | - | - | int
(map of memory modules) | +| memory_used | sampled | B | B | B | B
(map of memory modules) | +| memory_free | sampled | B | B | B | B
(map of memory modules) | +| swap_memory_used | sampled | B | - | - | - | +| swap_memory_free | sampled | B | - | - | - | +| num_pcie_lanes | sampled | - | int | int | int | +| pcie_link_generation | sampled | - | int | - | int | +| pcie_link_speed | sampled | - | MBPS | - | MBPS | +| pcie_link_transfer_rate | sampled | - | - | T/s | - | ### temperature-related samples | sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | |:------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:| -| num_fans | fixed | - | int | int | | -| fan_speed_min | fixed | - | % | - | | -| fan_speed_max | fixed | - | % | RPM | | -| temperature_min | fixed | - | - | °C | | -| temperature_max | fixed | - | °C | °C | | -| memory_temperature_min | fixed | - | - | °C | | -| memory_temperature_max | fixed | - | °C | °C | | -| hotspot_temperature_min | fixed | - | - | °C | | -| hotspot_temperature_max | fixed | - | - | °C | | -| hbm_0_temperature_min | fixed | - | - | °C | | -| hbm_0_temperature_max | fixed | - | - | °C | | -| hbm_1_temperature_min | fixed | - | - | °C | | -| hbm_1_temperature_max | fixed | - | - | °C | | -| hbm_2_temperature_min | fixed | - | - | °C | | -| hbm_2_temperature_max | fixed | - | - | °C | | -| hbm_3_temperature_min | fixed | - | - | °C | | -| hbm_3_temperature_max | fixed | - | - | °C | | -| fan_speed_percentage | sampled | - | % | % | | -| temperature | sampled | °C | °C | °C | | -| memory_temperature | sampled | - | - | °C | | -| hotspot_temperature | sampled | - | - | °C | | -| hbm_0_temperature | sampled | - | - | °C | | -| hbm_1_temperature | sampled | - | - | °C | | -| hbm_2_temperature | sampled | - | - | °C | | -| hbm_3_temperature | sampled | - | - | °C | | -| temperature_{}_max | | | | | | -| temperature_psu | | | | | | -| temperature_{} | | | | | | +| num_fans | fixed | - | int | int | int | +| fan_speed_min | fixed | - | % | - | - | +| fan_speed_max | fixed | - | % | RPM | RPM | +| temperature_min | fixed | - | - | 
°C | - | +| temperature_max | fixed | - | °C | °C | °C | +| memory_temperature_min | fixed | - | - | °C | - | +| memory_temperature_max | fixed | - | °C | °C | °C | +| hotspot_temperature_min | fixed | - | - | °C | - | +| hotspot_temperature_max | fixed | - | - | °C | - | +| hbm_0_temperature_min | fixed | - | - | °C | - | +| hbm_0_temperature_max | fixed | - | - | °C | - | +| hbm_1_temperature_min | fixed | - | - | °C | - | +| hbm_1_temperature_max | fixed | - | - | °C | - | +| hbm_2_temperature_min | fixed | - | - | °C | - | +| hbm_2_temperature_max | fixed | - | - | °C | - | +| hbm_3_temperature_min | fixed | - | - | °C | - | +| hbm_3_temperature_max | fixed | - | - | °C | - | +| global_temperature_max | fixed | - | - | °C | °C | +| fan_speed_percentage | sampled | - | % | % | % | +| temperature | sampled | °C | °C | °C | °C | +| memory_temperature | sampled | - | - | °C | °C | +| hotspot_temperature | sampled | - | - | °C | - | +| hbm_0_temperature | sampled | - | - | °C | - | +| hbm_1_temperature | sampled | - | - | °C | - | +| hbm_2_temperature | sampled | - | - | °C | - | +| hbm_3_temperature | sampled | - | - | °C | - | +| global_temperature | sampled | - | - | - | °C | +| psu_temperature | sampled | - | - | - | °C | | core_temperature | sampled | °C | - | - | - | | core_throttle_percent | sampled | % | - | - | - | diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp index 0b05a55..8cf6f83 100644 --- a/bindings/gpu_intel_hardware_sampler.cpp +++ b/bindings/gpu_intel_hardware_sampler.cpp @@ -22,7 +22,10 @@ namespace py = pybind11; void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "LevelZeroGeneralSamples") + .def("get_byte_order", &hws::level_zero_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") + .def("get_vendor_id", &hws::level_zero_general_samples::get_vendor_id, "the vendor ID") .def("get_name", &hws::level_zero_general_samples::get_name, 
"the model name of the device") + .def("get_flags", &hws::level_zero_general_samples::get_flags, "potential GPU flags (e.g. integrated device)") .def("get_standby_mode", &hws::level_zero_general_samples::get_standby_mode, "the enabled standby mode (power saving or never)") .def("get_num_threads_per_eu", &hws::level_zero_general_samples::get_num_threads_per_eu, "the number of threads per EU unit") .def("get_eu_simd_width", &hws::level_zero_general_samples::get_eu_simd_width, "the physical EU unit SIMD width") @@ -32,27 +35,29 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the clock samples py::class_(m, "LevelZeroClockSamples") - .def("get_clock_gpu_min", &hws::level_zero_clock_samples::get_clock_gpu_min, "the minimum possible GPU clock frequency in MHz") - .def("get_clock_gpu_max", &hws::level_zero_clock_samples::get_clock_gpu_max, "the maximum possible GPU clock frequency in MHz") - .def("get_available_clocks_gpu", &hws::level_zero_clock_samples::get_available_clocks_gpu, "the available GPU clock frequencies in MHz (slowest to fastest)") - .def("get_clock_mem_min", &hws::level_zero_clock_samples::get_clock_mem_min, "the minimum possible memory clock frequency in MHz") - .def("get_clock_mem_max", &hws::level_zero_clock_samples::get_clock_mem_max, "the maximum possible memory clock frequency in MHz") - .def("get_available_clocks_mem", &hws::level_zero_clock_samples::get_available_clocks_mem, "the available memory clock frequencies in MHz (slowest to fastest)") - .def("get_tdp_frequency_limit_gpu", &hws::level_zero_clock_samples::get_tdp_frequency_limit_gpu, "the current maximum allowed GPU frequency based on the TDP limit in MHz") - .def("get_clock_gpu", &hws::level_zero_clock_samples::get_clock_gpu, "the current GPU frequency in MHz") - .def("get_throttle_reason_gpu", &hws::level_zero_clock_samples::get_throttle_reason_gpu, "the current GPU frequency throttle reason") - .def("get_tdp_frequency_limit_mem", 
&hws::level_zero_clock_samples::get_tdp_frequency_limit_mem, "the current maximum allowed memory frequency based on the TDP limit in MHz") - .def("get_clock_mem", &hws::level_zero_clock_samples::get_clock_mem, "the current memory frequency in MHz") - .def("get_throttle_reason_mem", &hws::level_zero_clock_samples::get_throttle_reason_mem, "the current memory frequency throttle reason") + .def("get_clock_frequency_min", &hws::level_zero_clock_samples::get_clock_frequency_min, "the minimum possible GPU clock frequency in MHz") + .def("get_clock_frequency_max", &hws::level_zero_clock_samples::get_clock_frequency_max, "the maximum possible GPU clock frequency in MHz") + .def("get_memory_clock_frequency_min", &hws::level_zero_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") + .def("get_memory_clock_frequency_max", &hws::level_zero_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz") + .def("get_available_clock_frequencies", &hws::level_zero_clock_samples::get_available_clock_frequencies, "the available GPU clock frequencies in MHz (slowest to fastest)") + .def("get_available_memory_clock_frequencies", &hws::level_zero_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)") + .def("get_clock_frequency", &hws::level_zero_clock_samples::get_clock_frequency, "the current GPU frequency in MHz") + .def("get_memory_clock_frequency", &hws::level_zero_clock_samples::get_memory_clock_frequency, "the current memory frequency in MHz") + .def("get_throttle_reason", &hws::level_zero_clock_samples::get_throttle_reason, "the current GPU frequency throttle reason") + .def("get_memory_throttle_reason", &hws::level_zero_clock_samples::get_memory_throttle_reason, "the current memory frequency throttle reason") + .def("get_frequency_limit_tdp", &hws::level_zero_clock_samples::get_frequency_limit_tdp, "the current maximum 
allowed GPU frequency based on the TDP limit in MHz") + .def("get_memory_frequency_limit_tdp", &hws::level_zero_clock_samples::get_memory_frequency_limit_tdp, "the current maximum allowed memory frequency based on the TDP limit in MHz") .def("__repr__", [](const hws::level_zero_clock_samples &self) { return fmt::format("", self); }); // bind the power samples py::class_(m, "LevelZeroPowerSamples") - .def("get_energy_threshold_enabled", &hws::level_zero_power_samples::get_energy_threshold_enabled, "true if the energy threshold is enabled") - .def("get_energy_threshold", &hws::level_zero_power_samples::get_energy_threshold, "the energy threshold in J") - .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ") + .def("get_power_enforced_limit", &hws::level_zero_power_samples::get_power_enforced_limit, "the actually enforced power limit (W), may be different from power management limit if external limiters are set") + .def("get_power_measurement_type", &hws::level_zero_power_samples::get_power_measurement_type, "the type of the power readings") + .def("get_power_management_mode", &hws::level_zero_power_samples::get_power_management_mode, "true if power management limits are enabled") + .def("get_power_usage", &hws::level_zero_power_samples::get_power_usage, "the current power draw of the GPU in W (calculated from power_total_energy_consumption)") + .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in J") .def("__repr__", [](const hws::level_zero_power_samples &self) { return fmt::format("", self); }); @@ -60,26 +65,34 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the memory samples py::class_(m, "LevelZeroMemorySamples") .def("get_memory_total", &hws::level_zero_memory_samples::get_memory_total, "the total memory 
size of the different memory modules in Bytes") - .def("get_allocatable_memory_total", &hws::level_zero_memory_samples::get_allocatable_memory_total, "the total allocatable memory size of the different memory modules in Bytes") - .def("get_pcie_link_max_speed", &hws::level_zero_memory_samples::get_pcie_link_max_speed, "the maximum PCIe bandwidth in bytes/sec") - .def("get_pcie_max_width", &hws::level_zero_memory_samples::get_pcie_max_width, "the PCIe lane width") - .def("get_max_pcie_link_generation", &hws::level_zero_memory_samples::get_max_pcie_link_generation, "the PCIe generation") - .def("get_bus_width", &hws::level_zero_memory_samples::get_bus_width, "the bus width of the different memory modules") - .def("get_num_channels", &hws::level_zero_memory_samples::get_num_channels, "the number of memory channels of the different memory modules") - .def("get_location", &hws::level_zero_memory_samples::get_location, "the location of the different memory modules (system or device)") + .def("get_visible_memory_total", &hws::level_zero_memory_samples::get_visible_memory_total, "the total allocatable memory size of the different memory modules in Bytes") + .def("get_memory_location", &hws::level_zero_memory_samples::get_memory_location, "the location of the different memory modules (system or device)") + .def("get_num_pcie_lanes_max", &hws::level_zero_memory_samples::get_num_pcie_lanes_max, "the PCIe lane width") + .def("get_pcie_link_generation_max", &hws::level_zero_memory_samples::get_pcie_link_generation_max, "the PCIe generation") + .def("get_pcie_link_speed_max", &hws::level_zero_memory_samples::get_pcie_link_speed_max, "the maximum PCIe bandwidth in bytes/sec") + .def("get_memory_bus_width", &hws::level_zero_memory_samples::get_memory_bus_width, "the bus width of the different memory modules") + .def("get_memory_num_channels", &hws::level_zero_memory_samples::get_memory_num_channels, "the number of memory channels of the different memory modules") 
.def("get_memory_free", &hws::level_zero_memory_samples::get_memory_free, "the currently free memory of the different memory modules in Bytes") - .def("get_pcie_link_speed", &hws::level_zero_memory_samples::get_pcie_link_speed, "the current PCIe bandwidth in bytes/sec") - .def("get_pcie_link_width", &hws::level_zero_memory_samples::get_pcie_link_width, "the current PCIe lane width") + .def("get_memory_used", &hws::level_zero_memory_samples::get_memory_used, "the currently used memory of the different memory modules in Bytes") + .def("get_num_pcie_lanes", &hws::level_zero_memory_samples::get_num_pcie_lanes, "the current PCIe lane width") .def("get_pcie_link_generation", &hws::level_zero_memory_samples::get_pcie_link_generation, "the current PCIe generation") + .def("get_pcie_link_speed", &hws::level_zero_memory_samples::get_pcie_link_speed, "the current PCIe bandwidth in bytes/sec") .def("__repr__", [](const hws::level_zero_memory_samples &self) { return fmt::format("", self); }); // bind the temperature samples py::class_(m, "LevelZeroTemperatureSamples") - .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum temperature for the sensor in °C") - .def("get_temperature_psu", &hws::level_zero_temperature_samples::get_temperature_psu, "the temperature of the PSU in °C") - .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current temperature for the sensor in °C") + .def("get_num_fans", &hws::level_zero_temperature_samples::get_num_fans, "the number of fans") + .def("get_fan_speed_max", &hws::level_zero_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in RPM") + .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum GPU temperature in °C") + .def("get_memory_temperature_max", &hws::level_zero_temperature_samples::get_memory_temperature_max, "the maximum memory temperature in °C") + .def("get_global_temperature_max", 
&hws::level_zero_temperature_samples::get_global_temperature_max, "the maximum global temperature in °C") + .def("get_fan_speed_percentage", &hws::level_zero_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %") + .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current GPU temperature in °C") + .def("get_memory_temperature", &hws::level_zero_temperature_samples::get_memory_temperature, "the current memory temperature in °C") + .def("get_global_temperature", &hws::level_zero_temperature_samples::get_global_temperature, "the current global temperature in °C") + .def("get_psu_temperature", &hws::level_zero_temperature_samples::get_psu_temperature, "the current PSU temperature in °C") .def("__repr__", [](const hws::level_zero_temperature_samples &self) { return fmt::format("", self); }); diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp index 442be04..d47bd32 100644 --- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp @@ -16,10 +16,12 @@ #include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "fmt/format.h" // fmt::formatter, fmt::ostream_formatter + #include // std::atomic -#include // std::chrono::{steady_clock, milliseconds}, std::chrono_literals namespace +#include // std::chrono::milliseconds, std::chrono_literals namespace #include // std::size_t -#include // std::formatter +#include // std::ostream forward declaration #include // std::string namespace hws { @@ -157,6 +159,6 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa } // namespace hws template <> -struct std::formatter : 
hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; #endif // HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp b/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp index 640cdcc..a0f2ccd 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp +++ b/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp @@ -15,11 +15,11 @@ #include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle #include "hardware_sampling/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK +#include "fmt/format.h" // fmt::format #include "level_zero/ze_api.h" // Level Zero runtime functions #include // std::size_t #include // std::uint32_t -#include // std::format #include // std::make_shared #include // std::runtime_error #include // std::vector @@ -38,28 +38,28 @@ struct level_zero_device_handle::level_zero_device_handle_impl { explicit level_zero_device_handle_impl(const std::size_t device_id) { // discover the number of drivers std::uint32_t driver_count{ 0 }; - HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr)); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr)) // check if only the single GPU driver has been found if (driver_count > 1) { - throw std::runtime_error{ std::format("Found too many GPU drivers ({})!", driver_count) }; + throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) }; } // get the GPU driver - HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver)); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver)) // get all GPUs for the current driver std::uint32_t device_count{ 0 }; - HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr)); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr)) // check if enough GPUs have been found if 
(driver_count <= device_id) { - throw std::runtime_error{ std::format("Found only {} GPUs, but GPU with the ID was requested!", device_count, device_id) }; + throw std::runtime_error{ fmt::format("Found only {} GPUs, but GPU with the ID was requested!", device_count, device_id) }; } // get the GPUs std::vector all_devices(device_count); - HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, all_devices.data())); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, all_devices.data())) // save the requested device device = all_devices[device_id]; diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp index dc8b411..9900f95 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp @@ -12,10 +12,11 @@ #define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter +#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter #include // std::uint64_t, std::int32_t -#include // std::format #include // std::ostream forward declaration #include // std::optional #include // std::string @@ -43,9 +44,11 @@ class level_zero_general_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the model name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID + 
HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the model name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, flags) // potential GPU flags (e.g. integrated device) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode) // the enabled standby mode (power saving or never) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu) // the number of threads per EU unit HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, eu_simd_width) // the physical EU unit SIMD width @@ -79,19 +82,19 @@ class level_zero_clock_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_gpu_min) // the minimum possible GPU clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_gpu_max) // the maximum possible GPU clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_clocks_gpu) // the available GPU clock frequencies in MHz (slowest to fastest) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_mem_min) // the minimum possible memory clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_mem_max) // the maximum possible memory clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_clocks_mem) // the available memory clock frequencies in MHz (slowest to fastest) - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, tdp_frequency_limit_gpu) // the current maximum allowed GPU frequency based on the TDP limit in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_gpu) // the current GPU frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, throttle_reason_gpu) // the current GPU frequency throttle reason - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, tdp_frequency_limit_mem) // the current maximum allowed memory frequency based on the TDP limit in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_mem) // the current memory frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, throttle_reason_mem) // the current memory frequency 
throttle reason + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min) // the minimum possible GPU clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max) // the maximum possible GPU clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min) // the minimum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max) // the maximum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_clock_frequencies) // the available GPU clock frequencies in MHz (slowest to fastest) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_memory_clock_frequencies) // the available memory clock frequencies in MHz (slowest to fastest) + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current GPU frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason) // the current GPU frequency throttle reason + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, memory_throttle_reason) // the current memory frequency throttle reason + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, frequency_limit_tdp) // the current maximum allowed GPU frequency based on the TDP limit in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_frequency_limit_tdp) // the current maximum allowed memory frequency based on the TDP limit in MHz }; /** @@ -126,6 +129,7 @@ class level_zero_power_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type) // the type of the power readings HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode) // true if power management limits are enabled + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage) // the current power draw of the GPU in W (calculated from power_total_energy_consumption) HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, 
power_total_energy_consumption) // the total power consumption since the last driver reload in J }; @@ -164,19 +168,20 @@ class level_zero_memory_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_total) // the total memory size of the different memory modules in Bytes - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, allocatable_memory_total) // the total allocatable memory size of the different memory modules in Bytes - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, pcie_link_max_speed) // the maximum PCIe bandwidth in bytes/sec - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, pcie_max_width) // the PCIe lane width - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, max_pcie_link_generation) // the PCIe generation - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, bus_width) // the bus width of the different memory modules - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, num_channels) // the number of memory channels of the different memory modules - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, location) // the location of the different memory modules (system or device) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_total) // the total memory size of the different memory modules in Bytes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, visible_memory_total) // the total allocatable memory size of the different memory modules in Bytes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_location) // the location of the different memory modules (system or device) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, num_pcie_lanes_max) // the maximum PCIe lane width + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, pcie_link_generation_max) // the maximum PCIe generation + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, pcie_link_speed_max) // the maximum PCIe bandwidth in MBPS + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_bus_width) // the bus width of the different memory modules + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_num_channels) 
// the number of memory channels of the different memory modules HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type>, memory_free) // the currently free memory of the different memory modules in Bytes - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, pcie_link_speed) // the current PCIe bandwidth in bytes/sec - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, pcie_link_width) // the current PCIe lane width + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type>, memory_used) // the currently used memory of the different memory modules in Bytes + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, num_pcie_lanes) // the current PCIe lane width HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, pcie_link_generation) // the current PCIe generation + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, pcie_link_speed) // the current PCIe bandwidth in bytes/sec }; /** @@ -199,13 +204,6 @@ class level_zero_temperature_samples { // befriend hardware sampler class friend class gpu_intel_hardware_sampler; - /** - * @brief The map type used if the number of potential Level Zero domains is unknown at compile time. - * @tparam T the mapped type - */ - template - using map_type = std::unordered_map; - public: /** * @brief Assemble the YAML string containing all available general hardware samples. 
@@ -214,10 +212,17 @@ class level_zero_temperature_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, temperature_max) // the maximum temperature for the sensor in °C - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, temperature_psu) // the temperature of the PSU in °C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type>, temperature) // the current temperature for the sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans) // the number of fans + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, fan_speed_max) // the maximum fan speed the user can set in RPM + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max) // the maximum GPU temperature in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max) // the maximum memory temperature in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, global_temperature_max) // the maximum global temperature in °C + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage) // the current intended fan speed in % + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature) // the temperature of the GPU in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_temperature) // the temperature of the memory in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, global_temperature) // the global temperature in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, psu_temperature) // the temperature of the PSU in °C }; /** @@ -232,18 +237,18 @@ std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples } // namespace hws template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : 
hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; #endif // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_intel/utility.hpp b/include/hardware_sampling/gpu_intel/utility.hpp index 810901f..03f9f8d 100644 --- a/include/hardware_sampling/gpu_intel/utility.hpp +++ b/include/hardware_sampling/gpu_intel/utility.hpp @@ -12,13 +12,14 @@ #define HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_ #pragma once +#include "fmt/format.h" // fmt::format #include "level_zero/ze_api.h" // Level Zero runtime functions #include "level_zero/zes_api.h" // Level Zero runtime functions -#include // std::format #include // std::runtime_error #include // std::string #include // std::string_view +#include // std::vector namespace hws::detail { @@ -39,13 +40,27 @@ namespace hws::detail { { \ const ze_result_t errc = level_zero_func; \ if (errc != ZE_RESULT_SUCCESS) { \ - throw std::runtime_error{ std::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, to_result_string(errc)) }; \ + throw std::runtime_error{ fmt::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, to_result_string(errc)) }; \ } \ } #else #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) level_zero_func; #endif +/** + * @brief Convert the @p flags to a vector of strings. + * @param[in] flags the flags to convert to strings + * @return a vector containing all flags as strings (`[[nodiscard]]`) + */ +[[nodiscard]] std::vector property_flags_to_vector(ze_device_property_flags_t flags); + +/** + * @brief Convert the throttle reason bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|". 
+ * @param[in] reasons the bitmask to convert to a string + * @return all throttle reasons (`[[nodiscard]]`) + */ +[[nodiscard]] std::string throttle_reason_to_string(zes_freq_throttle_reason_flags_t reasons); + /** * @brief Convert a Level Zero memory type to a string representation. * @param[in] mem_type the Level Zero memory type @@ -60,13 +75,6 @@ namespace hws::detail { */ [[nodiscard]] std::string memory_location_to_name(zes_mem_loc_t mem_loc); -/** - * @brief Convert a Level Zero temperature sensor type to a string representation. - * @param[in] sensor_type the Level Zero temperature sensor type - * @return the string representation (`[[nodiscard]]`) - */ -[[nodiscard]] std::string temperature_sensor_type_to_name(zes_temp_sensors_t sensor_type); - } // namespace hws::detail #endif // HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_ diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp index 2027135..774ab6c 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp @@ -13,6 +13,7 @@ #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler #include "hardware_sampling/utility.hpp" // hws::{durations_from_reference_time, join} +#include "fmt/format.h" // fmt::format #include "level_zero/ze_api.h" // Level Zero runtime functions #include "level_zero/zes_api.h" // Level Zero runtime functions @@ -20,7 +21,6 @@ #include // std::size_t #include // std::int32_t #include // std::exception, std::terminate -#include // std::format #include // std::ios_base #include // std::cerr, std::endl #include // std::runtime_error @@ -44,7 +44,7 @@ gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_ hardware_sampler{ sampling_interval } { // make sure that zeInit is only called once for all instances if (instances_++ == 0) { - HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY)); + 
HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY)) // notify that initialization has been finished init_finished_ = true; } else { @@ -77,6 +77,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { std::vector frequency_handles{}; std::vector power_handles{}; std::vector memory_handles{}; + std::vector fan_handles{}; std::vector psu_handles{}; std::vector temperature_handles{}; @@ -86,6 +87,8 @@ void gpu_intel_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); + double initial_total_power_consumption{}; // initial total power consumption in J + // retrieve initial general information { // the byte order is given by Intel directly @@ -93,9 +96,12 @@ void gpu_intel_hardware_sampler::sampling_loop() { ze_device_properties_t ze_device_prop{}; if (zeDeviceGetProperties(device, &ze_device_prop) == ZE_RESULT_SUCCESS) { - general_samples_.vendor_id_ = std::format("{:x}", ze_device_prop.vendorId); // TODO: PCI configuration ID to name? + general_samples_.vendor_id_ = fmt::format("{:x}", ze_device_prop.vendorId); general_samples_.num_threads_per_eu_ = ze_device_prop.numThreadsPerEU; general_samples_.eu_simd_width_ = ze_device_prop.physicalEUSimdWidth; + + // assemble list of GPU flags + general_samples_.flags_ = detail::property_flags_to_vector(ze_device_prop.flags); } zes_device_properties_t zes_device_prop{}; @@ -143,12 +149,12 @@ void gpu_intel_hardware_sampler::sampling_loop() { // determine the frequency domain (e.g. 
GPU, memory, etc) switch (prop.type) { case ZES_FREQ_DOMAIN_GPU: - clock_samples_.clock_gpu_min_ = prop.min; - clock_samples_.clock_gpu_max_ = prop.max; + clock_samples_.clock_frequency_min_ = prop.min; + clock_samples_.clock_frequency_max_ = prop.max; break; case ZES_FREQ_DOMAIN_MEMORY: - clock_samples_.clock_mem_min_ = prop.min; - clock_samples_.clock_mem_max_ = prop.max; + clock_samples_.memory_clock_frequency_min_ = prop.min; + clock_samples_.memory_clock_frequency_max_ = prop.max; break; default: // do nothing @@ -163,10 +169,10 @@ void gpu_intel_hardware_sampler::sampling_loop() { // determine the frequency domain (e.g. GPU, memory, etc) switch (prop.type) { case ZES_FREQ_DOMAIN_GPU: - clock_samples_.available_clocks_gpu_ = available_clocks; + clock_samples_.available_clock_frequencies_ = available_clocks; break; case ZES_FREQ_DOMAIN_MEMORY: - clock_samples_.available_clocks_mem_ = available_clocks; + clock_samples_.available_memory_clock_frequencies_ = available_clocks; break; default: // do nothing @@ -183,28 +189,28 @@ void gpu_intel_hardware_sampler::sampling_loop() { case ZES_FREQ_DOMAIN_GPU: { if (frequency_state.tdp >= 0.0) { - clock_samples_.tdp_frequency_limit_gpu_ = decltype(clock_samples_.tdp_frequency_limit_gpu_)::value_type{ frequency_state.tdp }; + clock_samples_.frequency_limit_tdp_ = decltype(clock_samples_.frequency_limit_tdp_)::value_type{ frequency_state.tdp }; } if (frequency_state.actual >= 0.0) { - clock_samples_.clock_gpu_ = decltype(clock_samples_.clock_gpu_)::value_type{ frequency_state.actual }; + clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ frequency_state.actual }; } if (frequency_state.throttleReasons >= 0.0) { - using vector_type = decltype(clock_samples_.throttle_reason_gpu_)::value_type; - clock_samples_.throttle_reason_gpu_ = vector_type{ static_cast(frequency_state.throttleReasons) }; + using vector_type = decltype(clock_samples_.throttle_reason_)::value_type; + 
clock_samples_.throttle_reason_ = vector_type{ static_cast(detail::throttle_reason_to_string(frequency_state.throttleReasons)) }; } } break; case ZES_FREQ_DOMAIN_MEMORY: { if (frequency_state.tdp >= 0.0) { - clock_samples_.tdp_frequency_limit_mem_ = decltype(clock_samples_.tdp_frequency_limit_mem_)::value_type{ frequency_state.tdp }; + clock_samples_.memory_frequency_limit_tdp_ = decltype(clock_samples_.memory_frequency_limit_tdp_)::value_type{ frequency_state.tdp }; } if (frequency_state.actual >= 0.0) { - clock_samples_.clock_mem_ = decltype(clock_samples_.clock_mem_)::value_type{ frequency_state.actual }; + clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ frequency_state.actual }; } if (frequency_state.throttleReasons >= 0.0) { - using vector_type = decltype(clock_samples_.throttle_reason_mem_)::value_type; - clock_samples_.throttle_reason_mem_ = vector_type{ static_cast(frequency_state.throttleReasons) }; + using vector_type = decltype(clock_samples_.memory_throttle_reason_)::value_type; + clock_samples_.memory_throttle_reason_ = vector_type{ static_cast(detail::throttle_reason_to_string(frequency_state.throttleReasons)) }; } } break; @@ -259,7 +265,9 @@ void gpu_intel_hardware_sampler::sampling_loop() { // get total power consumption zes_power_energy_counter_t energy_counter{}; if (zesPowerGetEnergyCounter(power_handles.front(), &energy_counter) == ZE_RESULT_SUCCESS) { - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast(energy_counter.energy) / 1000.0 / 1000.0 }; + initial_total_power_consumption = static_cast(energy_counter.energy) / 1000.0 / 1000.0; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; + power_samples_.power_usage_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; } // get energy thresholds @@ -294,40 
+302,46 @@ void gpu_intel_hardware_sampler::sampling_loop() { } if (prop.busWidth != -1) { // first value to add -> initialize map - if (!memory_samples_.bus_width_.has_value()) { - memory_samples_.bus_width_ = decltype(memory_samples_.bus_width_)::value_type{}; + if (!memory_samples_.memory_bus_width_.has_value()) { + memory_samples_.memory_bus_width_ = decltype(memory_samples_.memory_bus_width_)::value_type{}; } // add new memory bus width - memory_samples_.bus_width_.value()[memory_module_name] = prop.busWidth; + memory_samples_.memory_bus_width_.value()[memory_module_name] = prop.busWidth; } if (prop.numChannels != -1) { // first value to add -> initialize map - if (!memory_samples_.num_channels_.has_value()) { - memory_samples_.num_channels_ = decltype(memory_samples_.num_channels_)::value_type{}; + if (!memory_samples_.memory_num_channels_.has_value()) { + memory_samples_.memory_num_channels_ = decltype(memory_samples_.memory_num_channels_)::value_type{}; } // add new number of memory channels - memory_samples_.num_channels_.value()[memory_module_name] = prop.numChannels; + memory_samples_.memory_num_channels_.value()[memory_module_name] = prop.numChannels; } // first value to add -> initialize map - if (!memory_samples_.location_.has_value()) { - memory_samples_.location_ = decltype(memory_samples_.location_)::value_type{}; + if (!memory_samples_.memory_location_.has_value()) { + memory_samples_.memory_location_ = decltype(memory_samples_.memory_location_)::value_type{}; } - memory_samples_.location_.value()[memory_module_name] = detail::memory_location_to_name(prop.location); + memory_samples_.memory_location_.value()[memory_module_name] = detail::memory_location_to_name(prop.location); // get current memory information zes_mem_state_t mem_state{}; if (zesMemoryGetState(handle, &mem_state) == ZE_RESULT_SUCCESS) { // first value to add -> initialize map - if (!memory_samples_.allocatable_memory_total_.has_value()) { - 
memory_samples_.allocatable_memory_total_ = decltype(memory_samples_.allocatable_memory_total_)::value_type{}; + if (!memory_samples_.visible_memory_total_.has_value()) { + memory_samples_.visible_memory_total_ = decltype(memory_samples_.visible_memory_total_)::value_type{}; } - memory_samples_.allocatable_memory_total_.value()[memory_module_name] = mem_state.size; + memory_samples_.visible_memory_total_.value()[memory_module_name] = mem_state.size; // first value to add -> initialize map if (!memory_samples_.memory_free_.has_value()) { memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{}; } memory_samples_.memory_free_.value()[memory_module_name].push_back(mem_state.free); + + // first value to add -> initialize map + if (!memory_samples_.memory_used_.has_value()) { + memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{}; + } + memory_samples_.memory_used_.value()[memory_module_name].push_back(mem_state.size - mem_state.free); } } } @@ -336,13 +350,13 @@ void gpu_intel_hardware_sampler::sampling_loop() { zes_pci_properties_t pci_prop{}; if (zesDevicePciGetProperties(device, &pci_prop) == ZE_RESULT_SUCCESS) { if (pci_prop.maxSpeed.gen != -1) { - memory_samples_.max_pcie_link_generation_ = pci_prop.maxSpeed.gen; + memory_samples_.pcie_link_generation_max_ = pci_prop.maxSpeed.gen; } if (pci_prop.maxSpeed.width != -1) { - memory_samples_.pcie_max_width_ = pci_prop.maxSpeed.width; + memory_samples_.num_pcie_lanes_max_ = pci_prop.maxSpeed.width; } if (pci_prop.maxSpeed.maxBandwidth != -1) { - memory_samples_.pcie_link_max_speed_ = pci_prop.maxSpeed.maxBandwidth; + memory_samples_.pcie_link_speed_max_ = static_cast(static_cast(pci_prop.maxSpeed.maxBandwidth) / 1e6); } } @@ -350,10 +364,10 @@ void gpu_intel_hardware_sampler::sampling_loop() { zes_pci_state_t pci_state{}; if (zesDevicePciGetState(device, &pci_state) == ZE_RESULT_SUCCESS) { if (pci_state.speed.maxBandwidth != -1) { - 
memory_samples_.pcie_link_speed_ = decltype(memory_samples_.pcie_link_speed_)::value_type{ pci_state.speed.maxBandwidth }; + memory_samples_.pcie_link_speed_ = decltype(memory_samples_.pcie_link_speed_)::value_type{ static_cast(static_cast(pci_state.speed.maxBandwidth) / 1e6) }; } if (pci_state.speed.width != -1) { - memory_samples_.pcie_link_width_ = decltype(memory_samples_.pcie_link_width_)::value_type{ pci_state.speed.width }; + memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{ pci_state.speed.width }; } if (pci_state.speed.gen != -1) { memory_samples_.pcie_link_generation_ = decltype(memory_samples_.pcie_link_generation_)::value_type{ pci_state.speed.gen }; @@ -365,6 +379,29 @@ void gpu_intel_hardware_sampler::sampling_loop() { // retrieve initial temperature related information { + std::uint32_t num_fans{ 0 }; + if (zesDeviceEnumFans(device, &num_fans, nullptr) == ZE_RESULT_SUCCESS) { + temperature_samples_.num_fans_ = num_fans; + + fan_handles.resize(num_fans); + if (zesDeviceEnumFans(device, &num_fans, fan_handles.data()) == ZE_RESULT_SUCCESS) { + // NOTE: only the first fan handle is used here + if (!fan_handles.empty()) { + zes_fan_properties_t prop{}; + if (zesFanGetProperties(fan_handles.front(), &prop) == ZE_RESULT_SUCCESS) { + temperature_samples_.fan_speed_max_ = prop.maxRPM; + } + + std::int32_t fan_speed{}; + if (zesFanGetState(fan_handles.front(), ZES_FAN_SPEED_UNITS_PERCENT, &fan_speed) == ZE_RESULT_SUCCESS) { + if (fan_speed != -1) { + temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ static_cast(fan_speed) }; + } + } + } + } + } + std::uint32_t num_psus{ 0 }; if (zesDeviceEnumPsus(device, &num_psus, nullptr) == ZE_RESULT_SUCCESS) { psu_handles.resize(num_psus); @@ -374,7 +411,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { zes_psu_state_t psu_state{}; if (zesPsuGetState(psu_handles.front(), &psu_state) == ZE_RESULT_SUCCESS) { if 
(psu_state.temperature != -1) { - temperature_samples_.temperature_psu_ = decltype(temperature_samples_.temperature_psu_)::value_type{ psu_state.temperature }; + temperature_samples_.psu_temperature_ = static_cast(psu_state.temperature); } } } @@ -388,26 +425,66 @@ void gpu_intel_hardware_sampler::sampling_loop() { for (zes_temp_handle_t handle : temperature_handles) { zes_temp_properties_t prop{}; if (zesTemperatureGetProperties(handle, &prop) == ZE_RESULT_SUCCESS) { - const std::string sensor_name = detail::temperature_sensor_type_to_name(prop.type); - if (sensor_name.empty()) { - // unsupported sensor type - continue; - } + switch (prop.type) { + case ZES_TEMP_SENSORS_GLOBAL: + { + // first value to add -> initialize map + if (!temperature_samples_.global_temperature_max_.has_value()) { + temperature_samples_.global_temperature_max_ = decltype(temperature_samples_.global_temperature_max_)::value_type{}; + } + // add new maximum temperature + temperature_samples_.global_temperature_max_ = prop.maxTemperature; - // first value to add -> initialize map - if (!temperature_samples_.temperature_max_.has_value()) { - temperature_samples_.temperature_max_ = decltype(temperature_samples_.temperature_max_)::value_type{}; - } - // add new maximum temperature - temperature_samples_.temperature_max_.value()[sensor_name] = prop.maxTemperature; + // first value to add -> initialize map + if (!temperature_samples_.global_temperature_.has_value()) { + temperature_samples_.global_temperature_ = decltype(temperature_samples_.global_temperature_)::value_type{}; + } + double temp{}; + if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) { + temperature_samples_.global_temperature_->push_back(temp); + } + } + break; + case ZES_TEMP_SENSORS_GPU: + { + // first value to add -> initialize map + if (!temperature_samples_.temperature_max_.has_value()) { + temperature_samples_.temperature_max_ = decltype(temperature_samples_.temperature_max_)::value_type{}; + } + // add new 
maximum temperature + temperature_samples_.temperature_max_ = prop.maxTemperature; - // first value to add -> initialize map - if (!temperature_samples_.temperature_.has_value()) { - temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{}; - } - double temp{}; - if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) { - temperature_samples_.temperature_.value()[sensor_name].push_back(temp); + // first value to add -> initialize map + if (!temperature_samples_.temperature_.has_value()) { + temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{}; + } + double temp{}; + if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) { + temperature_samples_.temperature_->push_back(temp); + } + } + break; + case ZES_TEMP_SENSORS_MEMORY: + { + // first value to add -> initialize map + if (!temperature_samples_.memory_temperature_max_.has_value()) { + temperature_samples_.memory_temperature_max_ = decltype(temperature_samples_.memory_temperature_max_)::value_type{}; + } + // add new maximum temperature + temperature_samples_.memory_temperature_max_ = prop.maxTemperature; + + // first value to add -> initialize map + if (!temperature_samples_.memory_temperature_.has_value()) { + temperature_samples_.memory_temperature_ = decltype(temperature_samples_.memory_temperature_)::value_type{}; + } + double temp{}; + if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) { + temperature_samples_.memory_temperature_->push_back(temp); + } + } + break; + default: + break; } } } @@ -430,37 +507,37 @@ void gpu_intel_hardware_sampler::sampling_loop() { for (zes_freq_handle_t handle : frequency_handles) { // get frequency properties zes_freq_properties_t prop{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetProperties(handle, &prop)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetProperties(handle, &prop)) // get current frequency information zes_freq_state_t frequency_state{}; - if 
(clock_samples_.clock_gpu_.has_value() || clock_samples_.clock_mem_.has_value()) { - HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetState(handle, &frequency_state)); + if (clock_samples_.clock_frequency_.has_value() || clock_samples_.memory_clock_frequency_.has_value()) { + HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetState(handle, &frequency_state)) // determine the frequency domain (e.g. GPU, memory, etc) switch (prop.type) { case ZES_FREQ_DOMAIN_GPU: { - if (clock_samples_.tdp_frequency_limit_gpu_.has_value()) { - clock_samples_.tdp_frequency_limit_gpu_->push_back(frequency_state.tdp); + if (clock_samples_.frequency_limit_tdp_.has_value()) { + clock_samples_.frequency_limit_tdp_->push_back(frequency_state.tdp); } - if (clock_samples_.clock_gpu_.has_value()) { - clock_samples_.clock_gpu_->push_back(frequency_state.actual); + if (clock_samples_.clock_frequency_.has_value()) { + clock_samples_.clock_frequency_->push_back(frequency_state.actual); } - if (clock_samples_.throttle_reason_gpu_.has_value()) { - clock_samples_.throttle_reason_gpu_->push_back(static_cast(frequency_state.throttleReasons)); + if (clock_samples_.throttle_reason_.has_value()) { + clock_samples_.throttle_reason_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons)); } } break; case ZES_FREQ_DOMAIN_MEMORY: { - if (clock_samples_.tdp_frequency_limit_mem_.has_value()) { - clock_samples_.tdp_frequency_limit_mem_->push_back(frequency_state.tdp); + if (clock_samples_.memory_frequency_limit_tdp_.has_value()) { + clock_samples_.memory_frequency_limit_tdp_->push_back(frequency_state.tdp); } - if (clock_samples_.clock_mem_.has_value()) { - clock_samples_.clock_mem_->push_back(frequency_state.actual); + if (clock_samples_.memory_clock_frequency_.has_value()) { + clock_samples_.memory_clock_frequency_->push_back(frequency_state.actual); } - if (clock_samples_.throttle_reason_mem_.has_value()) { - clock_samples_.throttle_reason_mem_->push_back(static_cast(frequency_state.throttleReasons)); 
+ if (clock_samples_.memory_throttle_reason_.has_value()) { + clock_samples_.memory_throttle_reason_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons)); } } break; @@ -479,9 +556,17 @@ void gpu_intel_hardware_sampler::sampling_loop() { if (power_samples_.power_total_energy_consumption_.has_value()) { // get total power consumption zes_power_energy_counter_t energy_counter{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter)) - power_samples_.power_total_energy_consumption_->push_back(static_cast(energy_counter.energy) / 1000.0 / 1000.0); + const auto power_consumption = static_cast(energy_counter.energy) / 1000.0 / 1000.0; + + // calculate current power draw as (Energy Difference [J]) / (Time Difference [s]) + const std::size_t last_index = this->sampling_time_points().size() - 1; + const double power_usage = (power_consumption - power_samples_.power_total_energy_consumption_->back()) / (std::chrono::duration(this->sampling_time_points()[last_index] - this->sampling_time_points()[last_index - 1]).count()); + power_samples_.power_usage_->push_back(power_usage); + + // add power consumption last to be able to use the std::vector::back() function + power_samples_.power_total_energy_consumption_->push_back(power_consumption - initial_total_power_consumption); } } } @@ -490,7 +575,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { { for (zes_mem_handle_t handle : memory_handles) { zes_mem_properties_t prop{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop)) // get the memory module name const std::string memory_module_name = detail::memory_module_to_name(prop.type); @@ -498,23 +583,27 @@ void gpu_intel_hardware_sampler::sampling_loop() { if (memory_samples_.memory_free_.has_value()) { // get current memory 
information zes_mem_state_t mem_state{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetState(handle, &mem_state)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetState(handle, &mem_state)) memory_samples_.memory_free_.value()[memory_module_name].push_back(mem_state.free); + + if (memory_samples_.visible_memory_total_.has_value()) { + memory_samples_.memory_used_.value()[memory_module_name].push_back(memory_samples_.visible_memory_total_.value()[memory_module_name] - mem_state.free); + } } } - if (memory_samples_.pcie_link_speed_.has_value() || memory_samples_.pcie_link_width_.has_value() || memory_samples_.pcie_link_width_.has_value()) { + if (memory_samples_.pcie_link_speed_.has_value() || memory_samples_.num_pcie_lanes_.has_value() || memory_samples_.num_pcie_lanes_.has_value()) { // the current PCIe stats zes_pci_state_t pci_state{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesDevicePciGetState(device, &pci_state)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesDevicePciGetState(device, &pci_state)) if (memory_samples_.pcie_link_speed_.has_value()) { - memory_samples_.pcie_link_speed_->push_back(pci_state.speed.maxBandwidth); + memory_samples_.pcie_link_speed_->push_back(static_cast(static_cast(pci_state.speed.maxBandwidth) / 1e6)); } - if (memory_samples_.pcie_link_width_.has_value()) { - memory_samples_.pcie_link_width_->push_back(pci_state.speed.width); + if (memory_samples_.num_pcie_lanes_.has_value()) { + memory_samples_.num_pcie_lanes_->push_back(pci_state.speed.width); } - if (memory_samples_.pcie_link_width_.has_value()) { + if (memory_samples_.pcie_link_generation_.has_value()) { memory_samples_.pcie_link_generation_->push_back(pci_state.speed.gen); } } @@ -523,28 +612,48 @@ void gpu_intel_hardware_sampler::sampling_loop() { // retrieve temperature related samples { if (!psu_handles.empty()) { - if (temperature_samples_.temperature_psu_.has_value()) { + if (temperature_samples_.psu_temperature_.has_value()) { // NOTE: only the first PSU is used here zes_psu_state_t psu_state{}; - 
HWS_LEVEL_ZERO_ERROR_CHECK(zesPsuGetState(psu_handles.front(), &psu_state)); - temperature_samples_.temperature_psu_->push_back(psu_state.temperature); + HWS_LEVEL_ZERO_ERROR_CHECK(zesPsuGetState(psu_handles.front(), &psu_state)) + temperature_samples_.psu_temperature_->push_back(psu_state.temperature); } } for (zes_temp_handle_t handle : temperature_handles) { zes_temp_properties_t prop{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetProperties(handle, &prop)); - - const std::string sensor_name = detail::temperature_sensor_type_to_name(prop.type); - if (sensor_name.empty()) { - // unsupported sensor type - continue; - } - - if (temperature_samples_.temperature_.has_value() && temperature_samples_.temperature_.value().contains(sensor_name)) { - double temp{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp)); - temperature_samples_.temperature_.value()[sensor_name].push_back(temp); + HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetProperties(handle, &prop)) + + switch (prop.type) { + case ZES_TEMP_SENSORS_GLOBAL: + { + if (temperature_samples_.global_temperature_.has_value()) { + double temp{}; + HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp)) + temperature_samples_.global_temperature_->push_back(temp); + } + } + break; + case ZES_TEMP_SENSORS_GPU: + { + if (temperature_samples_.temperature_.has_value()) { + double temp{}; + HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp)) + temperature_samples_.temperature_->push_back(temp); + } + } + break; + case ZES_TEMP_SENSORS_MEMORY: + { + if (temperature_samples_.memory_temperature_.has_value()) { + double temp{}; + HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp)) + temperature_samples_.memory_temperature_->push_back(temp); + } + } + break; + default: + break; } } } @@ -559,8 +668,8 @@ std::string gpu_intel_hardware_sampler::device_identification() const { // get the level zero handle from the device ze_device_handle_t device = device_.get_impl().device; 
ze_device_properties_t prop{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGetProperties(device, &prop)); - return std::format("gpu_intel_device_{}", prop.deviceId); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGetProperties(device, &prop)) + return fmt::format("gpu_intel_device_{}", prop.deviceId); } std::string gpu_intel_hardware_sampler::generate_yaml_string() const { @@ -569,10 +678,10 @@ std::string gpu_intel_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return std::format("{}\n" - "{}\n" - "{}\n" - "{}\n" + return fmt::format("{}\n\n" + "{}\n\n" + "{}\n\n" + "{}\n\n" "{}", general_samples_.generate_yaml_string(), clock_samples_.generate_yaml_string(), @@ -586,7 +695,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa out.setstate(std::ios_base::failbit); return out; } else { - return out << std::format("sampling interval: {}\n" + return out << fmt::format("sampling interval: {}\n" "time points: [{}]\n\n" "general samples:\n{}\n\n" "clock samples:\n{}\n\n" @@ -594,7 +703,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa "memory samples:\n{}\n\n" "temperature samples:\n{}", sampler.sampling_interval(), - detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), + fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), sampler.general_samples(), sampler.clock_samples(), sampler.power_samples(), diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp index 971bfb9..5ceffcf 100644 --- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp +++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp @@ -7,9 +7,8 @@ #include "hardware_sampling/gpu_intel/level_zero_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join} +#include 
"hardware_sampling/utility.hpp" // hws::detail::{value_or_default, remove_cvref_t} -#include // std::format #include // std::ostream #include // std::string #include // std::string_view @@ -18,22 +17,22 @@ namespace hws { -namespace detail { +namespace { template void append_map_values(std::string &str, const std::string_view entry_name, const MapType &map) { if (map.has_value()) { for (const auto &[key, value] : map.value()) { - if constexpr (is_vector_v>) { - str += std::format("{}_{}: [{}]\n", entry_name, key, detail::join(value, ", ")); + if constexpr (detail::is_vector_v>) { + str += fmt::format("{}_{}: [{}]\n", entry_name, key, fmt::join(value, ", ")); } else { - str += std::format("{}_{}: {}\n", entry_name, key, value); + str += fmt::format("{}_{}: {}\n", entry_name, key, value); } } } } -} // namespace detail +} // namespace //*************************************************************************************************************************************// // general samples // @@ -44,42 +43,49 @@ std::string level_zero_general_samples::generate_yaml_string() const { // device byte order if (this->byte_order_.has_value()) { - str += std::format(" byte_order:\n" + str += fmt::format(" byte_order:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->byte_order_.value()); } // the vendor specific ID if (this->vendor_id_.has_value()) { - str += std::format(" vendor_id:\n" + str += fmt::format(" vendor_id:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->vendor_id_.value()); } // device name if (this->name_.has_value()) { - str += std::format(" name:\n" + str += fmt::format(" name:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->name_.value()); } + // GPU specific flags + if (this->flags_.has_value()) { + str += fmt::format(" flags:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(detail::quote(this->flags_.value()), ", ")); + } // the standby mode if (this->standby_mode_.has_value()) { - str += std::format(" standby_mode:\n" + str += 
fmt::format(" standby_mode:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->standby_mode_.value()); } // the number of threads per EU unit if (this->num_threads_per_eu_.has_value()) { - str += std::format(" num_threads_per_eu:\n" + str += fmt::format(" num_threads_per_eu:\n" " unit: \"int\"\n" " values: {}\n", this->num_threads_per_eu_.value()); } // the EU SIMD width if (this->eu_simd_width_.has_value()) { - str += std::format(" physical_eu_simd_width:\n" + str += fmt::format(" eu_simd_width:\n" " unit: \"int\"\n" " values: {}\n", this->eu_simd_width_.value()); @@ -92,15 +98,17 @@ std::string level_zero_general_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) { - return out << std::format("byte_order [string]: {}\n" + return out << fmt::format("byte_order [string]: {}\n" "vendor_id [string]: {}\n" "name [string]: {}\n" + "flags [string]: [{}]\n" "standby_mode [string]: {}\n" "num_threads_per_eu [int]: {}\n" "eu_simd_width [int]: {}", detail::value_or_default(samples.get_byte_order()), detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), + fmt::join(detail::value_or_default(samples.get_flags()), ", "), detail::value_or_default(samples.get_standby_mode()), detail::value_or_default(samples.get_num_threads_per_eu()), detail::value_or_default(samples.get_eu_simd_width())); @@ -114,89 +122,89 @@ std::string level_zero_clock_samples::generate_yaml_string() const { std::string str{ "clock:\n" }; // minimum GPU core clock - if (this->clock_gpu_min_.has_value()) { - str += std::format(" clock_gpu_min:\n" + if (this->clock_frequency_min_.has_value()) { + str += fmt::format(" clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", - this->clock_gpu_min_.value()); + this->clock_frequency_min_.value()); } // maximum GPU core clock - if (this->clock_gpu_max_.has_value()) { - str += std::format(" clock_gpu_max:\n" + if 
(this->clock_frequency_max_.has_value()) { + str += fmt::format(" clock_gpu_max:\n" " unit: \"MHz\"\n" " values: {}\n", - this->clock_gpu_max_.value()); - } - // all possible GPU core clock frequencies - if (this->available_clocks_gpu_.has_value()) { - str += std::format(" available_clocks_gpu:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->available_clocks_gpu_.value(), ", ")); + this->clock_frequency_max_.value()); } // minimum memory clock - if (this->clock_mem_min_.has_value()) { - str += std::format(" clock_mem_min:\n" + if (this->memory_clock_frequency_min_.has_value()) { + str += fmt::format(" memory_clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", - this->clock_mem_min_.value()); + this->memory_clock_frequency_min_.value()); } // maximum memory clock - if (this->clock_mem_max_.has_value()) { - str += std::format(" clock_mem_max:\n" + if (this->memory_clock_frequency_max_.has_value()) { + str += fmt::format(" memory_clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", - this->clock_mem_max_.value()); + this->memory_clock_frequency_max_.value()); + } + // all possible GPU core clock frequencies + if (this->available_clock_frequencies_.has_value()) { + str += fmt::format(" available_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->available_clock_frequencies_.value(), ", ")); } // all possible memory clock frequencies - if (this->available_clocks_mem_.has_value()) { - str += std::format(" available_clocks_mem:\n" + if (this->available_memory_clock_frequencies_.has_value()) { + str += fmt::format(" available_memory_clock_frequencies:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->available_clocks_mem_.value(), ", ")); + fmt::join(this->available_memory_clock_frequencies_.value(), ", ")); } - // the maximum GPU core frequency based on the current TDP limit - if (this->tdp_frequency_limit_gpu_.has_value()) { - str += std::format(" tdp_frequency_limit_gpu:\n" + // the current GPU 
core clock frequency + if (this->clock_frequency_.has_value()) { + str += fmt::format(" clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->tdp_frequency_limit_gpu_.value(), ", ")); + fmt::join(this->clock_frequency_.value(), ", ")); } - // the current GPU core clock frequency - if (this->clock_gpu_.has_value()) { - str += std::format(" clock_gpu:\n" + // the current memory clock frequency + if (this->memory_clock_frequency_.has_value()) { + str += fmt::format(" memory_clock_frequency:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_gpu_.value(), ", ")); + fmt::join(this->memory_clock_frequency_.value(), ", ")); } // the current GPU core throttle reason - if (this->throttle_reason_gpu_.has_value()) { - str += std::format(" throttle_reason_gpu:\n" - " unit: \"bitmask\"\n" + if (this->throttle_reason_.has_value()) { + str += fmt::format(" throttle_reason:\n" + " unit: \"string\"\n" " values: [{}]\n", - detail::join(this->throttle_reason_gpu_.value(), ", ")); + fmt::join(this->throttle_reason_.value(), ", ")); } - // the maximum memory frequency based on the current TDP limit - if (this->tdp_frequency_limit_mem_.has_value()) { - str += std::format(" tdp_frequency_limit_mem:\n" - " unit: \"MHz\"\n" + // the current memory throttle reason + if (this->memory_throttle_reason_.has_value()) { + str += fmt::format(" memory_throttle_reason:\n" + " unit: \"string\"\n" " values: [{}]\n", - detail::join(this->tdp_frequency_limit_mem_.value(), ", ")); + fmt::join(this->memory_throttle_reason_.value(), ", ")); } - // the current memory clock frequency - if (this->clock_mem_.has_value()) { - str += std::format(" clock_mem:\n" + // the maximum GPU core frequency based on the current TDP limit + if (this->frequency_limit_tdp_.has_value()) { + str += fmt::format(" frequency_limit_tdp:\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->clock_mem_.value(), ", ")); + fmt::join(this->frequency_limit_tdp_.value(), ", ")); } - // the 
current memory throttle reason - if (this->throttle_reason_mem_.has_value()) { - str += std::format(" throttle_reason_mem:\n" - " unit: \"bitmask\"\n" + // the maximum memory frequency based on the current TDP limit + if (this->memory_frequency_limit_tdp_.has_value()) { + str += fmt::format(" memory_frequency_limit_tdp:\n" + " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->throttle_reason_mem_.value(), ", ")); + fmt::join(this->memory_frequency_limit_tdp_.value(), ", ")); } // remove last newline @@ -206,30 +214,30 @@ std::string level_zero_clock_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samples) { - return out << std::format("clock_gpu_min [MHz]: {}\n" - "clock_gpu_max [MHz]: {}\n" - "available_clocks_gpu [MHz]: [{}]\n" - "clock_mem_min [MHz]: {}\n" - "clock_mem_max [MHz]: {}\n" - "available_clocks_mem [MHz]: [{}]\n" - "tdp_frequency_limit_gpu [MHz]: [{}]\n" - "clock_gpu [MHz]: [{}]\n" - "throttle_reason_gpu [bitmask]: [{}]\n" - "tdp_frequency_limit_mem [MHz]: [{}]\n" - "clock_mem [MHz]: [{}]\n" - "throttle_reason_mem [bitmask]: [{}]", - detail::value_or_default(samples.get_clock_gpu_min()), - detail::value_or_default(samples.get_clock_gpu_max()), - detail::join(detail::value_or_default(samples.get_available_clocks_gpu()), ", "), - detail::value_or_default(samples.get_clock_mem_min()), - detail::value_or_default(samples.get_clock_mem_max()), - detail::join(detail::value_or_default(samples.get_available_clocks_mem()), ", "), - detail::join(detail::value_or_default(samples.get_tdp_frequency_limit_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_clock_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_throttle_reason_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_tdp_frequency_limit_mem()), ", "), - detail::join(detail::value_or_default(samples.get_clock_mem()), ", "), - 
detail::join(detail::value_or_default(samples.get_throttle_reason_mem()), ", ")); + return out << fmt::format("clock_frequency_min [MHz]: {}\n" + "clock_frequency_max [MHz]: {}\n" + "memory_clock_frequency_min [MHz]: {}\n" + "memory_clock_frequency_max [MHz]: {}\n" + "available_clock_frequencies [MHz]: [{}]\n" + "available_memory_clock_frequencies [MHz]: [{}]\n" + "clock_frequency [MHz]: [{}]\n" + "memory_clock_frequency [MHz]: [{}]\n" + "throttle_reason [string]: [{}]\n" + "memory_throttle_reason [string]: [{}]\n" + "frequency_limit_tdp [MHz]: [{}]\n" + "memory_frequency_limit_tdp [MHz]: [{}]", + detail::value_or_default(samples.get_clock_frequency_min()), + detail::value_or_default(samples.get_clock_frequency_max()), + detail::value_or_default(samples.get_memory_clock_frequency_min()), + detail::value_or_default(samples.get_memory_clock_frequency_max()), + fmt::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_throttle_reason()), ", "), + fmt::join(detail::value_or_default(samples.get_frequency_limit_tdp()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_frequency_limit_tdp()), ", ")); } //*************************************************************************************************************************************// @@ -241,36 +249,39 @@ std::string level_zero_power_samples::generate_yaml_string() const { // power enforced limit if (this->power_enforced_limit_.has_value()) { - str += std::format(" power_enforced_limit:\n" + str += fmt::format(" power_enforced_limit:\n" " unit: \"W\"\n" " values: {}\n", 
this->power_enforced_limit_.value()); } // power measurement type if (this->power_measurement_type_.has_value()) { - str += std::format(" power_measurement_type:\n" + str += fmt::format(" power_measurement_type:\n" " unit: \"string\"\n" " values: {}\n", this->power_measurement_type_.value()); } // the power management mode if (this->power_management_mode_.has_value()) { - str += std::format(" power_management_mode:\n" + str += fmt::format(" power_management_mode:\n" " unit: \"bool\"\n" " values: {}\n", this->power_management_mode_.value()); } + // the current power draw + if (this->power_usage_.has_value()) { + str += fmt::format(" power_usage:\n" + " unit: \"W\"\n" + " values: [{}]\n", + fmt::join(this->power_usage_.value(), ", ")); + } // the total consumed energy if (this->power_total_energy_consumption_.has_value()) { - decltype(level_zero_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size()); - for (std::size_t i = 0; i < consumed_energy.size(); ++i) { - consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front(); - } - str += std::format(" power_total_energy_consumed:\n" + str += fmt::format(" power_total_energy_consumption:\n" " unit: \"J\"\n" " values: [{}]\n", - detail::join(consumed_energy, ", ")); + fmt::join(this->power_total_energy_consumption_.value(), ", ")); } // remove last newline @@ -280,14 +291,16 @@ std::string level_zero_power_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const level_zero_power_samples &samples) { - return out << std::format("power_enforced_limit [W]: {}\n" + return out << fmt::format("power_enforced_limit [W]: {}\n" "power_measurement_type [string]: {}\n" "power_management_mode [bool]: {}\n" + "power_usage [W]: [{}]\n" "power_total_energy_consumption [J]: [{}]", detail::value_or_default(samples.get_power_enforced_limit()), 
detail::value_or_default(samples.get_power_measurement_type()), detail::value_or_default(samples.get_power_management_mode()), - detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", ")); + fmt::join(detail::value_or_default(samples.get_power_usage()), ", "), + fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", ")); } //*************************************************************************************************************************************// @@ -300,7 +313,7 @@ std::string level_zero_memory_samples::generate_yaml_string() const { // the total memory if (this->memory_total_.has_value()) { for (const auto &[key, value] : this->memory_total_.value()) { - str += std::format(" memory_total_{}:\n" + str += fmt::format(" {}_memory_total:\n" " unit: \"B\"\n" " values: {}\n", key, @@ -308,40 +321,50 @@ std::string level_zero_memory_samples::generate_yaml_string() const { } } // the total allocatable memory - if (this->allocatable_memory_total_.has_value()) { - for (const auto &[key, value] : this->allocatable_memory_total_.value()) { - str += std::format(" allocatable_memory_total_{}:\n" + if (this->visible_memory_total_.has_value()) { + for (const auto &[key, value] : this->visible_memory_total_.value()) { + str += fmt::format(" {}_visible_memory_total:\n" " unit: \"B\"\n" " values: {}\n", key, value); } } - // the pcie max bandwidth - if (this->pcie_link_max_speed_.has_value()) { - str += std::format(" pcie_max_bandwidth:\n" - " unit: \"BPS\"\n" - " values: {}\n", - this->pcie_link_max_speed_.value()); + // the memory location (system or device) + if (this->memory_location_.has_value()) { + for (const auto &[key, value] : this->memory_location_.value()) { + str += fmt::format(" {}_memory_location:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + key, + value); + } } // the pcie link width - if (this->pcie_max_width_.has_value()) { - str += std::format(" max_pcie_link_width:\n" + if 
(this->num_pcie_lanes_max_.has_value()) { + str += fmt::format(" num_pcie_lanes_max:\n" " unit: \"int\"\n" " values: {}\n", - this->pcie_max_width_.value()); + this->num_pcie_lanes_max_.value()); } // the pcie generation - if (this->max_pcie_link_generation_.has_value()) { - str += std::format(" max_pcie_link_generation:\n" + if (this->pcie_link_generation_max_.has_value()) { + str += fmt::format(" pcie_link_generation_max:\n" " unit: \"int\"\n" " values: {}\n", - this->max_pcie_link_generation_.value()); + this->pcie_link_generation_max_.value()); + } + // the pcie max bandwidth + if (this->pcie_link_speed_max_.has_value()) { + str += fmt::format(" pcie_link_speed_max:\n" + " unit: \"MBPS\"\n" + " values: {}\n", + this->pcie_link_speed_max_.value()); } // the memory bus width - if (this->bus_width_.has_value()) { - for (const auto &[key, value] : this->bus_width_.value()) { - str += std::format(" memory_bus_width_{}:\n" + if (this->memory_bus_width_.has_value()) { + for (const auto &[key, value] : this->memory_bus_width_.value()) { + str += fmt::format(" {}_memory_bus_width:\n" " unit: \"Bit\"\n" " values: {}\n", key, @@ -349,69 +372,56 @@ std::string level_zero_memory_samples::generate_yaml_string() const { } } // the number of memory channels - if (this->num_channels_.has_value()) { - for (const auto &[key, value] : this->num_channels_.value()) { - str += std::format(" memory_num_channels_{}:\n" + if (this->memory_num_channels_.has_value()) { + for (const auto &[key, value] : this->memory_num_channels_.value()) { + str += fmt::format(" {}_memory_num_channels:\n" " unit: \"int\"\n" " values: {}\n", key, value); } } - // the memory location (system or device) - if (this->location_.has_value()) { - for (const auto &[key, value] : this->location_.value()) { - str += std::format(" memory_location_{}:\n" - " unit: \"string\"\n" - " values: \"{}\"\n", - key, - value); - } - } - // the currently free and used memory + // the currently free memory if 
(this->memory_free_.has_value()) { for (const auto &[key, value] : this->memory_free_.value()) { - str += std::format(" memory_free_{}:\n" + str += fmt::format(" {}_memory_free:\n" " unit: \"string\"\n" " values: [{}]\n", key, - detail::join(value, ", ")); - - // calculate the used memory - if (this->allocatable_memory_total_.has_value()) { - decltype(level_zero_memory_samples::memory_free_)::value_type::mapped_type memory_used(value.size(), this->allocatable_memory_total_->at(key)); - for (std::size_t i = 0; i < memory_used.size(); ++i) { - memory_used[i] -= value[i]; - } - str += std::format(" memory_used_{}:\n" - " unit: \"string\"\n" - " values: [{}]\n", - key, - detail::join(memory_used, ", ")); - } + fmt::join(value, ", ")); } } - // PCIe link speed - if (this->pcie_link_speed_.has_value()) { - str += std::format(" pcie_bandwidth:\n" - " unit: \"MBPS\"\n" - " values: [{}]\n", - detail::join(this->pcie_link_speed_.value(), ", ")); + // the currently used memory + if (this->memory_used_.has_value()) { + for (const auto &[key, value] : this->memory_used_.value()) { + str += fmt::format(" {}_memory_used:\n" + " unit: \"string\"\n" + " values: [{}]\n", + key, + fmt::join(value, ", ")); + } } // PCIe link width - if (this->pcie_link_width_.has_value()) { - str += std::format(" pcie_link_width:\n" + if (this->num_pcie_lanes_.has_value()) { + str += fmt::format(" num_pcie_lanes:\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->pcie_link_width_.value(), ", ")); + fmt::join(this->num_pcie_lanes_.value(), ", ")); } // PCIe link generation if (this->pcie_link_generation_.has_value()) { - str += std::format(" pcie_link_generation:\n" + str += fmt::format(" pcie_link_generation:\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->pcie_link_generation_.value(), ", ")); + fmt::join(this->pcie_link_generation_.value(), ", ")); + } + // PCIe link speed + if (this->pcie_link_speed_.has_value()) { + str += fmt::format(" pcie_link_speed:\n" + " unit: 
\"MBPS\"\n" + " values: [{}]\n", + fmt::join(this->pcie_link_speed_.value(), ", ")); } // remove last newline @@ -423,27 +433,26 @@ std::string level_zero_memory_samples::generate_yaml_string() const { std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &samples) { std::string str{}; - detail::append_map_values(str, "memory_total [B]", samples.get_memory_total()); - detail::append_map_values(str, "allocatable_memory_total [B]", samples.get_allocatable_memory_total()); - - str += std::format("pcie_link_max_speed [BPS]: {}\n" - "pcie_max_width [int]: {}\n" - "max_pcie_link_generation [int]: {}\n", - detail::value_or_default(samples.get_pcie_link_max_speed()), - detail::value_or_default(samples.get_pcie_max_width()), - detail::value_or_default(samples.get_max_pcie_link_generation())); - - detail::append_map_values(str, "bus_width [Bit]", samples.get_bus_width()); - detail::append_map_values(str, "num_channels [int]", samples.get_num_channels()); - detail::append_map_values(str, "location [string]", samples.get_location()); - detail::append_map_values(str, "memory_free [string]", samples.get_memory_free()); - - str += std::format("pcie_link_speed [MBPS]: [{}]\n" - "pcie_link_width [int]: [{}]\n" - "pcie_link_generation [int]: [{}]", - detail::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_width()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_generation()), ", ")); + append_map_values(str, "memory_total [B]", samples.get_memory_total()); + append_map_values(str, "visible_memory_total [B]", samples.get_visible_memory_total()); + append_map_values(str, "memory_location [string]", samples.get_memory_location()); + str += fmt::format("num_pcie_lanes_max [int]: {}\n" + "pcie_link_generation_max [int]: {}\n" + "pcie_link_speed_max [MBPS]: {}\n", + detail::value_or_default(samples.get_num_pcie_lanes_max()), + 
detail::value_or_default(samples.get_pcie_link_generation_max()), + detail::value_or_default(samples.get_pcie_link_speed_max())); + append_map_values(str, "memory_bus_width [Bit]", samples.get_memory_bus_width()); + append_map_values(str, "memory_num_channels [int]", samples.get_memory_num_channels()); + + append_map_values(str, "memory_free [string]", samples.get_memory_free()); + append_map_values(str, "memory_used [string]", samples.get_memory_used()); + str += fmt::format("num_pcie_lanes [int]: [{}]\n" + "pcie_link_generation [int]: [{}]\n" + "pcie_link_speed [MBPS]: [{}]", + fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", ")); return out << str; } @@ -455,33 +464,76 @@ std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &sam std::string level_zero_temperature_samples::generate_yaml_string() const { std::string str{ "temperature:\n" }; - // the maximum sensor temperature + // the number of fans + if (this->num_fans_.has_value()) { + str += fmt::format(" num_fans:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_fans_.value()); + } + // the maximum fan speed in RPM + if (this->fan_speed_max_.has_value()) { + str += fmt::format(" fan_speed_max:\n" + " unit: \"RPM\"\n" + " values: {}\n", + this->fan_speed_max_.value()); + } + // the maximum GPU temperature if (this->temperature_max_.has_value()) { - for (const auto &[key, value] : this->temperature_max_.value()) { - str += std::format(" temperature_{}_max:\n" - " unit: \"°C\"\n" - " values: {}\n", - key, - value); - } + str += fmt::format(" temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->temperature_max_.value()); } - - // the current PSU temperatures - if (this->temperature_psu_.has_value()) { - str += std::format(" temperature_psu:\n" + // the maximum memory temperature + if 
(this->memory_temperature_max_.has_value()) { + str += fmt::format(" memory_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->memory_temperature_max_.value()); + } + // the maximum global temperature + if (this->global_temperature_max_.has_value()) { + str += fmt::format(" global_temperature_max:\n" " unit: \"°C\"\n" + " values: {}\n", + this->global_temperature_max_.value()); + } + + // the current fan speed in percent + if (this->fan_speed_percentage_.has_value()) { + str += fmt::format(" fan_speed_percentage:\n" + " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->temperature_psu_.value(), ", ")); + fmt::join(this->fan_speed_percentage_.value(), ", ")); } - // the current sensor temperatures + // the current GPU temperature if (this->temperature_.has_value()) { - for (const auto &[key, value] : this->temperature_.value()) { - str += std::format(" temperature_{}:\n" - " unit: \"°C\"\n" - " values: [{}]\n", - key, - detail::join(value, ", ")); - } + str += fmt::format(" temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->temperature_.value(), ", ")); + } + // the current memory temperature + if (this->memory_temperature_.has_value()) { + str += fmt::format(" memory_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->memory_temperature_.value(), ", ")); + } + // the current global temperature + if (this->global_temperature_.has_value()) { + str += fmt::format(" global_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->global_temperature_.value(), ", ")); + } + // the current PSU temperature + if (this->psu_temperature_.has_value()) { + str += fmt::format(" psu_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->psu_temperature_.value(), ", ")); } // remove last newline @@ -491,19 +543,26 @@ std::string level_zero_temperature_samples::generate_yaml_string() const { } std::ostream &operator<<(std::ostream &out, const 
level_zero_temperature_samples &samples) { - std::string str{}; - - detail::append_map_values(str, "temperature_max [°C]", samples.get_temperature_max()); - - str += std::format("temperature_psu [°C]: [{}]\n", - detail::join(detail::value_or_default(samples.get_temperature_psu()), ", ")); - - detail::append_map_values(str, "temperature [°C]", samples.get_temperature()); - - // remove last newline - str.pop_back(); - - return out << str; + return out << fmt::format("num_fans [int]: {}\n" + "fan_speed_max [RPM]: {}\n" + "temperature_max [°C]: {}\n" + "memory_temperature_max [°C]: {}\n" + "global_temperature_max [°C]: {}\n" + "fan_speed_percentage [%]: [{}]\n" + "temperature [°C]: [{}]\n" + "memory_temperature [°C]: [{}]\n" + "global_temperature [°C]: [{}]\n" + "psu_temperature [°C]: [{}]", + detail::value_or_default(samples.get_num_fans()), + detail::value_or_default(samples.get_fan_speed_max()), + detail::value_or_default(samples.get_temperature_max()), + detail::value_or_default(samples.get_memory_temperature_max()), + detail::value_or_default(samples.get_global_temperature_max()), + fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_global_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_psu_temperature()), ", ")); } } // namespace hws diff --git a/src/hardware_sampling/gpu_intel/utility.cpp b/src/hardware_sampling/gpu_intel/utility.cpp index a0cefe9..635b5c1 100644 --- a/src/hardware_sampling/gpu_intel/utility.cpp +++ b/src/hardware_sampling/gpu_intel/utility.cpp @@ -7,14 +7,66 @@ #include "hardware_sampling/gpu_intel/utility.hpp" +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include "level_zero/ze_api.h" // Level Zero runtime functions #include "level_zero/zes_api.h" // Level Zero 
runtime functions #include // std::string #include // std::string_view +#include // std::vector namespace hws::detail { +std::vector property_flags_to_vector(const ze_device_property_flags_t flags) { + std::vector string_flags{}; + + if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) != 0) { + string_flags.emplace_back("integrated_gpu"); + } + if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) != 0) { + string_flags.emplace_back("sub-device"); + } + if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_ECC) != 0) { + string_flags.emplace_back("ecc"); + } + if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING) != 0) { + string_flags.emplace_back("on-demand_page-faulting"); + } + + return string_flags; +} + +std::string throttle_reason_to_string(const zes_freq_throttle_reason_flags_t reasons) { + if (reasons == 0) { + return "None"; + } else { + std::vector string_reasons{}; + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_AVE_PWR_CAP) != 0) { + string_reasons.emplace_back("average_power"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_BURST_PWR_CAP) != 0) { + string_reasons.emplace_back("burst_power"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_CURRENT_LIMIT) != 0) { + string_reasons.emplace_back("current_limit"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_THERMAL_LIMIT) != 0) { + string_reasons.emplace_back("thermal_limit"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_PSU_ALERT) != 0) { + string_reasons.emplace_back("psu_assertion"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_SW_RANGE) != 0) { + string_reasons.emplace_back("software_frequency_range"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_HW_RANGE) != 0) { + 
string_reasons.emplace_back("hardware_frequency_range"); + } + return fmt::format("{}", fmt::join(string_reasons, "|")); + } +} + std::string_view to_result_string(const ze_result_t errc) { switch (errc) { case ZE_RESULT_SUCCESS: @@ -175,17 +227,4 @@ std::string memory_location_to_name(const zes_mem_loc_t mem_loc) { } } -std::string temperature_sensor_type_to_name(const zes_temp_sensors_t sensor_type) { - switch (sensor_type) { - case ZES_TEMP_SENSORS_GLOBAL: - return "global"; - case ZES_TEMP_SENSORS_GPU: - return "gpu"; - case ZES_TEMP_SENSORS_MEMORY: - return "memory"; - default: - return ""; - } -} - } // namespace hws::detail From 81fe9cd966384cc4c81edddd5ae08c9b1cdf7cfe Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 13:47:29 +0200 Subject: [PATCH 46/69] Add function to check whether a sample category has any sample. Output category to YAML file only if at least one sample is available. --- bindings/cpu_hardware_sampler.cpp | 7 ++ bindings/gpu_amd_hardware_sampler.cpp | 5 + bindings/gpu_intel_hardware_sampler.cpp | 5 + bindings/gpu_nvidia_hardware_sampler.cpp | 5 + include/hardware_sampling/cpu/cpu_samples.hpp | 42 +++++++++ .../gpu_amd/rocm_smi_samples.hpp | 30 ++++++ .../gpu_intel/level_zero_samples.hpp | 30 ++++++ .../gpu_nvidia/nvml_samples.hpp | 29 ++++++ src/hardware_sampling/cpu/cpu_samples.cpp | 94 ++++++++++++++----- .../cpu/hardware_sampler.cpp | 12 +-- .../gpu_amd/hardware_sampler.cpp | 8 +- .../gpu_amd/rocm_smi_samples.cpp | 76 ++++++++++++--- .../gpu_intel/hardware_sampler.cpp | 8 +- .../gpu_intel/level_zero_samples.cpp | 71 +++++++++++--- .../gpu_nvidia/hardware_sampler.cpp | 8 +- .../gpu_nvidia/nvml_samples.cpp | 72 +++++++++++--- src/hardware_sampling/hardware_sampler.cpp | 6 +- 17 files changed, 421 insertions(+), 87 deletions(-) diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp index 354c585..ba31dfe 100644 --- a/bindings/cpu_hardware_sampler.cpp +++ b/bindings/cpu_hardware_sampler.cpp @@
-21,6 +21,7 @@ namespace py = pybind11; void init_cpu_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "CpuGeneralSamples") + .def("has_samples", &hws::cpu_general_samples::has_samples, "true if any sample is available, false otherwise") .def("get_architecture", &hws::cpu_general_samples::get_architecture, "the CPU architecture (e.g., x86_64)") .def("get_byte_order", &hws::cpu_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") .def("get_num_cores", &hws::cpu_general_samples::get_num_cores, "the total number of cores of the CPU(s)") @@ -44,6 +45,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { // bind the clock samples py::class_(m, "CpuClockSamples") + .def("has_samples", &hws::cpu_clock_samples::has_samples, "true if any sample is available, false otherwise") .def("get_auto_boosted_clock_enabled", &hws::cpu_clock_samples::get_auto_boosted_clock_enabled, "true if frequency boosting is enabled") .def("get_clock_frequency_min", &hws::cpu_clock_samples::get_clock_frequency_min, "the minimum possible CPU frequency in MHz") .def("get_clock_frequency_max", &hws::cpu_clock_samples::get_clock_frequency_max, "the maximum possible CPU frequency in MHz") @@ -56,6 +58,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { // bind the power samples py::class_(m, "CpuPowerSamples") + .def("has_samples", &hws::cpu_power_samples::has_samples, "true if any sample is available, false otherwise") .def("get_power_measurement_type", &hws::cpu_power_samples::get_power_measurement_type, "the type of the power readings: always \"instant/current\"") .def("get_power_usage", &hws::cpu_power_samples::get_power_usage, "the currently consumed power of the package of the CPU in W") .def("get_power_total_energy_consumed", &hws::cpu_power_samples::get_power_total_energy_consumption, "the total power consumption in J") @@ -69,6 +72,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { // bind the memory samples py::class_(m, 
"CpuMemorySamples") + .def("has_samples", &hws::cpu_memory_samples::has_samples, "true if any sample is available, false otherwise") .def("get_cache_size_L1d", &hws::cpu_memory_samples::get_cache_size_L1d, "the size of the L1 data cache") .def("get_cache_size_L1i", &hws::cpu_memory_samples::get_cache_size_L1i, "the size of the L1 instruction cache") .def("get_cache_size_L2", &hws::cpu_memory_samples::get_cache_size_L2, "the size of the L2 cache") @@ -85,6 +89,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { // bind the temperature samples py::class_(m, "CpuTemperatureSamples") + .def("has_samples", &hws::cpu_temperature_samples::has_samples, "true if any sample is available, false otherwise") .def("get_temperature", &hws::cpu_temperature_samples::get_temperature, "the current temperature of the whole package in °C") .def("get_core_temperature", &hws::cpu_temperature_samples::get_core_temperature, "the current temperature of the core part of the CPU in °C") .def("get_core_throttle_percent", &hws::cpu_temperature_samples::get_core_throttle_percent, "the percent of time the CPU has throttled") @@ -94,6 +99,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { // bind the gfx samples py::class_(m, "CpuGfxSamples") + .def("has_samples", &hws::cpu_gfx_samples::has_samples, "true if any sample is available, false otherwise") .def("get_gfx_render_state_percent", &hws::cpu_gfx_samples::get_gfx_render_state_percent, "the percent of time the iGPU was in the render state") .def("get_gfx_frequency", &hws::cpu_gfx_samples::get_gfx_frequency, "the current iGPU power consumption in W") .def("get_average_gfx_frequency", &hws::cpu_gfx_samples::get_average_gfx_frequency, "the average iGPU frequency in MHz") @@ -106,6 +112,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { // bind the idle state samples py::class_(m, "CpuIdleStateSamples") + .def("has_samples", &hws::cpu_idle_states_samples::has_samples, "true if any sample is available, false otherwise") 
.def("get_idle_states", &hws::cpu_idle_states_samples::get_idle_states, "the map of additional CPU idle states") .def("get_all_cpus_state_c0_percent", &hws::cpu_idle_states_samples::get_all_cpus_state_c0_percent, "the percent of time all CPUs were in idle state c0") .def("get_any_cpu_state_c0_percent", &hws::cpu_idle_states_samples::get_any_cpu_state_c0_percent, "the percent of time any CPU was in the idle state c0") diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp index 9ffd042..c738340 100644 --- a/bindings/gpu_amd_hardware_sampler.cpp +++ b/bindings/gpu_amd_hardware_sampler.cpp @@ -22,6 +22,7 @@ namespace py = pybind11; void init_gpu_amd_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "RocmSmiGeneralSamples") + .def("has_samples", &hws::rocm_smi_general_samples::has_samples, "true if any sample is available, false otherwise") .def("get_architecture", &hws::rocm_smi_general_samples::get_name, "the architecture name of the device") .def("get_byte_order", &hws::rocm_smi_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") .def("get_vendor_id", &hws::rocm_smi_general_samples::get_vendor_id, "the vendor ID") @@ -35,6 +36,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) { // bind the clock samples py::class_(m, "RocmSmiClockSamples") + .def("has_samples", &hws::rocm_smi_clock_samples::has_samples, "true if any sample is available, false otherwise") .def("get_clock_frequency_min", &hws::rocm_smi_clock_samples::get_clock_frequency_min, "the minimum possible system clock frequency in MHz") .def("get_clock_frequency_max", &hws::rocm_smi_clock_samples::get_clock_frequency_max, "the maximum possible system clock frequency in MHz") .def("get_memory_clock_frequency_min", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") @@ -54,6 +56,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) { // bind the power 
samples py::class_(m, "RocmSmiPowerSamples") + .def("has_samples", &hws::rocm_smi_power_samples::has_samples, "true if any sample is available, false otherwise") .def("get_power_management_limit", &hws::rocm_smi_power_samples::get_power_management_limit, "the default power cap (W), may be different from power cap") .def("get_power_enforced_limit", &hws::rocm_smi_power_samples::get_power_enforced_limit, "if the GPU draws more power (W) than the power cap, the GPU may throttle") .def("get_power_measurement_type", &hws::rocm_smi_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw") @@ -67,6 +70,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) { // bind the memory samples py::class_(m, "RocmSmiMemorySamples") + .def("has_samples", &hws::rocm_smi_memory_samples::has_samples, "true if any sample is available, false otherwise") .def("get_memory_total", &hws::rocm_smi_memory_samples::get_memory_total, "the total available memory in Byte") .def("get_visible_memory_total", &hws::rocm_smi_memory_samples::get_visible_memory_total, "the total visible available memory in Byte, may be smaller than the total memory") .def("get_num_pcie_lanes_min", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_min, "the minimum number of used PCIe lanes") @@ -83,6 +87,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) { // bind the temperature samples py::class_(m, "RocmSmiTemperatureSamples") + .def("has_samples", &hws::rocm_smi_temperature_samples::has_samples, "true if any sample is available, false otherwise") .def("get_num_fans", &hws::rocm_smi_temperature_samples::get_num_fans, "the number of fans (if any)") .def("get_fan_speed_max", &hws::rocm_smi_temperature_samples::get_fan_speed_max, "the maximum fan speed in RPM") .def("get_temperature_min", &hws::rocm_smi_temperature_samples::get_temperature_min, "the minimum temperature on the GPU's edge temperature sensor in °C") diff --git 
a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp index 8cf6f83..77b67fc 100644 --- a/bindings/gpu_intel_hardware_sampler.cpp +++ b/bindings/gpu_intel_hardware_sampler.cpp @@ -22,6 +22,7 @@ namespace py = pybind11; void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "LevelZeroGeneralSamples") + .def("has_samples", &hws::level_zero_general_samples::has_samples, "true if any sample is available, false otherwise") .def("get_byte_order", &hws::level_zero_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") .def("get_vendor_id", &hws::level_zero_general_samples::get_vendor_id, "the vendor ID") .def("get_name", &hws::level_zero_general_samples::get_name, "the model name of the device") @@ -35,6 +36,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the clock samples py::class_(m, "LevelZeroClockSamples") + .def("has_samples", &hws::level_zero_clock_samples::has_samples, "true if any sample is available, false otherwise") .def("get_clock_frequency_min", &hws::level_zero_clock_samples::get_clock_frequency_min, "the minimum possible GPU clock frequency in MHz") .def("get_clock_frequency_max", &hws::level_zero_clock_samples::get_clock_frequency_max, "the maximum possible GPU clock frequency in MHz") .def("get_memory_clock_frequency_min", &hws::level_zero_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") @@ -53,6 +55,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the power samples py::class_(m, "LevelZeroPowerSamples") + .def("has_samples", &hws::level_zero_power_samples::has_samples, "true if any sample is available, false otherwise") .def("get_power_enforced_limit", &hws::level_zero_power_samples::get_power_enforced_limit, "the actually enforced power limit (W), may be different from power management limit if external limiters are set") .def("get_power_measurement_type", 
&hws::level_zero_power_samples::get_power_measurement_type, "the type of the power readings") .def("get_power_management_mode", &hws::level_zero_power_samples::get_power_management_mode, "true if power management limits are enabled") @@ -64,6 +67,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the memory samples py::class_(m, "LevelZeroMemorySamples") + .def("has_samples", &hws::level_zero_memory_samples::has_samples, "true if any sample is available, false otherwise") .def("get_memory_total", &hws::level_zero_memory_samples::get_memory_total, "the total memory size of the different memory modules in Bytes") .def("get_visible_memory_total", &hws::level_zero_memory_samples::get_visible_memory_total, "the total allocatable memory size of the different memory modules in Bytes") .def("get_memory_location", &hws::level_zero_memory_samples::get_memory_location, "the location of the different memory modules (system or device)") @@ -83,6 +87,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the temperature samples py::class_(m, "LevelZeroTemperatureSamples") + .def("has_samples", &hws::level_zero_temperature_samples::has_samples, "true if any sample is available, false otherwise") .def("get_num_fans", &hws::level_zero_temperature_samples::get_num_fans, "the number of fans") .def("get_fan_speed_max", &hws::level_zero_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in RPM") .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum GPU temperature in °C") diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp index 1550c07..f9b9261 100644 --- a/bindings/gpu_nvidia_hardware_sampler.cpp +++ b/bindings/gpu_nvidia_hardware_sampler.cpp @@ -22,6 +22,7 @@ namespace py = pybind11; void init_gpu_nvidia_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "NvmlGeneralSamples") + .def("has_samples", 
&hws::nvml_general_samples::has_samples, "true if any sample is available, false otherwise") .def("get_architecture", &hws::nvml_general_samples::get_architecture, "the architecture name of the device") .def("get_byte_order", &hws::nvml_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores") @@ -37,6 +38,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) { // bind the clock samples py::class_(m, "NvmlClockSamples") + .def("has_samples", &hws::nvml_clock_samples::has_samples, "true if any sample is available, false otherwise") .def("get_auto_boosted_clock_enabled", &hws::nvml_clock_samples::get_auto_boosted_clock_enabled, "true if clock boosting is currently enabled") .def("get_clock_frequency_min", &hws::nvml_clock_samples::get_clock_frequency_min, "the minimum possible graphics clock frequency in MHz") .def("get_clock_frequency_max", &hws::nvml_clock_samples::get_clock_frequency_max, "the maximum possible graphics clock frequency in MHz") @@ -56,6 +58,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) { // bind the power samples py::class_(m, "NvmlPowerSamples") + .def("has_samples", &hws::nvml_power_samples::has_samples, "true if any sample is available, false otherwise") .def("get_power_management_limit", &hws::nvml_power_samples::get_power_management_limit, "if the GPU draws more power (mW) than the power management limit, the GPU may throttle") .def("get_power_enforced_limit", &hws::nvml_power_samples::get_power_enforced_limit, "the actually enforced power limit, may be different from power management limit if external limiters are set") .def("get_power_measurement_type", &hws::nvml_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw") @@ -70,6 +73,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) { // bind the memory samples py::class_(m, 
"NvmlMemorySamples") + .def("has_samples", &hws::nvml_memory_samples::has_samples, "true if any sample is available, false otherwise") .def("get_memory_total", &hws::nvml_memory_samples::get_memory_total, "the total available memory in Byte") .def("get_num_pcie_lanes_max", &hws::nvml_memory_samples::get_num_pcie_lanes_max, "the maximum number of PCIe lanes") .def("get_pcie_link_generation_max", &hws::nvml_memory_samples::get_pcie_link_generation_max, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)") @@ -86,6 +90,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) { // bind the temperature samples py::class_(m, "NvmlTemperatureSamples") + .def("has_samples", &hws::nvml_temperature_samples::has_samples, "true if any sample is available, false otherwise") .def("get_num_fans", &hws::nvml_temperature_samples::get_num_fans, "the number of fans (if any)") .def("get_fan_speed_min", &hws::nvml_temperature_samples::get_fan_speed_min, "the minimum fan speed the user can set in %") .def("get_fan_speed_max", &hws::nvml_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in %") diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index f92ba0d..3bd8a49 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -36,9 +36,15 @@ class cpu_general_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any general hardware sample is present. + * @return `true` if any general hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -84,9 +90,15 @@ class cpu_clock_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any clock related hardware sample is present. + * @return `true` if any clock related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -121,9 +133,15 @@ class cpu_power_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any power related hardware sample is present. + * @return `true` if any power related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -159,9 +177,15 @@ class cpu_memory_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any memory related hardware sample is present. + * @return `true` if any memory related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -200,9 +224,15 @@ class cpu_temperature_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any temperature related hardware sample is present. + * @return `true` if any temperature related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -233,9 +263,15 @@ class cpu_gfx_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any gfx related hardware sample is present. + * @return `true` if any gfx related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -271,9 +307,15 @@ class cpu_idle_states_samples { using map_type = std::unordered_map>; public: + /** + * @brief Checks whether any idle state related hardware sample is present. + * @return `true` if any idle state related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index af5228b..727e683 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -36,9 +36,15 @@ class rocm_smi_general_samples { friend class gpu_amd_hardware_sampler; public: + /** + * @brief Checks whether any general hardware sample is present. + * @return `true` if any general hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -74,9 +80,15 @@ class rocm_smi_clock_samples { friend class gpu_amd_hardware_sampler; public: + /** + * @brief Checks whether any clock related hardware sample is present. + * @return `true` if any clock related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -118,9 +130,15 @@ class rocm_smi_power_samples { friend class gpu_amd_hardware_sampler; public: + /** + * @brief Checks whether any power related hardware sample is present. + * @return `true` if any power related hardware sample is, otherwise `false`. 
+ */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -156,9 +174,15 @@ class rocm_smi_memory_samples { friend class gpu_amd_hardware_sampler; public: + /** + * @brief Checks whether any memory related hardware sample is present. + * @return `true` if any memory related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -197,9 +221,15 @@ class rocm_smi_temperature_samples { friend class gpu_amd_hardware_sampler; public: + /** + * @brief Checks whether any temperature related hardware sample is present. + * @return `true` if any temperature related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp index 9900f95..a1c56c2 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp @@ -37,9 +37,15 @@ class level_zero_general_samples { friend class gpu_intel_hardware_sampler; public: + /** + * @brief Checks whether any general hardware sample is present. + * @return `true` if any general hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -75,9 +81,15 @@ class level_zero_clock_samples { friend class gpu_intel_hardware_sampler; public: + /** + * @brief Checks whether any clock related hardware sample is present. + * @return `true` if any clock related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -118,9 +130,15 @@ class level_zero_power_samples { friend class gpu_intel_hardware_sampler; public: + /** + * @brief Checks whether any power related hardware sample is present. + * @return `true` if any power related hardware sample is, otherwise `false`. 
+ */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -161,9 +179,15 @@ class level_zero_memory_samples { using map_type = std::unordered_map; public: + /** + * @brief Checks whether any memory related hardware sample is present. + * @return `true` if any memory related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -205,9 +229,15 @@ class level_zero_temperature_samples { friend class gpu_intel_hardware_sampler; public: + /** + * @brief Checks whether any temperature related hardware sample is present. + * @return `true` if any temperature related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index c6e7ad9..31acebb 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -36,9 +36,15 @@ class nvml_general_samples { friend class gpu_nvidia_hardware_sampler; public: + /** + * @brief Checks whether any general hardware sample is present. + * @return `true` if any general hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -78,9 +84,15 @@ class nvml_clock_samples { using map_type = std::map>; public: + /** + * @brief Checks whether any clock related hardware sample is present. + * @return `true` if any clock related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -122,9 +134,15 @@ class nvml_power_samples { friend class gpu_nvidia_hardware_sampler; public: + /** + * @brief Checks whether any power related hardware sample is present. + * @return `true` if any power related hardware sample is, otherwise `false`. 
+ */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -161,9 +179,15 @@ class nvml_memory_samples { friend class gpu_nvidia_hardware_sampler; public: + /** + * @brief Checks whether any memory related hardware sample is present. + * @return `true` if any memory related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -202,6 +226,11 @@ class nvml_temperature_samples { friend class gpu_nvidia_hardware_sampler; public: + /** + * @brief Checks whether any temperature related hardware sample is present. + * @return `true` if any temperature related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. 
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp index cc99d76..3ef3ad7 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hardware_sampling/cpu/cpu_samples.cpp @@ -26,7 +26,19 @@ namespace hws { // general samples // //*************************************************************************************************************************************// +bool cpu_general_samples::has_samples() const { + return this->architecture_.has_value() || this->byte_order_.has_value() || this->num_cores_.has_value() || this->num_threads_.has_value() + || this->threads_per_core_.has_value() || this->cores_per_socket_.has_value() || this->num_sockets_.has_value() || this->numa_nodes_.has_value() + || this->vendor_id_.has_value() || this->name_.has_value() || this->flags_.has_value() || this->compute_utilization_.has_value() + || this->ipc_.has_value() || this->irq_.has_value() || this->smi_.has_value() || this->poll_.has_value() || this->poll_percent_.has_value(); +} + std::string cpu_general_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "general:\n" }; // architecture @@ -156,9 +168,6 @@ std::string cpu_general_samples::generate_yaml_string() const { fmt::join(this->poll_percent_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -208,7 +217,17 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) // clock samples // //*************************************************************************************************************************************// +bool cpu_clock_samples::has_samples() const { + return this->auto_boosted_clock_enabled_.has_value() || this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() + || this->clock_frequency_.has_value() || this->average_non_idle_clock_frequency_.has_value() || 
this->time_stamp_counter_.has_value(); +} + std::string cpu_clock_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "clock:\n" }; // true if frequency boost is enabled @@ -258,9 +277,6 @@ std::string cpu_clock_samples::generate_yaml_string() const { fmt::join(this->time_stamp_counter_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -283,7 +299,18 @@ std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) { // power samples // //*************************************************************************************************************************************// +bool cpu_power_samples::has_samples() const { + return this->power_measurement_type_.has_value() || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value() + || this->core_watt_.has_value() || this->ram_watt_.has_value() || this->package_rapl_throttle_percent_.has_value() + || this->dram_rapl_throttle_percent_.has_value(); +} + std::string cpu_power_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "power:\n" }; // power measurement type @@ -343,9 +370,6 @@ std::string cpu_power_samples::generate_yaml_string() const { fmt::join(this->dram_rapl_throttle_percent_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -370,7 +394,18 @@ std::ostream &operator<<(std::ostream &out, const cpu_power_samples &samples) { // memory samples // //*************************************************************************************************************************************// +bool cpu_memory_samples::has_samples() const { + return this->cache_size_L1d_.has_value() || this->cache_size_L1i_.has_value() || this->cache_size_L2_.has_value() || this->cache_size_L3_.has_value() + || 
this->memory_total_.has_value() || this->swap_memory_total_.has_value() || this->memory_used_.has_value() || this->memory_free_.has_value() + || this->swap_memory_used_.has_value() || this->swap_memory_free_.has_value(); +} + std::string cpu_memory_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "memory:\n" }; // the size of the L1 data cache @@ -446,9 +481,6 @@ std::string cpu_memory_samples::generate_yaml_string() const { fmt::join(this->swap_memory_free_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -479,7 +511,16 @@ std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) { // temperature samples // //*************************************************************************************************************************************// +bool cpu_temperature_samples::has_samples() const { + return this->temperature_.has_value() || this->core_temperature_.has_value() || this->core_throttle_percent_.has_value(); +} + std::string cpu_temperature_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "temperature:\n" }; // the temperature of the whole package @@ -507,9 +548,6 @@ std::string cpu_temperature_samples::generate_yaml_string() const { fmt::join(this->core_throttle_percent_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -526,7 +564,17 @@ std::ostream &operator<<(std::ostream &out, const cpu_temperature_samples &sampl // gfx (iGPU) samples // //*************************************************************************************************************************************// +bool cpu_gfx_samples::has_samples() const { + return this->gfx_render_state_percent_.has_value() || this->gfx_frequency_.has_value() ||
this->average_gfx_frequency_.has_value() + || this->gfx_state_c0_percent_.has_value() || this->cpu_works_for_gpu_percent_.has_value() || this->gfx_watt_.has_value(); +} + std::string cpu_gfx_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "integrated_gpu:\n" }; // the percentage of time the iGPU was in the render state @@ -578,9 +626,6 @@ std::string cpu_gfx_samples::generate_yaml_string() const { fmt::join(this->gfx_watt_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -603,7 +648,17 @@ std::ostream &operator<<(std::ostream &out, const cpu_gfx_samples &samples) { // idle state samples // //*************************************************************************************************************************************// +bool cpu_idle_states_samples::has_samples() const { + return this->all_cpus_state_c0_percent_.has_value() || this->any_cpu_state_c0_percent_.has_value() || this->low_power_idle_state_percent_.has_value() + || this->system_low_power_idle_state_percent_.has_value() || this->package_low_power_idle_state_percent_.has_value() || this->idle_states_.has_value(); +} + std::string cpu_idle_states_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "idle_states:\n" }; // the percentage of time all CPUs were in the c0 state @@ -687,9 +742,6 @@ std::string cpu_idle_states_samples::generate_yaml_string() const { } } - // remove last newline - str.pop_back(); - return str; } diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index d4a6754..7e89eca 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -428,12 +428,12 @@ std::string cpu_hardware_sampler::generate_yaml_string() const { 
throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return fmt::format("{}\n\n" - "{}\n\n" - "{}\n\n" - "{}\n\n" - "{}\n\n" - "{}\n\n" + return fmt::format("{}\n" + "{}\n" + "{}\n" + "{}\n" + "{}\n" + "{}\n" "{}", general_samples_.generate_yaml_string(), clock_samples_.generate_yaml_string(), diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index dbd2971..61a8456 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -687,10 +687,10 @@ std::string gpu_amd_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return fmt::format("{}\n\n" - "{}\n\n" - "{}\n\n" - "{}\n\n" + return fmt::format("{}\n" + "{}\n" + "{}\n" + "{}\n" "{}", general_samples_.generate_yaml_string(), clock_samples_.generate_yaml_string(), diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp index 641ca29..e93c36b 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp @@ -21,7 +21,17 @@ namespace hws { // general samples // //*************************************************************************************************************************************// +bool rocm_smi_general_samples::has_samples() const { + return this->architecture_.has_value() || this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value() + || this->compute_utilization_.has_value() || this->memory_utilization_.has_value() || this->performance_level_.has_value(); +} + std::string rocm_smi_general_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "general:\n" }; 
// device architecture @@ -75,9 +85,6 @@ std::string rocm_smi_general_samples::generate_yaml_string() const { fmt::join(detail::quote(this->performance_level_.value()), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -102,7 +109,20 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samp // clock samples // //*************************************************************************************************************************************// +bool rocm_smi_clock_samples::has_samples() const { + return this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() || this->memory_clock_frequency_min_.has_value() + || this->memory_clock_frequency_max_.has_value() || this->socket_clock_frequency_min_.has_value() || this->socket_clock_frequency_max_.has_value() + || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value() + || this->memory_clock_frequency_.has_value() || this->socket_clock_frequency_.has_value() || this->overdrive_level_.has_value() + || this->memory_overdrive_level_.has_value(); +} + std::string rocm_smi_clock_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "clock:\n" }; // system clock min frequencies @@ -198,9 +218,6 @@ std::string rocm_smi_clock_samples::generate_yaml_string() const { fmt::join(this->memory_overdrive_level_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -237,7 +254,18 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &sample // power samples // //*************************************************************************************************************************************// +bool rocm_smi_power_samples::has_samples() const { + return this->power_management_limit_.has_value() || 
this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value() + || this->available_power_profiles_.has_value() || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value() + || this->power_profile_.has_value(); +} + std::string rocm_smi_power_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "power:\n" }; // power management limit @@ -291,9 +319,6 @@ std::string rocm_smi_power_samples::generate_yaml_string() const { fmt::join(detail::quote(this->power_profile_.value()), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -318,7 +343,18 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &sample // memory samples // //*************************************************************************************************************************************// +bool rocm_smi_memory_samples::has_samples() const { + return this->memory_total_.has_value() || this->visible_memory_total_.has_value() || this->num_pcie_lanes_min_.has_value() + || this->num_pcie_lanes_max_.has_value() || this->pcie_link_transfer_rate_min_.has_value() || this->pcie_link_transfer_rate_max_.has_value() + || this->memory_used_.has_value() || this->memory_free_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_transfer_rate_.has_value(); +} + std::string rocm_smi_memory_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "memory:\n" }; // total memory @@ -394,9 +430,6 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const { fmt::join(this->pcie_link_transfer_rate_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -427,7 +460,23 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &sampl // temperature 
samples // //*************************************************************************************************************************************// +bool rocm_smi_temperature_samples::has_samples() const { + return this->num_fans_.has_value() || this->fan_speed_max_.has_value() || this->temperature_min_.has_value() || this->temperature_max_.has_value() + || this->memory_temperature_min_.has_value() || this->memory_temperature_max_.has_value() || this->hotspot_temperature_min_.has_value() + || this->hotspot_temperature_max_.has_value() || this->hbm_0_temperature_min_.has_value() || this->hbm_0_temperature_max_.has_value() + || this->hbm_1_temperature_min_.has_value() || this->hbm_1_temperature_max_.has_value() || this->hbm_2_temperature_min_.has_value() + || this->hbm_2_temperature_max_.has_value() || this->hbm_3_temperature_min_.has_value() || this->hbm_3_temperature_max_.has_value() + || this->fan_speed_percentage_.has_value() || this->temperature_.has_value() || this->memory_temperature_.has_value() + || this->hotspot_temperature_.has_value() || this->hbm_0_temperature_.has_value() || this->hbm_1_temperature_.has_value() + || this->hbm_2_temperature_.has_value() || this->hbm_3_temperature_.has_value(); +} + std::string rocm_smi_temperature_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "temperature:\n" }; // number of fans (emulated) @@ -600,9 +649,6 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const { fmt::join(this->hbm_3_temperature_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp index 774ab6c..c2e8eec 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp @@ -678,10 +678,10 @@ std::string 
gpu_intel_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return fmt::format("{}\n\n" - "{}\n\n" - "{}\n\n" - "{}\n\n" + return fmt::format("{}\n" + "{}\n" + "{}\n" + "{}\n" "{}", general_samples_.generate_yaml_string(), clock_samples_.generate_yaml_string(), diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp index 5ceffcf..a10a358 100644 --- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp +++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp @@ -38,7 +38,17 @@ void append_map_values(std::string &str, const std::string_view entry_name, cons // general samples // //*************************************************************************************************************************************// +bool level_zero_general_samples::has_samples() const { + return this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value() || this->flags_.has_value() || this->standby_mode_.has_value() + || this->num_threads_per_eu_.has_value() || this->eu_simd_width_.has_value(); +} + std::string level_zero_general_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "general:\n" }; // device byte order @@ -91,9 +101,6 @@ std::string level_zero_general_samples::generate_yaml_string() const { this->eu_simd_width_.value()); } - // remove last newline - str.pop_back(); - return str; } @@ -118,7 +125,19 @@ std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &sa // clock samples // //*************************************************************************************************************************************// +bool level_zero_clock_samples::has_samples() const { + return this->clock_frequency_min_.has_value() || 
this->clock_frequency_max_.has_value() || this->memory_clock_frequency_min_.has_value() + || this->memory_clock_frequency_max_.has_value() || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() + || this->clock_frequency_.has_value() || this->memory_clock_frequency_.has_value() || this->throttle_reason_.has_value() + || this->memory_throttle_reason_.has_value() || this->frequency_limit_tdp_.has_value() || this->memory_frequency_limit_tdp_.has_value(); +} + std::string level_zero_clock_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "clock:\n" }; // minimum GPU core clock @@ -207,9 +226,6 @@ std::string level_zero_clock_samples::generate_yaml_string() const { fmt::join(this->memory_frequency_limit_tdp_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -244,7 +260,17 @@ std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samp // power samples // //*************************************************************************************************************************************// +bool level_zero_power_samples::has_samples() const { + return this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value() || this->power_management_mode_.has_value() + || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value(); +} + std::string level_zero_power_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "power:\n" }; // power enforced limit @@ -284,9 +310,6 @@ std::string level_zero_power_samples::generate_yaml_string() const { fmt::join(this->power_total_energy_consumption_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -307,7 +330,20 @@ std::ostream 
&operator<<(std::ostream &out, const level_zero_power_samples &samp // memory samples // //*************************************************************************************************************************************// +bool level_zero_memory_samples::has_samples() const { + return this->memory_total_.has_value() || this->visible_memory_total_.has_value() || this->memory_location_.has_value() + || this->num_pcie_lanes_max_.has_value() || this->pcie_link_generation_max_.has_value() || this->pcie_link_speed_max_.has_value() + || this->memory_bus_width_.has_value() || this->memory_num_channels_.has_value() || this->memory_free_.has_value() + || this->memory_used_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_generation_.has_value() + || this->pcie_link_speed_.has_value(); +} + std::string level_zero_memory_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "memory:\n" }; // the total memory @@ -424,9 +460,6 @@ std::string level_zero_memory_samples::generate_yaml_string() const { fmt::join(this->pcie_link_speed_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -461,7 +494,18 @@ std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &sam // temperature samples // //*************************************************************************************************************************************// +bool level_zero_temperature_samples::has_samples() const { + return this->num_fans_.has_value() || this->fan_speed_max_.has_value() || this->temperature_max_.has_value() || this->memory_temperature_max_.has_value() + || this->global_temperature_max_.has_value() || this->fan_speed_percentage_.has_value() || this->temperature_.has_value() + || this->memory_temperature_.has_value() || this->global_temperature_.has_value() || this->psu_temperature_.has_value(); +} + std::string 
level_zero_temperature_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "temperature:\n" }; // the number of fans @@ -536,9 +580,6 @@ std::string level_zero_temperature_samples::generate_yaml_string() const { fmt::join(this->psu_temperature_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 769f0a6..1536237 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -551,10 +551,10 @@ std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return fmt::format("{}\n\n" - "{}\n\n" - "{}\n\n" - "{}\n\n" + return fmt::format("{}\n" + "{}\n" + "{}\n" + "{}\n" "{}", general_samples_.generate_yaml_string(), clock_samples_.generate_yaml_string(), diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 94bf97b..0412ef2 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -21,7 +21,18 @@ namespace hws { // general samples // //*************************************************************************************************************************************// +bool nvml_general_samples::has_samples() const { + return this->architecture_.has_value() || this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value() + || this->persistence_mode_.has_value() || this->num_cores_.has_value() || this->compute_utilization_.has_value() + || this->memory_utilization_.has_value() || this->performance_level_.has_value(); +} + std::string 
nvml_general_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "general:\n" }; // device architecture @@ -90,9 +101,6 @@ std::string nvml_general_samples::generate_yaml_string() const { fmt::join(this->performance_level_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -121,7 +129,20 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) // clock samples // //*************************************************************************************************************************************// +bool nvml_clock_samples::has_samples() const { + return this->auto_boosted_clock_enabled_.has_value() || this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() + || this->memory_clock_frequency_min_.has_value() || this->memory_clock_frequency_max_.has_value() || this->sm_clock_frequency_max_.has_value() + || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value() + || this->memory_clock_frequency_.has_value() || this->sm_clock_frequency_.has_value() || this->throttle_reason_.has_value() + || this->auto_boosted_clock_.has_value(); +} + std::string nvml_clock_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "clock:\n" }; // adaptive clock status @@ -219,9 +240,6 @@ std::string nvml_clock_samples::generate_yaml_string() const { fmt::join(this->auto_boosted_clock_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -258,7 +276,18 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) { // power samples // 
//*************************************************************************************************************************************// +bool nvml_power_samples::has_samples() const { + return this->power_management_limit_.has_value() || this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value() + || this->power_management_mode_.has_value() || this->available_power_profiles_.has_value() || this->power_usage_.has_value() + || this->power_total_energy_consumption_.has_value() || this->power_profile_.has_value(); +} + std::string nvml_power_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "power:\n" }; // power management limit @@ -319,9 +348,6 @@ std::string nvml_power_samples::generate_yaml_string() const { fmt::join(this->power_profile_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -348,7 +374,19 @@ std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) { // memory samples // //*************************************************************************************************************************************// +bool nvml_memory_samples::has_samples() const { + return this->memory_total_.has_value() || this->pcie_link_speed_max_.has_value() || this->pcie_link_generation_max_.has_value() + || this->num_pcie_lanes_max_.has_value() || this->memory_bus_width_.has_value() || this->memory_used_.has_value() + || this->memory_free_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_generation_.has_value() + || this->pcie_link_speed_.has_value(); +} + std::string nvml_memory_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "memory:\n" }; // total memory size @@ -423,9 +461,6 @@ std::string nvml_memory_samples::generate_yaml_string() const { 
fmt::join(this->pcie_link_speed_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } @@ -456,7 +491,17 @@ std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) // temperature samples // //*************************************************************************************************************************************// +bool nvml_temperature_samples::has_samples() const { + return this->num_fans_.has_value() || this->fan_speed_min_.has_value() || this->fan_speed_max_.has_value() || this->temperature_max_.has_value() + || this->memory_temperature_max_.has_value() || this->fan_speed_percentage_.has_value() || this->temperature_.has_value(); +} + std::string nvml_temperature_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "temperature:\n" }; // number of fans @@ -510,9 +555,6 @@ std::string nvml_temperature_samples::generate_yaml_string() const { fmt::join(this->temperature_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index 5d27972..903db8a 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -152,11 +152,11 @@ void hardware_sampler::dump_yaml(const char *filename) const { // output the sampling information file << fmt::format("sampling_interval:\n" " unit: \"ms\"\n" - " values: {}\n" + " values: {}\n\n" "time_points:\n" " unit: \"s\"\n" - " values: [{}]\n" - "{}\n\n", + " values: [{}]\n\n" + "{}\n", this->sampling_interval().count(), fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), this->generate_yaml_string()); From e3f7f3b0f79a916f3127adcc900eee1d9c3b0194 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 15:59:36 +0200 Subject: 
[PATCH 47/69] Only add newlines if the sample category isn't empty. --- src/hardware_sampling/cpu/hardware_sampler.cpp | 18 ++++++++++++------ .../gpu_amd/hardware_sampler.cpp | 12 ++++++++---- .../gpu_intel/hardware_sampler.cpp | 12 ++++++++---- .../gpu_nvidia/hardware_sampler.cpp | 12 ++++++++---- 4 files changed, 36 insertions(+), 18 deletions(-) diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 7e89eca..824ea7b 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -428,19 +428,25 @@ std::string cpu_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return fmt::format("{}\n" - "{}\n" - "{}\n" - "{}\n" - "{}\n" - "{}\n" + return fmt::format("{}{}" + "{}{}" + "{}{}" + "{}{}" + "{}{}" + "{}{}" "{}", general_samples_.generate_yaml_string(), + general_samples_.has_samples() ? "\n" : "", clock_samples_.generate_yaml_string(), + clock_samples_.has_samples() ? "\n" : "", power_samples_.generate_yaml_string(), + power_samples_.has_samples() ? "\n" : "", memory_samples_.generate_yaml_string(), + memory_samples_.has_samples() ? "\n" : "", temperature_samples_.generate_yaml_string(), + temperature_samples_.has_samples() ? "\n" : "", gfx_samples_.generate_yaml_string(), + gfx_samples_.has_samples() ? "\n" : "", idle_state_samples_.generate_yaml_string()); } diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 61a8456..d92e594 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -687,15 +687,19 @@ std::string gpu_amd_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" 
}; } - return fmt::format("{}\n" - "{}\n" - "{}\n" - "{}\n" + return fmt::format("{}{}" + "{}{}" + "{}{}" + "{}{}" "{}", general_samples_.generate_yaml_string(), + general_samples_.has_samples() ? "\n" : "", clock_samples_.generate_yaml_string(), + clock_samples_.has_samples() ? "\n" : "", power_samples_.generate_yaml_string(), + power_samples_.has_samples() ? "\n" : "", memory_samples_.generate_yaml_string(), + memory_samples_.has_samples() ? "\n" : "", temperature_samples_.generate_yaml_string()); } diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp index c2e8eec..48e459b 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp @@ -678,15 +678,19 @@ std::string gpu_intel_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return fmt::format("{}\n" - "{}\n" - "{}\n" - "{}\n" + return fmt::format("{}{}" + "{}{}" + "{}{}" + "{}{}" "{}", general_samples_.generate_yaml_string(), + general_samples_.has_samples() ? "\n" : "", clock_samples_.generate_yaml_string(), + clock_samples_.has_samples() ? "\n" : "", power_samples_.generate_yaml_string(), + power_samples_.has_samples() ? "\n" : "", memory_samples_.generate_yaml_string(), + memory_samples_.has_samples() ? "\n" : "", temperature_samples_.generate_yaml_string()); } diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 1536237..f9a064b 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -551,15 +551,19 @@ std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" 
}; } - return fmt::format("{}\n" - "{}\n" - "{}\n" - "{}\n" + return fmt::format("{}{}" + "{}{}" + "{}{}" + "{}{}" "{}", general_samples_.generate_yaml_string(), + general_samples_.has_samples() ? "\n" : "", clock_samples_.generate_yaml_string(), + clock_samples_.has_samples() ? "\n" : "", power_samples_.generate_yaml_string(), + power_samples_.has_samples() ? "\n" : "", memory_samples_.generate_yaml_string(), + memory_samples_.has_samples() ? "\n" : "", temperature_samples_.generate_yaml_string()); } From 66ba78bf1f26ae5730a911ecc08f5d379007eb26 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 16:55:24 +0200 Subject: [PATCH 48/69] Add the possibility to disable sampling categories. --- bindings/CMakeLists.txt | 1 + bindings/cpu_hardware_sampler.cpp | 3 + bindings/gpu_amd_hardware_sampler.cpp | 5 + bindings/gpu_intel_hardware_sampler.cpp | 5 + bindings/gpu_nvidia_hardware_sampler.cpp | 5 + bindings/main.cpp | 2 + bindings/sample_category.cpp | 30 + bindings/system_hardware_sampler.cpp | 7 +- include/hardware_sampling/core.hpp | 1 + .../cpu/hardware_sampler.hpp | 7 +- .../gpu_amd/hardware_sampler.hpp | 13 +- .../gpu_intel/hardware_sampler.hpp | 13 +- .../gpu_intel/level_zero_samples.hpp | 1 - .../gpu_nvidia/hardware_sampler.hpp | 13 +- .../hardware_sampling/hardware_sampler.hpp | 18 +- include/hardware_sampling/sample_category.hpp | 117 ++++ .../system_hardware_sampler.hpp | 7 +- .../cpu/hardware_sampler.cpp | 515 ++++++++++-------- .../gpu_amd/hardware_sampler.cpp | 91 ++-- .../gpu_intel/hardware_sampler.cpp | 35 +- .../gpu_nvidia/hardware_sampler.cpp | 37 +- src/hardware_sampling/hardware_sampler.cpp | 15 +- .../system_hardware_sampler.cpp | 15 +- 23 files changed, 608 insertions(+), 348 deletions(-) create mode 100644 bindings/sample_category.cpp create mode 100644 include/hardware_sampling/sample_category.hpp diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index fb00d29..ffa7984 100644 --- a/bindings/CMakeLists.txt +++ 
b/bindings/CMakeLists.txt @@ -34,6 +34,7 @@ set(HWS_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/relative_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sample_category.cpp ${CMAKE_CURRENT_SOURCE_DIR}/system_hardware_sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ) diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp index ba31dfe..6d18fe1 100644 --- a/bindings/cpu_hardware_sampler.cpp +++ b/bindings/cpu_hardware_sampler.cpp @@ -8,6 +8,7 @@ #include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} #include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds @@ -126,7 +127,9 @@ void init_cpu_hardware_sampler(py::module_ &m) { // bind the CPU hardware sampler class py::class_(m, "CpuHardwareSampler") .def(py::init<>(), "construct a new CPU hardware sampler") + .def(py::init(), "construct a new CPU hardware sampler sampling only the provided sample_category samples") .def(py::init(), "construct a new CPU hardware sampler specifying the used sampling interval") + .def(py::init(), "construct a new CPU hardware sampler specifying the used sampling interval sampling only the provided sample_category samples") .def("general_samples", &hws::cpu_hardware_sampler::general_samples, "get all general samples") .def("clock_samples", &hws::cpu_hardware_sampler::clock_samples, "get all clock related samples") .def("power_samples", &hws::cpu_hardware_sampler::power_samples, "get all power related samples") diff --git a/bindings/gpu_amd_hardware_sampler.cpp 
b/bindings/gpu_amd_hardware_sampler.cpp index c738340..55fbc75 100644 --- a/bindings/gpu_amd_hardware_sampler.cpp +++ b/bindings/gpu_amd_hardware_sampler.cpp @@ -8,6 +8,7 @@ #include "hardware_sampling/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds @@ -119,9 +120,13 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) { // bind the GPU AMD hardware sampler class py::class_(m, "GpuAmdHardwareSampler") .def(py::init<>(), "construct a new AMD GPU hardware sampler for the default device with the default sampling interval") + .def(py::init(), "construct a new AMD GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new AMD GPU hardware sampler for the specified device with the default sampling interval") + .def(py::init(), "construct a new AMD GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new AMD GPU hardware sampler for the default device with the specified sampling interval") + .def(py::init(), "construct a new AMD GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new AMD GPU hardware sampler for the specified device and sampling interval") + .def(py::init(), "construct a new AMD GPU hardware sampler for the specified device and sampling interval sampling only 
the provided sample_category samples") .def("general_samples", &hws::gpu_amd_hardware_sampler::general_samples, "get all general samples") .def("clock_samples", &hws::gpu_amd_hardware_sampler::clock_samples, "get all clock related samples") .def("power_samples", &hws::gpu_amd_hardware_sampler::power_samples, "get all power related samples") diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp index 77b67fc..546d295 100644 --- a/bindings/gpu_intel_hardware_sampler.cpp +++ b/bindings/gpu_intel_hardware_sampler.cpp @@ -8,6 +8,7 @@ #include "hardware_sampling/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler #include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds @@ -105,9 +106,13 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the GPU Intel hardware sampler class py::class_<hws::gpu_intel_hardware_sampler, hws::hardware_sampler>(m, "GpuIntelHardwareSampler") .def(py::init<>(), "construct a new Intel GPU hardware sampler for the default device with the default sampling interval") + .def(py::init<hws::sample_category>(), "construct a new Intel GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples") .def(py::init<std::size_t>(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval") + .def(py::init<std::size_t, hws::sample_category>(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples") .def(py::init<std::chrono::milliseconds>(), "construct a new Intel GPU hardware sampler for the default device with 
the specified sampling interval") + .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new Intel GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples") .def(py::init<std::size_t, std::chrono::milliseconds>(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval") + .def(py::init<std::size_t, std::chrono::milliseconds, hws::sample_category>(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples") .def("general_samples", &hws::gpu_intel_hardware_sampler::general_samples, "get all general samples") .def("clock_samples", &hws::gpu_intel_hardware_sampler::clock_samples, "get all clock related samples") .def("power_samples", &hws::gpu_intel_hardware_sampler::power_samples, "get all power related samples") diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp index f9b9261..10a04b3 100644 --- a/bindings/gpu_nvidia_hardware_sampler.cpp +++ b/bindings/gpu_nvidia_hardware_sampler.cpp @@ -8,6 +8,7 @@ #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds @@ -105,9 +106,13 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) { // bind the GPU NVIDIA hardware sampler class py::class_<hws::gpu_nvidia_hardware_sampler, hws::hardware_sampler>(m, "GpuNvidiaHardwareSampler") .def(py::init<>(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval") + .def(py::init<hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval sampling only the 
provided sample_category samples") .def(py::init<std::size_t>(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval") + .def(py::init<std::size_t, hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples") .def(py::init<std::chrono::milliseconds>(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval") + .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples") .def(py::init<std::size_t, std::chrono::milliseconds>(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval") + .def(py::init<std::size_t, std::chrono::milliseconds, hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples") .def("general_samples", &hws::gpu_nvidia_hardware_sampler::general_samples, "get all general samples") .def("clock_samples", &hws::gpu_nvidia_hardware_sampler::clock_samples, "get all clock related samples") .def("power_samples", &hws::gpu_nvidia_hardware_sampler::power_samples, "get all power related samples") diff --git a/bindings/main.cpp b/bindings/main.cpp index 11dbf33..39eb521 100644 --- a/bindings/main.cpp +++ b/bindings/main.cpp @@ -16,6 +16,7 @@ namespace py = pybind11; // forward declare binding functions void init_event(py::module_ &); +void init_sample_category(py::module_ &); void init_relative_event(py::module_ &); void init_hardware_sampler(py::module_ &); void init_system_hardware_sampler(py::module_ &); @@ -28,6 +29,7 @@ PYBIND11_MODULE(HardwareSampling, m) { m.doc() = "Hardware Sampling for CPUs and GPUs"; init_event(m); + init_sample_category(m); init_relative_event(m); init_hardware_sampler(m); init_system_hardware_sampler(m); diff --git a/bindings/sample_category.cpp b/bindings/sample_category.cpp new file mode 100644 index 0000000..2db6563 --- 
/dev/null +++ b/bindings/sample_category.cpp @@ -0,0 +1,30 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "hardware_sampling/sample_category.hpp" // hws::sample_category + +#include "pybind11/operators.h" // operator overloading +#include "pybind11/pybind11.h" // py::module_, py::overload_cast + +namespace py = pybind11; + +void init_sample_category(py::module_ &m) { + // sample_category enum and bitwise operations on the sample_category enum + py::enum_<hws::sample_category>(m, "SampleCategory") + .value("GENERAL", hws::sample_category::general, "General hardware samples like architecture, names, or utilization.") + .value("CLOCK", hws::sample_category::clock, "Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons.") + .value("POWER", hws::sample_category::power, "Power-related hardware samples like current power draw or total energy consumption.") + .value("MEMORY", hws::sample_category::memory, "Memory-related hardware samples like memory usage or PCIe information.") + .value("TEMPERATURE", hws::sample_category::temperature, "Temperature-related hardware samples like maximum and current temperatures.") + .value("GFX", hws::sample_category::gfx, "Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler.") + .value("IDLE_STATE", hws::sample_category::idle_state, "Idle-state-related hardware samples. 
Only used in the cpu_hardware_sampler.") + .value("ALL", hws::sample_category::all, "Shortcut to enable all available hardware samples (default).") + .def("__invert__", py::overload_cast<hws::sample_category>(&hws::operator~)) + .def("__and__", py::overload_cast<hws::sample_category, hws::sample_category>(&hws::operator&)) + .def("__or__", py::overload_cast<hws::sample_category, hws::sample_category>(&hws::operator|)) + .def("__xor__", py::overload_cast<hws::sample_category, hws::sample_category>(&hws::operator^)); +} diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp index 3c24ad3..f26a4b6 100644 --- a/bindings/system_hardware_sampler.cpp +++ b/bindings/system_hardware_sampler.cpp @@ -7,8 +7,9 @@ #include "hardware_sampling/system_hardware_sampler.hpp" // hws::system_hardware_sampler -#include "hardware_sampling/event.hpp" // hws::event -#include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time +#include "hardware_sampling/event.hpp" // hws::event +#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // bind std::chrono types @@ -24,7 +25,9 @@ void init_system_hardware_sampler(py::module_ &m) { // bind the pure virtual hardware sampler base class py::class_<hws::system_hardware_sampler>(m, "SystemHardwareSampler") .def(py::init<>(), "construct a new system hardware sampler with the default sampling interval") + .def(py::init<hws::sample_category>(), "construct a new system hardware sampler with the default sampling interval sampling only the provided sample_category samples") .def(py::init<std::chrono::milliseconds>(), "construct a new system hardware sampler for with the specified sampling interval") + .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new system hardware sampler for with the specified sampling interval sampling only the provided sample_category samples") .def("start", &hws::system_hardware_sampler::start_sampling, "start hardware sampling for all available hardware samplers") .def("stop", &hws::system_hardware_sampler::stop_sampling, "stop hardware 
sampling for all available hardware samplers") .def("pause", &hws::system_hardware_sampler::pause_sampling, "pause hardware sampling for all available hardware samplers") diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp index 15d65df..7b259f4 100644 --- a/include/hardware_sampling/core.hpp +++ b/include/hardware_sampling/core.hpp @@ -14,6 +14,7 @@ #include "hardware_sampling/event.hpp" #include "hardware_sampling/hardware_sampler.hpp" +#include "hardware_sampling/sample_category.hpp" #include "hardware_sampling/system_hardware_sampler.hpp" #if defined(HWS_FOR_CPUS_ENABLED) diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp index 4e65338..8105fd4 100644 --- a/include/hardware_sampling/cpu/hardware_sampler.hpp +++ b/include/hardware_sampling/cpu/hardware_sampler.hpp @@ -14,6 +14,7 @@ #include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter @@ -32,13 +33,15 @@ class cpu_hardware_sampler : public hardware_sampler { public: /** * @brief Construct a new CPU hardware sampler with the default sampling interval. + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - cpu_hardware_sampler(); + explicit cpu_hardware_sampler(sample_category category = sample_category::all); /** * @brief Construct a new CPU hardware sampler with the @p sampling_interval. 
* @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit cpu_hardware_sampler(std::chrono::milliseconds sampling_interval); + explicit cpu_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp index 65e6ca3..a44dec7 100644 --- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp @@ -14,6 +14,7 @@ #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter @@ -36,27 +37,31 @@ class gpu_amd_hardware_sampler : public hardware_sampler { /** * @brief Construct a new AMD GPU hardware sampler for the default device with the default sampling interval. * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_amd_hardware_sampler(); + explicit gpu_amd_hardware_sampler(sample_category category = sample_category::all); /** * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the default sampling interval. * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. 
* @param[in] device_id the ID of the device to sample + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_amd_hardware_sampler(std::size_t device_id); + explicit gpu_amd_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all); /** * @brief Construct a new AMD GPU hardware sampler for the default device with the @p sampling_interval. * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_amd_hardware_sampler(std::chrono::milliseconds sampling_interval); + explicit gpu_amd_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the @p sampling_interval. * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. * @param[in] device_id the ID of the device to sample * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_amd_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval); + gpu_amd_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). 
diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp index d47bd32..bea3103 100644 --- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp @@ -15,6 +15,7 @@ #include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle #include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::formatter, fmt::ostream_formatter @@ -37,27 +38,31 @@ class gpu_intel_hardware_sampler : public hardware_sampler { /** * @brief Construct a new Intel GPU hardware sampler for the default device with the default sampling interval. * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_intel_hardware_sampler(); + explicit gpu_intel_hardware_sampler(sample_category category = sample_category::all); /** * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the default sampling interval. * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. * @param[in] device_id the ID of the device to sample + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_intel_hardware_sampler(std::size_t device_id); + explicit gpu_intel_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all); /** * @brief Construct a new Intel GPU hardware sampler for the default device with the @p sampling_interval. 
* @details If this is the first Intel GPU sampler, initializes the Level Zero environment. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_intel_hardware_sampler(std::chrono::milliseconds sampling_interval); + explicit gpu_intel_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the @p sampling_interval. * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. * @param[in] device_id the ID of the device to sample * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_intel_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval); + gpu_intel_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp index a1c56c2..f178f7f 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp @@ -54,7 +54,6 @@ class level_zero_general_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the model name of the device HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, flags) // potential GPU flags (e.g. 
integrated device) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode) // the enabled standby mode (power saving or never) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu) // the number of threads per EU unit HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, eu_simd_width) // the physical EU unit SIMD width diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp index 562348a..d73cd07 100644 --- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp @@ -15,6 +15,7 @@ #include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp" // hws::nvml_device_handle #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::formatter, fmt::ostream_formatter @@ -37,27 +38,31 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler { /** * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval. * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_nvidia_hardware_sampler(); + explicit gpu_nvidia_hardware_sampler(sample_category category = sample_category::all); /** * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the default sampling interval. * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. 
* @param[in] device_id the ID of the device to sample + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_nvidia_hardware_sampler(std::size_t device_id); + explicit gpu_nvidia_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all); /** * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the @p sampling_interval. * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_nvidia_hardware_sampler(std::chrono::milliseconds sampling_interval); + explicit gpu_nvidia_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the @p sampling_interval. * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. * @param[in] device_id the ID of the device to sample * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_nvidia_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval); + gpu_nvidia_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). 
diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp index 64eb833..e534d19 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hardware_sampling/hardware_sampler.hpp @@ -12,7 +12,8 @@ #define HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/event.hpp" // hws::event +#include "hardware_sampling/event.hpp" // hws::event +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include <atomic> // std::atomic #include <chrono> // std::chrono::{system_clock::time_point, steady_clock::time_point, milliseconds} @@ -32,8 +33,9 @@ class hardware_sampler { /** * @brief Construct a new hardware sampler with the provided @p sampling_interval. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling */ - explicit hardware_sampler(std::chrono::milliseconds sampling_interval); + hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category); /** * @brief Delete the copy-constructor (already implicitly deleted due to the std::atomic member). @@ -182,7 +184,14 @@ class hardware_sampler { * @brief Add a new time point to this hardware sampler. Called during the sampling loop. * @param time_point the new time point to add */ - void add_time_point(const std::chrono::steady_clock::time_point time_point) { time_points_.push_back(time_point); } + void add_time_point(std::chrono::steady_clock::time_point time_point); + + /** + * @brief Check whether the @p category is currently enabled for hardware sampling or not. + * @param[in] category the sample_category to check + * @return Returns `true` if @p category is enabled for sampling, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] bool sample_category_enabled(sample_category category) const noexcept; private: /// A boolean flag indicating whether the sampling has already started. 
@@ -206,6 +215,9 @@ class hardware_sampler { /// The sampling interval of this hardware sampler. const std::chrono::milliseconds sampling_interval_{}; + + /// The bitmask of sample categories to use. + const sample_category sample_category_{}; }; } // namespace hws diff --git a/include/hardware_sampling/sample_category.hpp b/include/hardware_sampling/sample_category.hpp new file mode 100644 index 0000000..e740544 --- /dev/null +++ b/include/hardware_sampling/sample_category.hpp @@ -0,0 +1,117 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines an enum class with all sample categories to be able to only selectively enable some samples. + */ + +#ifndef HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_ +#define HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_ +#pragma once + +namespace hws { + +/** + * @brief Enum class as bitfield containing the possible sample categories. + * @details The sample_category "gfx" and "idle_state" are only used in the cpu_hardware_sampler. + * Additionally, the "all" sample_category is available to easily enable all hardware samples (default). + */ +enum class sample_category : int { + // clang-format off + /// General hardware samples like architecture, names, or utilization. + general = 0b00000001, + /// Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons. + clock = 0b00000010, + /// Power-related hardware samples like current power draw or total energy consumption. + power = 0b00000100, + /// Memory-related hardware samples like memory usage or PCIe information. + memory = 0b00001000, + /// Temperature-related hardware samples like maximum and current temperatures. + temperature = 0b00010000, + /// Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler. 
+ gfx = 0b00100000, + /// Idle-state-related hardware samples. Only used in the cpu_hardware_sampler. + idle_state = 0b01000000, + /// Shortcut to enable all available hardware samples (default). + all = 0b01111111 + // clang-format on +}; + +/** + * @brief Compute the bitwise not of @p sc. + * @param[in] sc the sample_category to apply the bitwise not to + * @return the bitwise not result (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr sample_category operator~(const sample_category sc) noexcept { + return static_cast<sample_category>(~static_cast<int>(sc)); +} + +/** + * @brief Compute the bitwise and between @p lhs and @p rhs and return a new sample_category. + * @param[in] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return the bitwise and result (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr sample_category operator&(const sample_category lhs, const sample_category rhs) noexcept { + return static_cast<sample_category>(static_cast<int>(lhs) & static_cast<int>(rhs)); +} + +/** + * @brief Compute the bitwise or between @p lhs and @p rhs and return a new sample_category. + * @param[in] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return the bitwise or result (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr sample_category operator|(const sample_category lhs, const sample_category rhs) noexcept { + return static_cast<sample_category>(static_cast<int>(lhs) | static_cast<int>(rhs)); +} + +/** + * @brief Compute the bitwise xor between @p lhs and @p rhs and return a new sample_category. + * @param[in] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return the bitwise xor result (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr sample_category operator^(const sample_category lhs, const sample_category rhs) noexcept { + return static_cast<sample_category>(static_cast<int>(lhs) ^ static_cast<int>(rhs)); +} + +/** + * @brief Compute the bitwise compound and between @p lhs and @p rhs and return the result in @p lhs. 
+ * @param[in,out] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return a reference to @p lhs containing the bitwise and result + */ +constexpr sample_category &operator&=(sample_category &lhs, const sample_category rhs) noexcept { + lhs = lhs & rhs; + return lhs; +} + +/** + * @brief Compute the bitwise compound or between @p lhs and @p rhs and return the result in @p lhs. + * @param[in,out] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return a reference to @p lhs containing the bitwise or result + */ +constexpr sample_category &operator|=(sample_category &lhs, const sample_category rhs) noexcept { + lhs = lhs | rhs; + return lhs; +} + +/** + * @brief Compute the bitwise compound xor between @p lhs and @p rhs and return the result in @p lhs. + * @param[in,out] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return a reference to @p lhs containing the bitwise xor result + */ +constexpr sample_category &operator^=(sample_category &lhs, const sample_category rhs) noexcept { + lhs = lhs ^ rhs; + return lhs; +} + +} // namespace hws + +#endif // HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_ diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp index 394a0c6..d8c56fd 100644 --- a/include/hardware_sampling/system_hardware_sampler.hpp +++ b/include/hardware_sampling/system_hardware_sampler.hpp @@ -13,6 +13,7 @@ #include "hardware_sampling/event.hpp" // hws::event #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include <chrono> // std::chrono::{milliseconds, steady_clock::time_point} #include <cstddef> // std::size_t @@ -31,13 +32,15 @@ class system_hardware_sampler { public: /** * @brief Construct hardware samplers with the default sampling interval. 
+ * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - system_hardware_sampler(); + explicit system_hardware_sampler(sample_category category = sample_category::all); /** * @brief Construct hardware samplers with the provided @p sampling_interval. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit system_hardware_sampler(std::chrono::milliseconds sampling_interval); + explicit system_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Delete the copy-constructor. diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 824ea7b..4a51c10 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -10,6 +10,7 @@ #include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} #include "hardware_sampling/cpu/utility.hpp" // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess #include "hardware_sampling/hardware_sampler.hpp" // hws::tracking::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "hardware_sampling/utility.hpp" // hws::detail::{split, split_as, trim, convert_to, starts_with} #include "fmt/format.h" // fmt::format @@ -33,11 +34,11 @@ namespace hws { -cpu_hardware_sampler::cpu_hardware_sampler() : - cpu_hardware_sampler{ HWS_SAMPLING_INTERVAL } { } +cpu_hardware_sampler::cpu_hardware_sampler(const sample_category category) : + cpu_hardware_sampler{ HWS_SAMPLING_INTERVAL, category } { } -cpu_hardware_sampler::cpu_hardware_sampler(const std::chrono::milliseconds sampling_interval) : - hardware_sampler{ sampling_interval } { } 
+cpu_hardware_sampler::cpu_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + hardware_sampler{ sampling_interval, category } { } cpu_hardware_sampler::~cpu_hardware_sampler() { try { @@ -71,53 +72,63 @@ void cpu_hardware_sampler::sampling_loop() { value = detail::trim(value); // check the lines if the start with an entry that we want to sample - if (detail::starts_with(line, "Architecture")) { - general_samples_.architecture_ = detail::convert_to(value); - } else if (detail::starts_with(line, "Byte Order")) { - general_samples_.byte_order_ = detail::convert_to(value); - } else if (detail::starts_with(line, "CPU(s)")) { - general_samples_.num_threads_ = detail::convert_to(value); - } else if (detail::starts_with(line, "Thread(s) per core")) { - general_samples_.threads_per_core_ = detail::convert_to(value); - } else if (detail::starts_with(line, "Core(s) per socket")) { - general_samples_.cores_per_socket_ = detail::convert_to(value); - } else if (detail::starts_with(line, "Socket(s)")) { - general_samples_.num_sockets_ = detail::convert_to(value); - } else if (detail::starts_with(line, "NUMA node(s)")) { - general_samples_.numa_nodes_ = detail::convert_to(value); - } else if (detail::starts_with(line, "Vendor ID")) { - general_samples_.vendor_id_ = detail::convert_to(value); - } else if (detail::starts_with(line, "Model name")) { - general_samples_.name_ = detail::convert_to(value); - } else if (detail::starts_with(line, "Flags")) { - general_samples_.flags_ = detail::split_as(value, ' '); - } else if (detail::starts_with(line, "Frequency boost")) { - clock_samples_.auto_boosted_clock_enabled_ = value == "enabled"; - } else if (detail::starts_with(line, "CPU max MHz")) { - clock_samples_.clock_frequency_max_ = detail::convert_to(value); - } else if (detail::starts_with(line, "CPU min MHz")) { - clock_samples_.clock_frequency_min_ = detail::convert_to(value); - } else if (detail::starts_with(line, "L1d cache")) { 
- memory_samples_.cache_size_L1d_ = detail::convert_to(value); - } else if (detail::starts_with(line, "L1i cache")) { - memory_samples_.cache_size_L1i_ = detail::convert_to(value); - } else if (detail::starts_with(line, "L2 cache")) { - memory_samples_.cache_size_L2_ = detail::convert_to(value); - } else if (detail::starts_with(line, "L3 cache")) { - memory_samples_.cache_size_L3_ = detail::convert_to(value); + if (this->sample_category_enabled(sample_category::general)) { + if (detail::starts_with(line, "Architecture")) { + general_samples_.architecture_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Byte Order")) { + general_samples_.byte_order_ = detail::convert_to(value); + } else if (detail::starts_with(line, "CPU(s)")) { + general_samples_.num_threads_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Thread(s) per core")) { + general_samples_.threads_per_core_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Core(s) per socket")) { + general_samples_.cores_per_socket_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Socket(s)")) { + general_samples_.num_sockets_ = detail::convert_to(value); + } else if (detail::starts_with(line, "NUMA node(s)")) { + general_samples_.numa_nodes_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Vendor ID")) { + general_samples_.vendor_id_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Model name")) { + general_samples_.name_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Flags")) { + general_samples_.flags_ = detail::split_as(value, ' '); + } + } + if (this->sample_category_enabled(sample_category::clock)) { + if (detail::starts_with(line, "Frequency boost")) { + clock_samples_.auto_boosted_clock_enabled_ = value == "enabled"; + } else if (detail::starts_with(line, "CPU max MHz")) { + clock_samples_.clock_frequency_max_ = detail::convert_to(value); + } else if 
(detail::starts_with(line, "CPU min MHz")) { + clock_samples_.clock_frequency_min_ = detail::convert_to(value); + } + } + if (this->sample_category_enabled(sample_category::memory)) { + if (detail::starts_with(line, "L1d cache")) { + memory_samples_.cache_size_L1d_ = detail::convert_to(value); + } else if (detail::starts_with(line, "L1i cache")) { + memory_samples_.cache_size_L1i_ = detail::convert_to(value); + } else if (detail::starts_with(line, "L2 cache")) { + memory_samples_.cache_size_L2_ = detail::convert_to(value); + } else if (detail::starts_with(line, "L3 cache")) { + memory_samples_.cache_size_L3_ = detail::convert_to(value); + } } } - // check if the number of cores can be derived from the otherwise found values - if (general_samples_.num_threads_.has_value() && general_samples_.threads_per_core_.has_value()) { - general_samples_.num_cores_ = general_samples_.num_threads_.value() / general_samples_.threads_per_core_.value(); + if (this->sample_category_enabled(sample_category::general)) { + // check if the number of cores can be derived from the otherwise found values + if (general_samples_.num_threads_.has_value() && general_samples_.threads_per_core_.has_value()) { + general_samples_.num_cores_ = general_samples_.num_threads_.value() / general_samples_.threads_per_core_.value(); + } } } #endif #if defined(HWS_VIA_FREE_ENABLED) const std::regex whitespace_replace_reg{ "[ ]+", std::regex::extended }; - { + if (this->sample_category_enabled(sample_category::memory)) { std::string free_output = detail::run_subprocess("free -b"); free_output = std::regex_replace(free_output, whitespace_replace_reg, " "); const std::vector free_lines = detail::split(detail::trim(free_output), '\n'); @@ -164,104 +175,121 @@ void cpu_hardware_sampler::sampling_loop() { const std::vector values = detail::split(data[1], '\t'); for (std::size_t i = 0; i < header.size(); ++i) { - if (header[i] == "Avg_MHz") { - using vector_type = 
decltype(clock_samples_.clock_frequency_)::value_type; - clock_samples_.clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Busy%") { - using vector_type = decltype(general_samples_.compute_utilization_)::value_type; - general_samples_.compute_utilization_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Bzy_MHz") { - using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; - clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "TSC_MHz") { - using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; - clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "IPC") { - using vector_type = decltype(general_samples_.ipc_)::value_type; - general_samples_.ipc_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "IRQ") { - using vector_type = decltype(general_samples_.irq_)::value_type; - general_samples_.irq_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "SMI") { - using vector_type = decltype(general_samples_.smi_)::value_type; - general_samples_.smi_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "POLL") { - using vector_type = decltype(general_samples_.poll_)::value_type; - general_samples_.poll_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "POLL%") { - using vector_type = decltype(general_samples_.poll_percent_)::value_type; - general_samples_.poll_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CoreTmp") { - using vector_type = decltype(temperature_samples_.core_temperature_)::value_type; - temperature_samples_.core_temperature_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CoreThr") { - using vector_type = 
decltype(temperature_samples_.core_throttle_percent_)::value_type; - temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "PkgTmp") { - using vector_type = decltype(temperature_samples_.temperature_)::value_type; - temperature_samples_.temperature_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFX%rc6") { - using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; - gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFXMHz") { - using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; - gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFXAMHz") { - using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; - gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Totl%C0") { - using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; - idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Any%C0") { - using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; - idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFX%C0") { - using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; - gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CPUGFX%") { - using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; - gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CPU%LPI") { - using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; - 
idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "SYS%LPI") { - using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; - idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Pkg%LPI") { - using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; - idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "PkgWatt") { - using vector_type = decltype(power_samples_.power_usage_)::value_type; - power_samples_.power_usage_ = vector_type{ detail::convert_to(values[i]) }; - power_samples_.power_measurement_type_ = "current/instant"; - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 }; - } else if (header[i] == "CorWatt") { - using vector_type = decltype(power_samples_.core_watt_)::value_type; - power_samples_.core_watt_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFXWatt") { - using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; - gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "RAMWatt") { - using vector_type = decltype(power_samples_.ram_watt_)::value_type; - power_samples_.ram_watt_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "PKG_%") { - using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; - power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "RAM_%") { - using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; - power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else { - // test against 
regex - const std::string header_str{ header[i] }; - const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended }; - if (std::regex_match(header_str, reg)) { - // first time this branch is reached -> create optional value - if (!idle_state_samples_.idle_states_.has_value()) { - idle_state_samples_.idle_states_ = std::make_optional(); - } + if (this->sample_category_enabled(sample_category::general)) { + if (header[i] == "Busy%") { + using vector_type = decltype(general_samples_.compute_utilization_)::value_type; + general_samples_.compute_utilization_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "IPC") { + using vector_type = decltype(general_samples_.ipc_)::value_type; + general_samples_.ipc_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "IRQ") { + using vector_type = decltype(general_samples_.irq_)::value_type; + general_samples_.irq_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "SMI") { + using vector_type = decltype(general_samples_.smi_)::value_type; + general_samples_.smi_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "POLL") { + using vector_type = decltype(general_samples_.poll_)::value_type; + general_samples_.poll_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "POLL%") { + using vector_type = decltype(general_samples_.poll_percent_)::value_type; + general_samples_.poll_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + } + if (this->sample_category_enabled(sample_category::clock)) { + if (header[i] == "Avg_MHz") { + using vector_type = decltype(clock_samples_.clock_frequency_)::value_type; + clock_samples_.clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "Bzy_MHz") { + using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; + 
clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "TSC_MHz") { + using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; + clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to(values[i]) }; + } + } + if (this->sample_category_enabled(sample_category::power)) { + if (header[i] == "PkgWatt") { + using vector_type = decltype(power_samples_.power_usage_)::value_type; + power_samples_.power_usage_ = vector_type{ detail::convert_to(values[i]) }; + power_samples_.power_measurement_type_ = "current/instant"; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 }; + } else if (header[i] == "CorWatt") { + using vector_type = decltype(power_samples_.core_watt_)::value_type; + power_samples_.core_watt_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "RAMWatt") { + using vector_type = decltype(power_samples_.ram_watt_)::value_type; + power_samples_.ram_watt_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "PKG_%") { + using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; + power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "RAM_%") { + using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; + power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + } + if (this->sample_category_enabled(sample_category::temperature)) { + if (header[i] == "CoreTmp") { + using vector_type = decltype(temperature_samples_.core_temperature_)::value_type; + temperature_samples_.core_temperature_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "CoreThr") { + using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; + 
temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "PkgTmp") { + using vector_type = decltype(temperature_samples_.temperature_)::value_type; + temperature_samples_.temperature_ = vector_type{ detail::convert_to(values[i]) }; + } + } + if (this->sample_category_enabled(sample_category::gfx)) { + if (header[i] == "GFX%rc6") { + using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; + gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "GFXMHz") { + using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; + gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "GFXAMHz") { + using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; + gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "GFX%C0") { + using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; + gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "CPUGFX%") { + using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; + gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "GFXWatt") { + using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; + gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to(values[i]) }; + } + } + if (this->sample_category_enabled(sample_category::idle_state)) { + if (header[i] == "Totl%C0") { + using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; + idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "Any%C0") { + using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; + 
idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "CPU%LPI") { + using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; + idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "SYS%LPI") { + using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; + idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else if (header[i] == "Pkg%LPI") { + using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; + idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; + } else { + // test against regex + const std::string header_str{ header[i] }; + const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended }; + if (std::regex_match(header_str, reg)) { + // first time this branch is reached -> create optional value + if (!idle_state_samples_.idle_states_.has_value()) { + idle_state_samples_.idle_states_ = std::make_optional(); + } - using vector_type = cpu_idle_states_samples::map_type::mapped_type; - idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to(values[i]) }; + using vector_type = cpu_idle_states_samples::map_type::mapped_type; + idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to(values[i]) }; + } } } } @@ -279,7 +307,7 @@ void cpu_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); #if defined(HWS_VIA_FREE_ENABLED) - { + if (this->sample_category_enabled(sample_category::memory)) { // run free std::string free_output = detail::run_subprocess("free -b"); free_output = std::regex_replace(free_output, whitespace_replace_reg, " 
"); @@ -311,101 +339,118 @@ void cpu_hardware_sampler::sampling_loop() { // add values to the respective sample entries for (std::size_t i = 0; i < header.size(); ++i) { - if (header[i] == "Avg_MHz") { - using vector_type = decltype(clock_samples_.clock_frequency_)::value_type; - clock_samples_.clock_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Busy%") { - using vector_type = decltype(general_samples_.compute_utilization_)::value_type; - general_samples_.compute_utilization_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Bzy_MHz") { - using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; - clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "TSC_MHz") { - using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; - clock_samples_.time_stamp_counter_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "IPC") { - using vector_type = decltype(general_samples_.ipc_)::value_type; - general_samples_.ipc_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "IRQ") { - using vector_type = decltype(general_samples_.irq_)::value_type; - general_samples_.irq_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "SMI") { - using vector_type = decltype(general_samples_.smi_)::value_type; - general_samples_.smi_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "POLL") { - using vector_type = decltype(general_samples_.poll_)::value_type; - general_samples_.poll_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "POLL%") { - using vector_type = decltype(general_samples_.poll_percent_)::value_type; - general_samples_.poll_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CoreTmp") { - using vector_type = decltype(temperature_samples_.core_temperature_)::value_type; - 
temperature_samples_.core_temperature_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CoreThr") { - using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; - temperature_samples_.core_throttle_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "PkgTmp") { - using vector_type = decltype(temperature_samples_.temperature_)::value_type; - temperature_samples_.temperature_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFX%rc6") { - using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; - gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFXMHz") { - using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; - gfx_samples_.gfx_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFXAMHz") { - using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; - gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Totl%C0") { - using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; - idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Any%C0") { - using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; - idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFX%C0") { - using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; - gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CPUGFX%") { - using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; - gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CPU%LPI") { - using vector_type = 
decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; - idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "SYS%LPI") { - using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; - idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Pkg%LPI") { - using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; - idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "PkgWatt") { - using vector_type = decltype(power_samples_.power_usage_)::value_type; - power_samples_.power_usage_->push_back(detail::convert_to(values[i])); - // calculate total energy consumption - using value_type = decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type; - const std::size_t num_time_points = this->sampling_time_points().size(); - const value_type time_difference = std::chrono::duration(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count(); - const auto current = power_samples_.power_usage_->back() * time_difference; - power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current); - } else if (header[i] == "CorWatt") { - using vector_type = decltype(power_samples_.core_watt_)::value_type; - power_samples_.core_watt_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFXWatt") { - using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; - gfx_samples_.gfx_watt_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "RAMWatt") { - using vector_type = decltype(power_samples_.ram_watt_)::value_type; - power_samples_.ram_watt_->push_back(detail::convert_to(values[i])); - } else if (header[i] == 
"PKG_%") { - using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; - power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "RAM_%") { - using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; - power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); - } else { - const std::string header_str{ header[i] }; - if (idle_state_samples_.idle_states_.value().count(header_str) > decltype(idle_state_samples_)::map_type::size_type{ 0 }) { - using vector_type = cpu_idle_states_samples::map_type::mapped_type; - idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to(values[i])); + if (this->sample_category_enabled(sample_category::general)) { + if (header[i] == "Busy%") { + using vector_type = decltype(general_samples_.compute_utilization_)::value_type; + general_samples_.compute_utilization_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "IPC") { + using vector_type = decltype(general_samples_.ipc_)::value_type; + general_samples_.ipc_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "IRQ") { + using vector_type = decltype(general_samples_.irq_)::value_type; + general_samples_.irq_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "SMI") { + using vector_type = decltype(general_samples_.smi_)::value_type; + general_samples_.smi_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "POLL") { + using vector_type = decltype(general_samples_.poll_)::value_type; + general_samples_.poll_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "POLL%") { + using vector_type = decltype(general_samples_.poll_percent_)::value_type; + general_samples_.poll_percent_->push_back(detail::convert_to(values[i])); + } + } + if (this->sample_category_enabled(sample_category::clock)) { + if (header[i] == "Avg_MHz") { + using 
vector_type = decltype(clock_samples_.clock_frequency_)::value_type; + clock_samples_.clock_frequency_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "Bzy_MHz") { + using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; + clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "TSC_MHz") { + using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; + clock_samples_.time_stamp_counter_->push_back(detail::convert_to(values[i])); + } + } + if (this->sample_category_enabled(sample_category::power)) { + if (header[i] == "PkgWatt") { + using vector_type = decltype(power_samples_.power_usage_)::value_type; + power_samples_.power_usage_->push_back(detail::convert_to(values[i])); + // calculate total energy consumption + using value_type = decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type; + const std::size_t num_time_points = this->sampling_time_points().size(); + const value_type time_difference = std::chrono::duration(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count(); + const auto current = power_samples_.power_usage_->back() * time_difference; + power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current); + } else if (header[i] == "CorWatt") { + using vector_type = decltype(power_samples_.core_watt_)::value_type; + power_samples_.core_watt_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "RAMWatt") { + using vector_type = decltype(power_samples_.ram_watt_)::value_type; + power_samples_.ram_watt_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "PKG_%") { + using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; + power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); + } 
else if (header[i] == "RAM_%") { + using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; + power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); + } + } + if (this->sample_category_enabled(sample_category::temperature)) { + if (header[i] == "CoreTmp") { + using vector_type = decltype(temperature_samples_.core_temperature_)::value_type; + temperature_samples_.core_temperature_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "CoreThr") { + using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; + temperature_samples_.core_throttle_percent_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "PkgTmp") { + using vector_type = decltype(temperature_samples_.temperature_)::value_type; + temperature_samples_.temperature_->push_back(detail::convert_to(values[i])); + } + } + if (this->sample_category_enabled(sample_category::gfx)) { + if (header[i] == "GFX%rc6") { + using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; + gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "GFXMHz") { + using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; + gfx_samples_.gfx_frequency_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "GFXAMHz") { + using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; + gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "GFX%C0") { + using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; + gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "CPUGFX%") { + using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; + gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "GFXWatt") { 
+ using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; + gfx_samples_.gfx_watt_->push_back(detail::convert_to(values[i])); + } + } + if (this->sample_category_enabled(sample_category::idle_state)) { + if (header[i] == "Totl%C0") { + using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; + idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "Any%C0") { + using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; + idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "CPU%LPI") { + using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; + idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "SYS%LPI") { + using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; + idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); + } else if (header[i] == "Pkg%LPI") { + using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; + idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); + } else { + const std::string header_str{ header[i] }; + if (idle_state_samples_.idle_states_.value().count(header_str) > decltype(idle_state_samples_)::map_type::size_type{ 0 }) { + using vector_type = cpu_idle_states_samples::map_type::mapped_type; + idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to(values[i])); + } } } } diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index d92e594..7369fa3 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -10,6 +10,7 
@@ #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} #include "hardware_sampling/gpu_amd/utility.hpp" // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "hardware_sampling/utility.hpp" // hws::detail::time_points_to_epoch #include "fmt/format.h" // fmt::format @@ -33,17 +34,17 @@ namespace hws { -gpu_amd_hardware_sampler::gpu_amd_hardware_sampler() : - gpu_amd_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { } +gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const sample_category category) : + gpu_amd_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { } -gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id) : - gpu_amd_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { } +gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const sample_category category) : + gpu_amd_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { } -gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::chrono::milliseconds sampling_interval) : - gpu_amd_hardware_sampler{ 0, sampling_interval } { } +gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + gpu_amd_hardware_sampler{ 0, sampling_interval, category } { } -gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) : - hardware_sampler{ sampling_interval }, +gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) : + hardware_sampler{ sampling_interval, category }, device_id_{ 
static_cast(device_id) } { // make sure that rsmi_init is only called once for all instances if (instances_++ == 0) { @@ -86,7 +87,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { double initial_total_power_consumption{}; // initial total power consumption in J // retrieve initial general information - { + if (this->sample_category_enabled(sample_category::general)) { // fixed information -> only retrieved once // the byte order is given by AMD directly general_samples_.byte_order_ = "Little Endian"; @@ -125,51 +126,51 @@ void gpu_amd_hardware_sampler::sampling_loop() { } // retrieve initial clock related information - { + if (this->sample_category_enabled(sample_category::clock)) { rsmi_frequencies_t frequency_info{}; if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info) == RSMI_STATUS_SUCCESS) { - clock_samples_.clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000.0 / 1000.0; - clock_samples_.clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0; + clock_samples_.clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000'000.0; + clock_samples_.clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0; decltype(clock_samples_.available_clock_frequencies_)::value_type frequencies{}; for (std::size_t i = 0; i < frequency_info.num_supported; ++i) { - frequencies.push_back(static_cast(frequency_info.frequency[i]) / 1000.0 / 1000.0); + frequencies.push_back(static_cast(frequency_info.frequency[i]) / 1000'000.0); } clock_samples_.available_clock_frequencies_ = frequencies; // queried samples -> retrieved every iteration if available clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{}; if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); + 
clock_samples_.clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); } else { clock_samples_.clock_frequency_->push_back(0); } } if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info) == RSMI_STATUS_SUCCESS) { - clock_samples_.socket_clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000.0 / 1000.0; - clock_samples_.socket_clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0; + clock_samples_.socket_clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000'000.0; + clock_samples_.socket_clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0; // queried samples -> retrieved every iteration if available clock_samples_.socket_clock_frequency_ = decltype(clock_samples_.socket_clock_frequency_)::value_type{}; if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.socket_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); + clock_samples_.socket_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); } else { clock_samples_.socket_clock_frequency_->push_back(0); } } if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info) == RSMI_STATUS_SUCCESS) { - clock_samples_.memory_clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000.0 / 1000.0; - clock_samples_.memory_clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0; + clock_samples_.memory_clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000'000.0; + clock_samples_.memory_clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0; decltype(clock_samples_.available_memory_clock_frequencies_)::value_type frequencies{}; for 
(std::size_t i = 0; i < frequency_info.num_supported; ++i) { - frequencies.push_back(static_cast(frequency_info.frequency[i]) / 1000.0 / 1000.0); + frequencies.push_back(static_cast(frequency_info.frequency[i]) / 1000'000.0); } clock_samples_.available_memory_clock_frequencies_ = frequencies; // queried samples -> retrieved every iteration if available clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{}; if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.memory_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); + clock_samples_.memory_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); } else { clock_samples_.memory_clock_frequency_->push_back(0); } @@ -188,15 +189,15 @@ void gpu_amd_hardware_sampler::sampling_loop() { } // retrieve initial power related information - { + if (this->sample_category_enabled(sample_category::power)) { std::uint64_t power_default_cap{}; if (rsmi_dev_power_cap_default_get(device_id_, &power_default_cap) == RSMI_STATUS_SUCCESS) { - power_samples_.power_management_limit_ = static_cast(power_default_cap) / 1000.0 / 1000.0; + power_samples_.power_management_limit_ = static_cast(power_default_cap) / 1000'000.0; } std::uint64_t power_cap{}; if (rsmi_dev_power_cap_get(device_id_, std::uint32_t{ 0 }, &power_cap) == RSMI_STATUS_SUCCESS) { - power_samples_.power_enforced_limit_ = static_cast(power_cap) / 1000.0 / 1000.0; + power_samples_.power_enforced_limit_ = static_cast(power_cap) / 1000'000.0; } { @@ -215,7 +216,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { break; } // report power usage since the first sample - power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(power_usage) / 1000.0 / 1000.0 }; + power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(power_usage) / 
1000'000.0 }; } } @@ -281,7 +282,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { std::uint64_t power_total_energy_consumption{}; if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, ×tamp) == RSMI_STATUS_SUCCESS) { const auto scaled_value = static_cast(power_total_energy_consumption) * static_cast(resolution); - initial_total_power_consumption = scaled_value / 1000.0 / 1000.0; + initial_total_power_consumption = scaled_value / 1000'000.0; power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; } else if (power_samples_.power_usage_.has_value()) { // if the total energy consumption cannot be retrieved, but the current power draw, approximate it @@ -290,7 +291,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { } // retrieve initial memory related information - { + if (this->sample_category_enabled(sample_category::memory)) { decltype(memory_samples_.memory_total_)::value_type memory_total{}; if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_total) == RSMI_STATUS_SUCCESS) { memory_samples_.memory_total_ = memory_total; @@ -305,14 +306,14 @@ void gpu_amd_hardware_sampler::sampling_loop() { if (rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info) == RSMI_STATUS_SUCCESS) { memory_samples_.num_pcie_lanes_min_ = bandwidth_info.lanes[0]; memory_samples_.num_pcie_lanes_max_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1]; - memory_samples_.pcie_link_transfer_rate_min_ = bandwidth_info.transfer_rate.frequency[0] / 1000000; - memory_samples_.pcie_link_transfer_rate_max_ = bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.num_supported - 1] / 1000000; + memory_samples_.pcie_link_transfer_rate_min_ = bandwidth_info.transfer_rate.frequency[0] / 1'000'000; + memory_samples_.pcie_link_transfer_rate_max_ = bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.num_supported - 1] / 1'000'000; // 
queried samples -> retrieved every iteration if available memory_samples_.pcie_link_transfer_rate_ = decltype(memory_samples_.pcie_link_transfer_rate_)::value_type{}; memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{}; if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) { - memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000); + memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1'000'000); memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]); } else { // the current index is (somehow) wrong @@ -332,14 +333,13 @@ void gpu_amd_hardware_sampler::sampling_loop() { } // retrieve fixed temperature related information - { + if (this->sample_category_enabled(sample_category::temperature)) { std::uint32_t fan_id{ 0 }; std::int64_t fan_speed{}; while (rsmi_dev_fan_speed_get(device_id_, fan_id, &fan_speed) == RSMI_STATUS_SUCCESS) { if (fan_id == 0) { // queried samples -> retrieved every iteration if available - const auto percentage = static_cast(fan_speed) / - static_cast(RSMI_MAX_FAN_SPEED); + const auto percentage = static_cast(fan_speed) / static_cast(RSMI_MAX_FAN_SPEED); temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ percentage }; } ++fan_id; @@ -469,7 +469,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); // retrieve general samples - { + if (this->sample_category_enabled(sample_category::general)) { if (general_samples_.performance_level_.has_value()) { rsmi_dev_perf_level_t pstate{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate)) @@ -490,12 +490,12 @@ void gpu_amd_hardware_sampler::sampling_loop() { } // retrieve clock related samples - { + if 
(this->sample_category_enabled(sample_category::clock)) { if (clock_samples_.clock_frequency_.has_value()) { rsmi_frequencies_t frequency_info{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info)) if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); + clock_samples_.clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); } else { // the current index is (somehow) wrong clock_samples_.clock_frequency_->push_back(0); @@ -506,7 +506,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { rsmi_frequencies_t frequency_info{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info)) if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.socket_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); + clock_samples_.socket_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); } else { // the current index is (somehow) wrong clock_samples_.socket_clock_frequency_->push_back(0); @@ -517,7 +517,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { rsmi_frequencies_t frequency_info{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info)) if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.memory_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0); + clock_samples_.memory_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); } else { // the current index is (somehow) wrong clock_samples_.memory_clock_frequency_->push_back(0); @@ -538,12 +538,12 @@ void gpu_amd_hardware_sampler::sampling_loop() { } // retrieve power 
related samples - { + if (this->sample_category_enabled(sample_category::power)) { if (power_samples_.power_usage_.has_value()) { [[maybe_unused]] RSMI_POWER_TYPE power_type{}; std::uint64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type)) - power_samples_.power_usage_->push_back(static_cast(value) / 1000.0 / 1000.0); + power_samples_.power_usage_->push_back(static_cast(value) / 1000'000.0); } if (power_samples_.power_total_energy_consumption_.has_value()) { @@ -552,7 +552,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { std::uint64_t value{}; if (rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp) == RSMI_STATUS_SUCCESS) { const auto scaled_value = static_cast(value) * static_cast(resolution); - power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000.0 / 1000.0) - initial_total_power_consumption); + power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000'000.0) - initial_total_power_consumption); } else if (power_samples_.power_usage_.has_value()) { // if the total energy consumption cannot be retrieved, but the current power draw, approximate it const std::size_t num_time_points = this->sampling_time_points().size(); @@ -595,7 +595,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { } // retrieve memory related samples - { + if (this->sample_category_enabled(sample_category::memory)) { if (memory_samples_.memory_used_.has_value()) { decltype(memory_samples_.memory_used_)::value_type::value_type value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value)) @@ -609,7 +609,7 @@ void gpu_amd_hardware_sampler::sampling_loop() { rsmi_pcie_bandwidth_t bandwidth_info{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info)) if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) { - 
memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000); + memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1'000'000); memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]); } else { // the current index is (somehow) wrong @@ -620,12 +620,11 @@ void gpu_amd_hardware_sampler::sampling_loop() { } // retrieve temperature related samples - { + if (this->sample_category_enabled(sample_category::temperature)) { if (temperature_samples_.fan_speed_percentage_.has_value()) { std::int64_t value{}; HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value)) - temperature_samples_.fan_speed_percentage_->push_back(static_cast(value) / - static_cast(RSMI_MAX_FAN_SPEED)); + temperature_samples_.fan_speed_percentage_->push_back(static_cast(value) / static_cast(RSMI_MAX_FAN_SPEED)); } if (temperature_samples_.temperature_.has_value()) { diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp index 48e459b..f96a695 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp @@ -11,6 +11,7 @@ #include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} #include "hardware_sampling/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "hardware_sampling/utility.hpp" // hws::{durations_from_reference_time, join} #include "fmt/format.h" // fmt::format @@ -31,17 +32,17 @@ namespace hws { 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler() : - gpu_intel_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { } +gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const sample_category category) : + gpu_intel_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { } -gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id) : - gpu_intel_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { } +gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const sample_category category) : + gpu_intel_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { } -gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::chrono::milliseconds sampling_interval) : - gpu_intel_hardware_sampler{ 0, sampling_interval } { } +gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + gpu_intel_hardware_sampler{ 0, sampling_interval, category } { } -gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) : - hardware_sampler{ sampling_interval } { +gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) : + hardware_sampler{ sampling_interval, category } { // make sure that zeInit is only called once for all instances if (instances_++ == 0) { HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY)) @@ -90,7 +91,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { double initial_total_power_consumption{}; // initial total power consumption in J // retrieve initial general information - { + if (this->sample_category_enabled(sample_category::general)) { // the byte order is given by Intel directly general_samples_.byte_order_ = "Little Endian"; @@ -137,7 +138,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve 
initial clock related information - { + if (this->sample_category_enabled(sample_category::clock)) { std::uint32_t num_frequency_domains{ 0 }; if (zesDeviceEnumFrequencyDomains(device, &num_frequency_domains, nullptr) == ZE_RESULT_SUCCESS) { frequency_handles.resize(num_frequency_domains); @@ -226,7 +227,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve initial power related information - { + if (this->sample_category_enabled(sample_category::power)) { std::uint32_t num_power_domains{ 0 }; if (zesDeviceEnumPowerDomains(device, &num_power_domains, nullptr) == ZE_RESULT_SUCCESS) { power_handles.resize(num_power_domains); @@ -281,7 +282,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve initial memory related information - { + if (this->sample_category_enabled(sample_category::memory)) { std::uint32_t num_memory_modules{ 0 }; if (zesDeviceEnumMemoryModules(device, &num_memory_modules, nullptr) == ZE_RESULT_SUCCESS) { memory_handles.resize(num_memory_modules); @@ -378,7 +379,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve initial temperature related information - { + if (this->sample_category_enabled(sample_category::temperature)) { std::uint32_t num_fans{ 0 }; if (zesDeviceEnumFans(device, &num_fans, nullptr) == ZE_RESULT_SUCCESS) { temperature_samples_.num_fans_ = num_fans; @@ -503,7 +504,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); // retrieve clock related samples - { + if (this->sample_category_enabled(sample_category::clock)) { for (zes_freq_handle_t handle : frequency_handles) { // get frequency properties zes_freq_properties_t prop{}; @@ -550,7 +551,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve power related samples - { + if (this->sample_category_enabled(sample_category::power)) { if (!power_handles.empty()) { // NOTE: only the first power domain is used here if 
(power_samples_.power_total_energy_consumption_.has_value()) { @@ -572,7 +573,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve memory related samples - { + if (this->sample_category_enabled(sample_category::memory)) { for (zes_mem_handle_t handle : memory_handles) { zes_mem_properties_t prop{}; HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop)) @@ -610,7 +611,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve temperature related samples - { + if (this->sample_category_enabled(sample_category::temperature)) { if (!psu_handles.empty()) { if (temperature_samples_.psu_temperature_.has_value()) { // NOTE: only the first PSU is used here diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index f9a064b..7d16a0a 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -11,6 +11,7 @@ #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} #include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK #include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #include "hardware_sampling/utility.hpp" // hws::detail::time_points_to_epoch #include "fmt/format.h" // fmt::format @@ -33,17 +34,17 @@ namespace hws { -gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler() : - gpu_nvidia_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { } +gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const sample_category category) : + gpu_nvidia_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { } -gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id) : - gpu_nvidia_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { } 
+gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const sample_category category) : + gpu_nvidia_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { } -gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::chrono::milliseconds sampling_interval) : - gpu_nvidia_hardware_sampler{ 0, sampling_interval } { } +gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + gpu_nvidia_hardware_sampler{ 0, sampling_interval, category } { } -gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) : - hardware_sampler{ sampling_interval } { +gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) : + hardware_sampler{ sampling_interval, category } { // make sure that nvmlInit is only called once for all instances if (instances_++ == 0) { HWS_NVML_ERROR_CHECK(nvmlInit()) @@ -91,7 +92,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { double initial_total_power_consumption{}; // initial total power consumption in J // retrieve initial general information - { + if (this->sample_category_enabled(sample_category::general)) { // fixed information -> only retrieved once nvmlDeviceArchitecture_t device_arch{}; if (nvmlDeviceGetArchitecture(device, &device_arch) == NVML_SUCCESS) { @@ -186,7 +187,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { } // retrieve initial clock related information - { + if (this->sample_category_enabled(sample_category::clock)) { // fixed information -> only retrieved once unsigned int adaptive_clock_status{}; if (nvmlDeviceGetAdaptiveClockInfoStatus(device, &adaptive_clock_status) == NVML_SUCCESS) { @@ -278,7 +279,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { } // retrieve initial power related 
information - { + if (this->sample_category_enabled(sample_category::power)) { // fixed information -> only retrieved once nvmlEnableState_t mode{}; if (nvmlDeviceGetPowerManagementMode(device, &mode) == NVML_SUCCESS) { @@ -334,7 +335,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { } // retrieve initial memory related information - { + if (this->sample_category_enabled(sample_category::memory)) { // fixed information -> only retrieved once nvmlMemory_t memory_info{}; if (nvmlDeviceGetMemoryInfo(device, &memory_info) == NVML_SUCCESS) { @@ -377,7 +378,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { } // retrieve initial temperature related information - { + if (this->sample_category_enabled(sample_category::temperature)) { // fixed information -> only retrieved once decltype(temperature_samples_.num_fans_)::value_type num_fans{}; if (nvmlDeviceGetNumFans(device, &num_fans) == NVML_SUCCESS) { @@ -426,7 +427,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); // retrieve general samples - { + if (this->sample_category_enabled(sample_category::general)) { if (general_samples_.performance_level_.has_value()) { nvmlPstates_t pstate{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate)) @@ -442,7 +443,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { } // retrieve clock related samples - { + if (this->sample_category_enabled(sample_category::clock)) { if (clock_samples_.clock_frequency_.has_value()) { unsigned int value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value)) @@ -476,7 +477,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { } // retrieve power related information - { + if (this->sample_category_enabled(sample_category::power)) { if (power_samples_.power_profile_.has_value()) { nvmlPstates_t pstate{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate)) @@ -497,7 +498,7 @@ void 
gpu_nvidia_hardware_sampler::sampling_loop() { } // retrieve memory related information - { + if (this->sample_category_enabled(sample_category::memory)) { if (memory_samples_.memory_free_.has_value() && memory_samples_.memory_used_.has_value()) { nvmlMemory_t memory_info{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info)) @@ -519,7 +520,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { } // retrieve temperature related information - { + if (this->sample_category_enabled(sample_category::temperature)) { if (temperature_samples_.fan_speed_percentage_.has_value()) { unsigned int value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value)) diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index 903db8a..db0a4f5 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -10,9 +10,9 @@ #include "hardware_sampling/event.hpp" // hws::event #include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time +#include "fmt/chrono.h" // direct formatting of std::chrono types #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join -#include "fmt/chrono.h" // direct formatting of std::chrono types #include // std::chrono::{system_clock, steady_clock, duration_cast, milliseconds} #include // std::size_t @@ -25,8 +25,9 @@ namespace hws { -hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval) : - sampling_interval_{ sampling_interval } { } +hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + sampling_interval_{ sampling_interval }, + sample_category_{ category } { } hardware_sampler::~hardware_sampler() = default; @@ -170,4 +171,12 @@ void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const { this->dump_yaml(filename.string().c_str()); } +void hardware_sampler::add_time_point(const 
std::chrono::steady_clock::time_point time_point) { + time_points_.push_back(time_point); +} + +bool hardware_sampler::sample_category_enabled(const sample_category category) const noexcept { + return static_cast(this->sample_category_ & category) != 0; +} + } // namespace hws diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp index d2d63a9..dc5d34e 100644 --- a/src/hardware_sampling/system_hardware_sampler.cpp +++ b/src/hardware_sampling/system_hardware_sampler.cpp @@ -7,7 +7,8 @@ #include "hardware_sampling/system_hardware_sampler.hpp" -#include "hardware_sampling/event.hpp" // hws::event +#include "hardware_sampling/event.hpp" // hws::event +#include "hardware_sampling/sample_category.hpp" // hws::sample_category #if defined(HWS_FOR_CPUS_ENABLED) #include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler @@ -37,14 +38,14 @@ namespace hws { -system_hardware_sampler::system_hardware_sampler() : - system_hardware_sampler{ HWS_SAMPLING_INTERVAL } { } +system_hardware_sampler::system_hardware_sampler(const sample_category category) : + system_hardware_sampler{ HWS_SAMPLING_INTERVAL, category } { } -system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds sampling_interval) { +system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds sampling_interval, sample_category category) { // create the hardware samplers based on the available hardware #if defined(HWS_FOR_CPUS_ENABLED) { - samplers_.push_back(std::make_unique(sampling_interval)); + samplers_.push_back(std::make_unique(sampling_interval, category)); } #endif #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) @@ -52,7 +53,7 @@ system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds int device_count{}; HWS_CUDA_ERROR_CHECK(cudaGetDeviceCount(&device_count)); for (int device = 0; device < device_count; ++device) { - 
samplers_.push_back(std::make_unique(static_cast(device), sampling_interval)); + samplers_.push_back(std::make_unique(static_cast(device), sampling_interval, category)); } } #endif @@ -61,7 +62,7 @@ system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds int device_count{}; HWS_HIP_ERROR_CHECK(hipGetDeviceCount(&device_count)); for (int device = 0; device < device_count; ++device) { - samplers_.push_back(std::make_unique(static_cast(device), sampling_interval)); + samplers_.push_back(std::make_unique(static_cast(device), sampling_interval, category)); } } #endif From d5e33bfb890c8dc90360ec50c1551aa56fa72500 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 17:21:26 +0200 Subject: [PATCH 49/69] Output throttle reasons as string and as bitmask. --- README.md | 52 ++++++++++--------- bindings/gpu_intel_hardware_sampler.cpp | 6 ++- bindings/gpu_nvidia_hardware_sampler.cpp | 3 +- .../gpu_intel/level_zero_samples.hpp | 24 +++++---- .../gpu_nvidia/nvml_samples.hpp | 11 ++-- .../gpu_intel/hardware_sampler.cpp | 32 +++++++++--- .../gpu_intel/level_zero_samples.cpp | 33 +++++++++--- .../gpu_nvidia/hardware_sampler.cpp | 12 +++-- .../gpu_nvidia/nvml_samples.cpp | 17 ++++-- 9 files changed, 123 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index 3207e22..77869b4 100644 --- a/README.md +++ b/README.md @@ -109,31 +109,33 @@ current clock frequencies, temperatures, or memory consumption. 
### clock-related samples -| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | -|:-----------------------------------|:-----------:|:----:|:----------------:|:-----------:|:----------------:| -| auto_boosted_clock_enabled | fixed | bool | bool | - | - | -| clock_frequency_min | fixed | MHz | MHz | MHz | MHz | -| clock_frequency_max | fixed | MHz | MHz | MHz | MHz | -| memory_clock_frequency_min | fixed | - | MHz | MHz | MHz | -| memory_clock_frequency_max | fixed | - | MHz | MHz | MHz | -| socket_clock_frequency_min | fixed | - | - | MHz | - | -| socket_clock_frequency_min | fixed | - | - | MHz | - | -| sm_clock_frequency_max | fixed | - | MHz | - | - | -| available_clock_frequencies | fixed | - | map of MHz | list of MHz | list of MHz | -| available_memory_clock_frequencies | fixed | - | list of MHz | list of MHz | list of MHz | -| clock_frequency | sampled | MHz | MHz | MHz | MHz | -| average_non_idle_clock_frequency | sampled | MHz | - | - | - | -| time_stamp_counter | sampled | MHz | - | - | - | -| memory_clock_frequency | sampled | - | MHz | MHz | MHz | -| socket_clock_frequency | sampled | - | - | MHz | - | -| sm_clock_frequency | sampled | - | MHz | - | - | -| overdrive_level | sampled | - | - | % | - | -| memory_overdrive_level | sampled | - | - | % | - | -| throttle_reason | sampled | - | string (bitmask) | - | string (bitmask) | -| memory_throttle_reason | sampled | - | - | - | string (bitmask) | -| auto_boosted_clock | sampled | - | bool | - | - | -| frequency_limit_tdp | sampled | - | - | - | MHz | -| memory_frequency_limit_tdp | sampled | - | - | - | MHz | +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-----------------------------------|:-----------:|:----:|:-----------:|:-----------:|:-----------:| +| auto_boosted_clock_enabled | fixed | bool | bool | - | - | +| clock_frequency_min | fixed | MHz | MHz | MHz | MHz | +| clock_frequency_max | fixed | MHz | MHz | MHz | MHz | +| memory_clock_frequency_min | 
fixed | - | MHz | MHz | MHz | +| memory_clock_frequency_max | fixed | - | MHz | MHz | MHz | +| socket_clock_frequency_min | fixed | - | - | MHz | - | +| socket_clock_frequency_min | fixed | - | - | MHz | - | +| sm_clock_frequency_max | fixed | - | MHz | - | - | +| available_clock_frequencies | fixed | - | map of MHz | list of MHz | list of MHz | +| available_memory_clock_frequencies | fixed | - | list of MHz | list of MHz | list of MHz | +| clock_frequency | sampled | MHz | MHz | MHz | MHz | +| average_non_idle_clock_frequency | sampled | MHz | - | - | - | +| time_stamp_counter | sampled | MHz | - | - | - | +| memory_clock_frequency | sampled | - | MHz | MHz | MHz | +| socket_clock_frequency | sampled | - | - | MHz | - | +| sm_clock_frequency | sampled | - | MHz | - | - | +| overdrive_level | sampled | - | - | % | - | +| memory_overdrive_level | sampled | - | - | % | - | +| throttle_reason | sampled | - | bitmask | - | bitmask | +| throttle_reason_string | sampled | - | str | - | str | +| memory_throttle_reason | sampled | - | - | - | bitmask | +| memory_throttle_reason_string | sampled | - | - | - | str | +| auto_boosted_clock | sampled | - | bool | - | - | +| frequency_limit_tdp | sampled | - | - | - | MHz | +| memory_frequency_limit_tdp | sampled | - | - | - | MHz | ### power-related samples diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp index 546d295..4485701 100644 --- a/bindings/gpu_intel_hardware_sampler.cpp +++ b/bindings/gpu_intel_hardware_sampler.cpp @@ -46,8 +46,10 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { .def("get_available_memory_clock_frequencies", &hws::level_zero_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)") .def("get_clock_frequency", &hws::level_zero_clock_samples::get_clock_frequency, "the current GPU frequency in MHz") .def("get_memory_clock_frequency", 
&hws::level_zero_clock_samples::get_memory_clock_frequency, "the current memory frequency in MHz") - .def("get_throttle_reason", &hws::level_zero_clock_samples::get_throttle_reason, "the current GPU frequency throttle reason") - .def("get_memory_throttle_reason", &hws::level_zero_clock_samples::get_memory_throttle_reason, "the current memory frequency throttle reason") + .def("get_throttle_reason", &hws::level_zero_clock_samples::get_throttle_reason, "the current GPU frequency throttle reason (as bitmask)") + .def("get_throttle_reason_string", &hws::level_zero_clock_samples::get_throttle_reason_string, "the current GPU frequency throttle reason (as string)") + .def("get_memory_throttle_reason", &hws::level_zero_clock_samples::get_memory_throttle_reason, "the current memory frequency throttle reason (as bitmask)") + .def("get_memory_throttle_reason_string", &hws::level_zero_clock_samples::get_memory_throttle_reason_string, "the current memory frequency throttle reason (as string)") .def("get_frequency_limit_tdp", &hws::level_zero_clock_samples::get_frequency_limit_tdp, "the current maximum allowed GPU frequency based on the TDP limit in MHz") .def("get_memory_frequency_limit_tdp", &hws::level_zero_clock_samples::get_memory_frequency_limit_tdp, "the current maximum allowed memory frequency based on the TDP limit in MHz") .def("__repr__", [](const hws::level_zero_clock_samples &self) { diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp index 10a04b3..23a0549 100644 --- a/bindings/gpu_nvidia_hardware_sampler.cpp +++ b/bindings/gpu_nvidia_hardware_sampler.cpp @@ -51,7 +51,8 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) { .def("get_available_memory_clock_frequencies", &hws::nvml_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)") .def("get_memory_clock_frequency", &hws::nvml_clock_samples::get_memory_clock_frequency, "the current memory clock 
frequency in MHz") .def("get_sm_clock_frequency", &hws::nvml_clock_samples::get_sm_clock_frequency, "the current SM clock frequency in Mhz") - .def("get_throttle_reason", &hws::nvml_clock_samples::get_throttle_reason, "the reason the GPU clock throttled") + .def("get_throttle_reason", &hws::nvml_clock_samples::get_throttle_reason, "the reason the GPU clock throttled (as bitmask)") + .def("get_throttle_reason_string", &hws::nvml_clock_samples::get_throttle_reason_string, "the reason the GPU clock throttled (as string)") .def("get_auto_boosted_clock", &hws::nvml_clock_samples::get_auto_boosted_clock, "true if the clocks are currently auto boosted") .def("__repr__", [](const hws::nvml_clock_samples &self) { return fmt::format("", self); diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp index f178f7f..f53422e 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp @@ -16,7 +16,7 @@ #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter -#include // std::uint64_t, std::int32_t +#include // std::uint64_t, std::int64_t, std::int32_t #include // std::ostream forward declaration #include // std::optional #include // std::string @@ -50,10 +50,10 @@ class level_zero_general_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the model name of the device - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, flags) // potential GPU flags (e.g. 
integrated device) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the model name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, flags) // potential GPU flags (e.g. integrated device) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode) // the enabled standby mode (power saving or never) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu) // the number of threads per EU unit HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, eu_simd_width) // the physical EU unit SIMD width @@ -100,12 +100,14 @@ class level_zero_clock_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_clock_frequencies) // the available GPU clock frequencies in MHz (slowest to fastest) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_memory_clock_frequencies) // the available memory clock frequencies in MHz (slowest to fastest) - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current GPU frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason) // the current GPU frequency throttle reason - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, memory_throttle_reason) // the current memory frequency throttle reason - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, frequency_limit_tdp) // the current maximum allowed GPU frequency based on the TDP limit in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_frequency_limit_tdp) // the current maximum allowed memory frequency based on the TDP limit in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current GPU frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory frequency in MHz + 
HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, throttle_reason) // the current GPU frequency throttle reason as bitmask + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason_string) // the current GPU frequency throttle reason as string + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, memory_throttle_reason) // the current memory frequency throttle reason as bitmask + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, memory_throttle_reason_string) // the current memory frequency throttle reason as string + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, frequency_limit_tdp) // the current maximum allowed GPU frequency based on the TDP limit in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_frequency_limit_tdp) // the current maximum allowed memory frequency based on the TDP limit in MHz }; /** diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index 31acebb..91f4e6b 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -106,11 +106,12 @@ class nvml_clock_samples { HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, available_clock_frequencies) // the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_memory_clock_frequencies) // the available memory clock frequencies in MHz (slowest to fastest) - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current graphics clock frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory clock frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, sm_clock_frequency) // the current SM clock frequency in Mhz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason) // the reason the GPU clock throttled - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clock) // true if the clocks are currently auto boosted + 
HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current graphics clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, sm_clock_frequency) // the current SM clock frequency in Mhz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, throttle_reason) // the reason the GPU clock throttled (as bitmask) + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason_string) // the reason the GPU clock throttled (as string) + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clock) // true if the clocks are currently auto boosted }; /** diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp index f96a695..67abc5d 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp @@ -20,7 +20,7 @@ #include // std::chrono::{steady_clock, duration_cast, milliseconds} #include // std::size_t -#include // std::int32_t +#include // std::int32_t, std::int64_t #include // std::exception, std::terminate #include // std::ios_base #include // std::cerr, std::endl @@ -196,8 +196,14 @@ void gpu_intel_hardware_sampler::sampling_loop() { clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ frequency_state.actual }; } if (frequency_state.throttleReasons >= 0.0) { - using vector_type = decltype(clock_samples_.throttle_reason_)::value_type; - clock_samples_.throttle_reason_ = vector_type{ static_cast(detail::throttle_reason_to_string(frequency_state.throttleReasons)) }; + { + using vector_type = decltype(clock_samples_.throttle_reason_)::value_type; + clock_samples_.throttle_reason_ = vector_type{ static_cast(static_cast(frequency_state.throttleReasons)) }; + } + { + using vector_type = decltype(clock_samples_.throttle_reason_string_)::value_type; + 
clock_samples_.throttle_reason_string_ = vector_type{ static_cast(detail::throttle_reason_to_string(frequency_state.throttleReasons)) }; + } } } break; @@ -210,8 +216,14 @@ void gpu_intel_hardware_sampler::sampling_loop() { clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ frequency_state.actual }; } if (frequency_state.throttleReasons >= 0.0) { - using vector_type = decltype(clock_samples_.memory_throttle_reason_)::value_type; - clock_samples_.memory_throttle_reason_ = vector_type{ static_cast(detail::throttle_reason_to_string(frequency_state.throttleReasons)) }; + { + using vector_type = decltype(clock_samples_.memory_throttle_reason_)::value_type; + clock_samples_.memory_throttle_reason_ = vector_type{ static_cast(static_cast(frequency_state.throttleReasons)) }; + } + { + using vector_type = decltype(clock_samples_.memory_throttle_reason_string_)::value_type; + clock_samples_.memory_throttle_reason_string_ = vector_type{ static_cast(detail::throttle_reason_to_string(frequency_state.throttleReasons)) }; + } } } break; @@ -525,7 +537,10 @@ void gpu_intel_hardware_sampler::sampling_loop() { clock_samples_.clock_frequency_->push_back(frequency_state.actual); } if (clock_samples_.throttle_reason_.has_value()) { - clock_samples_.throttle_reason_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons)); + clock_samples_.throttle_reason_->push_back(static_cast(frequency_state.throttleReasons)); + } + if (clock_samples_.throttle_reason_string_.has_value()) { + clock_samples_.throttle_reason_string_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons)); } } break; @@ -538,7 +553,10 @@ void gpu_intel_hardware_sampler::sampling_loop() { clock_samples_.memory_clock_frequency_->push_back(frequency_state.actual); } if (clock_samples_.memory_throttle_reason_.has_value()) { - 
clock_samples_.memory_throttle_reason_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons)); + clock_samples_.memory_throttle_reason_->push_back(static_cast(frequency_state.throttleReasons)); + } + if (clock_samples_.memory_throttle_reason_string_.has_value()) { + clock_samples_.memory_throttle_reason_string_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons)); } } break; diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp index a10a358..ab749fb 100644 --- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp +++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp @@ -129,7 +129,8 @@ bool level_zero_clock_samples::has_samples() const { return this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() || this->memory_clock_frequency_min_.has_value() || this->memory_clock_frequency_max_.has_value() || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value() || this->memory_clock_frequency_.has_value() || this->throttle_reason_.has_value() - || this->memory_throttle_reason_.has_value() || this->frequency_limit_tdp_.has_value() || this->memory_frequency_limit_tdp_.has_value(); + || this->throttle_reason_string_.has_value() || this->memory_throttle_reason_.has_value() || this->memory_throttle_reason_string_.has_value() + || this->frequency_limit_tdp_.has_value() || this->memory_frequency_limit_tdp_.has_value(); } std::string level_zero_clock_samples::generate_yaml_string() const { @@ -197,20 +198,34 @@ std::string level_zero_clock_samples::generate_yaml_string() const { " values: [{}]\n", fmt::join(this->memory_clock_frequency_.value(), ", ")); } - // the current GPU core throttle reason + // the current GPU core throttle reason as bitmask if (this->throttle_reason_.has_value()) { str += fmt::format(" throttle_reason:\n" - " unit: 
\"string\"\n" + " unit: \"bitmask\"\n" " values: [{}]\n", fmt::join(this->throttle_reason_.value(), ", ")); } - // the current memory throttle reason + // the current GPU core throttle reason as string + if (this->throttle_reason_string_.has_value()) { + str += fmt::format(" throttle_reason_string:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(this->throttle_reason_string_.value(), ", ")); + } + // the current memory throttle reason as bitmask if (this->memory_throttle_reason_.has_value()) { str += fmt::format(" memory_throttle_reason:\n" - " unit: \"string\"\n" + " unit: \"bitmask\"\n" " values: [{}]\n", fmt::join(this->memory_throttle_reason_.value(), ", ")); } + // the current memory throttle reason as string + if (this->memory_throttle_reason_string_.has_value()) { + str += fmt::format(" memory_throttle_reason_string:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(this->memory_throttle_reason_string_.value(), ", ")); + } // the maximum GPU core frequency based on the current TDP limit if (this->frequency_limit_tdp_.has_value()) { str += fmt::format(" frequency_limit_tdp:\n" @@ -238,8 +253,10 @@ std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samp "available_memory_clock_frequencies [MHz]: [{}]\n" "clock_frequency [MHz]: [{}]\n" "memory_clock_frequency [MHz]: [{}]\n" - "throttle_reason [string]: [{}]\n" - "memory_throttle_reason [string]: [{}]\n" + "throttle_reason [bitmask]: [{}]\n" + "throttle_reason_string [string]: [{}]\n" + "memory_throttle_reason [bitmask]: [{}]\n" + "memory_throttle_reason_string [string]: [{}]\n" "frequency_limit_tdp [MHz]: [{}]\n" "memory_frequency_limit_tdp [MHz]: [{}]", detail::value_or_default(samples.get_clock_frequency_min()), @@ -251,7 +268,9 @@ std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samp fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), 
fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "), + fmt::join(detail::value_or_default(samples.get_throttle_reason_string()), ", "), fmt::join(detail::value_or_default(samples.get_memory_throttle_reason()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_throttle_reason_string()), ", "), fmt::join(detail::value_or_default(samples.get_frequency_limit_tdp()), ", "), fmt::join(detail::value_or_default(samples.get_memory_frequency_limit_tdp()), ", ")); } diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 7d16a0a..14d6a9f 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -266,9 +266,10 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ static_cast(clock_mem) }; } - unsigned long long clock_throttle_reason{}; + decltype(clock_samples_.throttle_reason_)::value_type::value_type clock_throttle_reason{}; if (nvmlDeviceGetCurrentClocksEventReasons(device, &clock_throttle_reason) == NVML_SUCCESS) { - clock_samples_.throttle_reason_ = decltype(clock_samples_.throttle_reason_)::value_type{ detail::throttle_event_reason_to_string(clock_throttle_reason) }; + clock_samples_.throttle_reason_ = decltype(clock_samples_.throttle_reason_)::value_type{ clock_throttle_reason }; + clock_samples_.throttle_reason_string_ = decltype(clock_samples_.throttle_reason_string_)::value_type{ detail::throttle_event_reason_to_string(clock_throttle_reason) }; } nvmlEnableState_t mode{}; @@ -462,10 +463,11 @@ void gpu_nvidia_hardware_sampler::sampling_loop() { clock_samples_.memory_clock_frequency_->push_back(static_cast(value)); } - if (clock_samples_.throttle_reason_.has_value()) { - unsigned long long value{}; + if (clock_samples_.throttle_reason_string_.has_value()) { + 
decltype(clock_samples_.throttle_reason_)::value_type::value_type value{}; HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value)) - clock_samples_.throttle_reason_->push_back(detail::throttle_event_reason_to_string(value)); + clock_samples_.throttle_reason_->push_back(value); + clock_samples_.throttle_reason_string_->push_back(detail::throttle_event_reason_to_string(value)); } if (clock_samples_.auto_boosted_clock_.has_value()) { diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp index 0412ef2..b07c7d2 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp @@ -134,7 +134,7 @@ bool nvml_clock_samples::has_samples() const { || this->memory_clock_frequency_min_.has_value() || this->memory_clock_frequency_max_.has_value() || this->sm_clock_frequency_max_.has_value() || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value() || this->memory_clock_frequency_.has_value() || this->sm_clock_frequency_.has_value() || this->throttle_reason_.has_value() - || this->auto_boosted_clock_.has_value(); + || this->throttle_reason_string_.has_value() || this->auto_boosted_clock_.has_value(); } std::string nvml_clock_samples::generate_yaml_string() const { @@ -225,13 +225,20 @@ std::string nvml_clock_samples::generate_yaml_string() const { " values: [{}]\n", fmt::join(this->sm_clock_frequency_.value(), ", ")); } - // clock throttle reason + // clock throttle reason as bitmask if (this->throttle_reason_.has_value()) { str += fmt::format(" throttle_reason:\n" - " unit: \"string\"\n" + " unit: \"bitmask\"\n" " values: [{}]\n", fmt::join(detail::quote(this->throttle_reason_.value()), ", ")); } + // clock throttle reason as string + if (this->throttle_reason_string_.has_value()) { + str += fmt::format(" throttle_reason_string:\n" + " unit: \"string\"\n" + " 
values: [{}]\n", + fmt::join(detail::quote(this->throttle_reason_string_.value()), ", ")); + } // clock is auto-boosted if (this->auto_boosted_clock_.has_value()) { str += fmt::format(" auto_boosted_clock:\n" @@ -255,7 +262,8 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) { "clock_frequency [MHz]: [{}]\n" "memory_clock_frequency [MHz]: [{}]\n" "sm_clock_frequency [MHz]: [{}]\n" - "throttle_reason [string]: [{}]\n" + "throttle_reason [bitmask]: [{}]\n" + "throttle_reason_string [string]: [{}]\n" "auto_boosted_clock [bool]: [{}]", detail::value_or_default(samples.get_auto_boosted_clock_enabled()), detail::value_or_default(samples.get_clock_frequency_min()), @@ -269,6 +277,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) { fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), fmt::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "), fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "), + fmt::join(detail::value_or_default(samples.get_throttle_reason_string()), ", "), fmt::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", ")); } From 4cceea6bcd5ad5a7fa780fa4ab09aae1a6602c88 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 17:31:06 +0200 Subject: [PATCH 50/69] Implement Intel GPU system_hardware_sampler device discovery. 
--- .../system_hardware_sampler.cpp | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp index dc5d34e..4f2c15c 100644 --- a/src/hardware_sampling/system_hardware_sampler.cpp +++ b/src/hardware_sampling/system_hardware_sampler.cpp @@ -15,7 +15,9 @@ #endif #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler - #include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_CUDA_ERROR_CHECK, hws::detail:: + #include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_CUDA_ERROR_CHECK + + #include "cuda_runtime.h" // cudaGetDeviceCount #endif #if defined(HWS_FOR_AMD_GPUS_ENABLED) #include "hardware_sampling/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler @@ -32,6 +34,8 @@ #include // std::for_each, std::all_of #include // std::chrono::milliseconds +#include // std::size_t +#include // std::uint32_t #include // std::unique_ptr, std::make_unique #include // std::out_of_range #include // std::vector @@ -68,7 +72,25 @@ system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds #endif #if defined(HWS_FOR_INTEL_GPUS_ENABLED) { - // TODO: implement + // discover the number of drivers + std::uint32_t driver_count{ 0 }; + HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr)) + + // check if only the single GPU driver has been found + if (driver_count > 1) { + throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) }; + } + + // get the GPU driver + ze_driver_handle_t driver{}; + HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver)) + + // get all GPUs for the current driver + std::uint32_t device_count{ 0 }; + HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr)) + for (std::uint32_t device = 0; device < device_count; ++device) { + 
samplers_.push_back(std::make_unique(static_cast(device), sampling_interval, category)); + } } #endif } From ab809d9ec5c8d3b9a0955e147a341ef7ff358bfd Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 24 Sep 2024 18:13:55 +0200 Subject: [PATCH 51/69] Add a function to return the hardware samples as YAML string instead of only being able to directly write it to a file. --- bindings/hardware_sampler.cpp | 1 + bindings/system_hardware_sampler.cpp | 1 + .../hardware_sampling/hardware_sampler.hpp | 6 ++ .../system_hardware_sampler.hpp | 6 ++ src/hardware_sampling/hardware_sampler.cpp | 72 ++++++++++--------- .../system_hardware_sampler.cpp | 5 ++ 6 files changed, 59 insertions(+), 32 deletions(-) diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index 5f86f96..e547478 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -62,6 +62,7 @@ void init_hardware_sampler(py::module_ &m) { .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)") .def("dump_yaml", py::overload_cast(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file") + .def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::hardware_sampler &self) { #if defined(HWS_FOR_CPUS_ENABLED) if (dynamic_cast(&self)) { diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp index f26a4b6..2e25e79 100644 --- a/bindings/system_hardware_sampler.cpp +++ b/bindings/system_hardware_sampler.cpp @@ -66,5 +66,6 @@ void init_system_hardware_sampler(py::module_ &m) { 
return out; }, "get the hardware samplers available for the whole system") .def("sampler", [](hws::system_hardware_sampler &self, const std::size_t idx) { return self.sampler(idx).get(); }, "get the i-th hardware sampler available for the whole system") .def("dump_yaml", py::overload_cast(&hws::system_hardware_sampler::dump_yaml, py::const_), "dump all hardware samples for all hardware samplers to the given YAML file") + .def("as_yaml_string", &hws::system_hardware_sampler::as_yaml_string, "return all hardware samples for all hardware samplers as YAML string") .def("__repr__", [](const hws::system_hardware_sampler &self) { return fmt::format("", self.num_samplers()); }); } diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp index e534d19..ab7bd3f 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hardware_sampling/hardware_sampler.hpp @@ -160,6 +160,12 @@ class hardware_sampler { */ void dump_yaml(const std::filesystem::path &filename) const; + /** + * @brief Return the hardware samples as YAML string. + * @return the YAML content as string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string as_yaml_string() const; + protected: /** * @brief Getter the hardware samples. Called in another std::thread. diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp index d8c56fd..6346833 100644 --- a/include/hardware_sampling/system_hardware_sampler.hpp +++ b/include/hardware_sampling/system_hardware_sampler.hpp @@ -175,6 +175,12 @@ class system_hardware_sampler { */ void dump_yaml(const std::filesystem::path &filename) const; + /** + * @brief Return the hardware samples as YAML string. + * @return the YAML content as string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string as_yaml_string() const; + private: /// The different hardware sampler for the current system. 
std::vector> samplers_; diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index db0a4f5..0ff6a76 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -127,48 +127,56 @@ void hardware_sampler::dump_yaml(const char *filename) const { std::ofstream file{ filename, std::ios_base::app }; // begin a new YAML document (only with "---" multiple YAML documents in a single file are allowed) - file << "---\n\n"; + file << "---\n\n" << this->as_yaml_string(); +} - // set the device identification - file << fmt::format("device_identification: \"{}\"\n\n", this->device_identification()); +void hardware_sampler::dump_yaml(const std::string &filename) const { + this->dump_yaml(filename.c_str()); +} - // output the start date time of this hardware sampling - file << fmt::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", start_date_time_); +void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const { + this->dump_yaml(filename.string().c_str()); +} - // output the event information +std::string hardware_sampler::as_yaml_string() const { + if (!this->has_sampling_stopped()) { + throw std::runtime_error{ "Can return samples as string only after the sampling has been stopped!" 
}; + } + + // generate the event information std::vector event_time_points{}; std::vector event_names{}; for (const auto &[time_point, name] : events_) { event_time_points.push_back(time_point); event_names.push_back(fmt::format("\"{}\"", name)); } - file << fmt::format("events:\n" - " time_points:\n" - " unit: \"s\"\n" - " values: [{}]\n" - " names: [{}]\n\n", - fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "), - fmt::join(event_names, ", ")); - - // output the sampling information - file << fmt::format("sampling_interval:\n" - " unit: \"ms\"\n" - " values: {}\n\n" - "time_points:\n" - " unit: \"s\"\n" - " values: [{}]\n\n" - "{}\n", - this->sampling_interval().count(), - fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), - this->generate_yaml_string()); -} - -void hardware_sampler::dump_yaml(const std::string &filename) const { - this->dump_yaml(filename.c_str()); -} -void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const { - this->dump_yaml(filename.string().c_str()); + return fmt::format("device_identification: \"{}\"\n" + "\n" + "start_time: \"{:%Y-%m-%d %X}\"\n" + "\n" + "events:\n" + " time_points:\n" + " unit: \"s\"\n" + " values: [{}]\n" + " names: [{}]\n" + "\n" + "sampling_interval:\n" + " unit: \"ms\"\n" + " values: {}\n" + "\n" + "time_points:\n" + " unit: \"s\"\n" + " values: [{}]\n" + "\n" + "{}\n", + this->device_identification(), + start_date_time_, + fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "), + fmt::join(event_names, ", "), + this->sampling_interval().count(), + fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), + this->generate_yaml_string()); } void hardware_sampler::add_time_point(const std::chrono::steady_clock::time_point time_point) { diff --git 
a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp index 4f2c15c..234fced 100644 --- a/src/hardware_sampling/system_hardware_sampler.cpp +++ b/src/hardware_sampling/system_hardware_sampler.cpp @@ -37,6 +37,7 @@ #include // std::size_t #include // std::uint32_t #include // std::unique_ptr, std::make_unique +#include // std::accumulate #include // std::out_of_range #include // std::vector @@ -197,4 +198,8 @@ void system_hardware_sampler::dump_yaml(const std::filesystem::path &filename) c std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); } +std::string system_hardware_sampler::as_yaml_string() const { + return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string str, const auto &ptr) { return str + ptr->as_yaml_string(); }); +} + } // namespace hws From 988dc77cdf56f8ec7125bc3c9d306d1cf42e4c63 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 27 Sep 2024 10:26:48 +0200 Subject: [PATCH 52/69] Add a new function to retrieve the hardware samples only excluding event or time_point information. 
--- bindings/cpu_hardware_sampler.cpp | 1 + bindings/gpu_amd_hardware_sampler.cpp | 1 + bindings/gpu_intel_hardware_sampler.cpp | 1 + bindings/gpu_nvidia_hardware_sampler.cpp | 1 + bindings/hardware_sampler.cpp | 3 ++- .../hardware_sampling/cpu/hardware_sampler.hpp | 10 +++++----- .../gpu_amd/hardware_sampler.hpp | 10 +++++----- .../gpu_intel/hardware_sampler.hpp | 10 +++++----- .../gpu_nvidia/hardware_sampler.hpp | 10 +++++----- include/hardware_sampling/hardware_sampler.hpp | 16 +++++++--------- .../system_hardware_sampler.hpp | 6 ++++++ src/hardware_sampling/cpu/hardware_sampler.cpp | 2 +- .../gpu_amd/hardware_sampler.cpp | 2 +- .../gpu_intel/hardware_sampler.cpp | 2 +- .../gpu_nvidia/hardware_sampler.cpp | 2 +- src/hardware_sampling/hardware_sampler.cpp | 2 +- .../system_hardware_sampler.cpp | 4 ++++ 17 files changed, 48 insertions(+), 35 deletions(-) diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp index 6d18fe1..9dae939 100644 --- a/bindings/cpu_hardware_sampler.cpp +++ b/bindings/cpu_hardware_sampler.cpp @@ -137,6 +137,7 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("temperature_samples", &hws::cpu_hardware_sampler::temperature_samples, "get all temperature related samples") .def("gfx_samples", &hws::cpu_hardware_sampler::gfx_samples, "get all gfx (iGPU) related samples") .def("idle_state_samples", &hws::cpu_hardware_sampler::idle_state_samples, "get all idle state related samples") + .def("samples_only_as_yaml_string", &hws::cpu_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::cpu_hardware_sampler &self) { return fmt::format("", self); }); diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp index 55fbc75..9da321c 100644 --- a/bindings/gpu_amd_hardware_sampler.cpp +++ b/bindings/gpu_amd_hardware_sampler.cpp @@ -132,6 +132,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) { .def("power_samples", 
&hws::gpu_amd_hardware_sampler::power_samples, "get all power related samples") .def("memory_samples", &hws::gpu_amd_hardware_sampler::memory_samples, "get all memory related samples") .def("temperature_samples", &hws::gpu_amd_hardware_sampler::temperature_samples, "get all temperature related samples") + .def("samples_only_as_yaml_string", &hws::gpu_amd_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::gpu_amd_hardware_sampler &self) { return fmt::format("", self); }); diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp index 4485701..901aeed 100644 --- a/bindings/gpu_intel_hardware_sampler.cpp +++ b/bindings/gpu_intel_hardware_sampler.cpp @@ -120,6 +120,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) { .def("power_samples", &hws::gpu_intel_hardware_sampler::power_samples, "get all power related samples") .def("memory_samples", &hws::gpu_intel_hardware_sampler::memory_samples, "get all memory related samples") .def("temperature_samples", &hws::gpu_intel_hardware_sampler::temperature_samples, "get all temperature related samples") + .def("samples_only_as_yaml_string", &hws::gpu_intel_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::gpu_intel_hardware_sampler &self) { return fmt::format("", self); }); diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp index 23a0549..b049156 100644 --- a/bindings/gpu_nvidia_hardware_sampler.cpp +++ b/bindings/gpu_nvidia_hardware_sampler.cpp @@ -119,6 +119,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) { .def("power_samples", &hws::gpu_nvidia_hardware_sampler::power_samples, "get all power related samples") .def("memory_samples", &hws::gpu_nvidia_hardware_sampler::memory_samples, "get all memory related samples") .def("temperature_samples", 
&hws::gpu_nvidia_hardware_sampler::temperature_samples, "get all temperature related samples") + .def("samples_only_as_yaml_string", &hws::gpu_nvidia_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::gpu_nvidia_hardware_sampler &self) { return fmt::format("", self); }); diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index e547478..2c47046 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -62,7 +62,8 @@ void init_hardware_sampler(py::module_ &m) { .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)") .def("dump_yaml", py::overload_cast(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file") - .def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples as YAML string") + .def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples including additional information like events as YAML string") + .def("samples_only_as_yaml_string", &hws::hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::hardware_sampler &self) { #if defined(HWS_FOR_CPUS_ENABLED) if (dynamic_cast(&self)) { diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp index 8105fd4..1de0a95 100644 --- a/include/hardware_sampling/cpu/hardware_sampler.hpp +++ b/include/hardware_sampling/cpu/hardware_sampler.hpp @@ -107,6 +107,11 @@ class cpu_hardware_sampler : public hardware_sampler { */ 
[[nodiscard]] const cpu_idle_states_samples &idle_state_samples() const noexcept { return idle_state_samples_; } + /** + * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const + */ + [[nodiscard]] std::string samples_only_as_yaml_string() const final; + private: /** * @copydoc hws::hardware_sampler::sampling_loop @@ -118,11 +123,6 @@ class cpu_hardware_sampler : public hardware_sampler { */ [[nodiscard]] std::string device_identification() const final; - /** - * @copydoc hws::hardware_sampler::generate_yaml_string - */ - [[nodiscard]] std::string generate_yaml_string() const final; - /// The general CPU samples. cpu_general_samples general_samples_{}; /// The clock related CPU samples. diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp index a44dec7..59eb1fc 100644 --- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp @@ -116,6 +116,11 @@ class gpu_amd_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const rocm_smi_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } + /** + * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const + */ + [[nodiscard]] std::string samples_only_as_yaml_string() const final; + private: /** * @copydoc hws::hardware_sampler::sampling_loop @@ -127,11 +132,6 @@ class gpu_amd_hardware_sampler : public hardware_sampler { */ [[nodiscard]] std::string device_identification() const final; - /** - * @copydoc hws::hardware_sampler::generate_yaml_string - */ - [[nodiscard]] std::string generate_yaml_string() const final; - /// The ID of the device to sample. 
std::uint32_t device_id_{}; diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp index bea3103..0077861 100644 --- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp @@ -116,6 +116,11 @@ class gpu_intel_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const level_zero_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } + /** + * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const + */ + [[nodiscard]] std::string samples_only_as_yaml_string() const final; + private: /** * @copydoc hws::hardware_sampler::sampling_loop @@ -127,11 +132,6 @@ class gpu_intel_hardware_sampler : public hardware_sampler { */ std::string device_identification() const final; - /** - * @copydoc hws::hardware_sampler::generate_yaml_string - */ - std::string generate_yaml_string() const final; - /// The device handle for the device to sample. 
detail::level_zero_device_handle device_; diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp index d73cd07..6716e63 100644 --- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp @@ -117,6 +117,11 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const nvml_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } + /** + * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const + */ + [[nodiscard]] std::string samples_only_as_yaml_string() const final; + private: /** * @copydoc hws::hardware_sampler::sampling_loop @@ -128,11 +133,6 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler { */ [[nodiscard]] std::string device_identification() const final; - /** - * @copydoc hws::hardware_sampler::generate_yaml_string - */ - [[nodiscard]] std::string generate_yaml_string() const final; - /// The device handle for the device to sample. detail::nvml_device_handle device_{}; diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp index ab7bd3f..443f5bb 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hardware_sampling/hardware_sampler.hpp @@ -161,10 +161,16 @@ class hardware_sampler { void dump_yaml(const std::filesystem::path &filename) const; /** - * @brief Return the hardware samples as YAML string. + * @brief Return the hardware samples as well as events and time points as YAML string. * @return the YAML content as string (`[[nodiscard]]`) */ [[nodiscard]] std::string as_yaml_string() const; + /** + * @brief Return only the hardware samples as YAML string. 
+ * @throws std::runtime_error if sampling is still running + * @return the YAML content as string (`[[nodiscard]]`) + */ + [[nodiscard]] virtual std::string samples_only_as_yaml_string() const = 0; protected: /** @@ -178,14 +184,6 @@ class hardware_sampler { */ [[nodiscard]] virtual std::string device_identification() const = 0; - /** - * @brief Assemble the YAML string containing all hardware samples. - * @param[in] start_time_point the reference time point the hardware samples occurred relative to - * @throws std::runtime_error if sampling is still running - * @return the YAML string (`[[nodiscard]]`) - */ - [[nodiscard]] virtual std::string generate_yaml_string() const = 0; - /** * @brief Add a new time point to this hardware sampler. Called during the sampling loop. * @param time_point the new time point to add diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp index 6346833..96bff5c 100644 --- a/include/hardware_sampling/system_hardware_sampler.hpp +++ b/include/hardware_sampling/system_hardware_sampler.hpp @@ -180,6 +180,12 @@ class system_hardware_sampler { * @return the YAML content as string (`[[nodiscard]]`) */ [[nodiscard]] std::string as_yaml_string() const; + /** + * @brief Return only the hardware samples as YAML string. + * @throws std::runtime_error if sampling is still running + * @return the YAML content as string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string samples_only_as_yaml_string() const; private: /// The different hardware sampler for the current system. 
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 4a51c10..7d20b49 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -467,7 +467,7 @@ std::string cpu_hardware_sampler::device_identification() const { return "cpu_device"; } -std::string cpu_hardware_sampler::generate_yaml_string() const { +std::string cpu_hardware_sampler::samples_only_as_yaml_string() const { // check whether it's safe to generate the YAML entry if (this->is_sampling()) { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp index 7369fa3..b205718 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp @@ -680,7 +680,7 @@ std::string gpu_amd_hardware_sampler::device_identification() const { return fmt::format("gpu_amd_device_{}", device_id_); } -std::string gpu_amd_hardware_sampler::generate_yaml_string() const { +std::string gpu_amd_hardware_sampler::samples_only_as_yaml_string() const { // check whether it's safe to generate the YAML entry if (this->is_sampling()) { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" 
}; diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp index 67abc5d..ed3aed7 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp @@ -691,7 +691,7 @@ std::string gpu_intel_hardware_sampler::device_identification() const { return fmt::format("gpu_intel_device_{}", prop.deviceId); } -std::string gpu_intel_hardware_sampler::generate_yaml_string() const { +std::string gpu_intel_hardware_sampler::samples_only_as_yaml_string() const { // check whether it's safe to generate the YAML entry if (this->is_sampling()) { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp index 14d6a9f..2ffb6e8 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp @@ -548,7 +548,7 @@ std::string gpu_nvidia_hardware_sampler::device_identification() const { return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.device, pcie_info.bus); } -std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const { +std::string gpu_nvidia_hardware_sampler::samples_only_as_yaml_string() const { // check whether it's safe to generate the YAML entry if (this->is_sampling()) { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" 
}; diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index 0ff6a76..d2289cc 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -176,7 +176,7 @@ std::string hardware_sampler::as_yaml_string() const { fmt::join(event_names, ", "), this->sampling_interval().count(), fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), - this->generate_yaml_string()); + this->samples_only_as_yaml_string()); } void hardware_sampler::add_time_point(const std::chrono::steady_clock::time_point time_point) { diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp index 234fced..51a2fed 100644 --- a/src/hardware_sampling/system_hardware_sampler.cpp +++ b/src/hardware_sampling/system_hardware_sampler.cpp @@ -202,4 +202,8 @@ std::string system_hardware_sampler::as_yaml_string() const { return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string str, const auto &ptr) { return str + ptr->as_yaml_string(); }); } +std::string system_hardware_sampler::samples_only_as_yaml_string() const { + return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string str, const auto &ptr) { return str + ptr->samples_only_as_yaml_string(); }); +} + } // namespace hws From 68a3ad1c4ece8ef97cd95ab8b5a896516c206b52 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 27 Sep 2024 15:36:07 +0200 Subject: [PATCH 53/69] Make the device_identification function public. 
--- include/hardware_sampling/cpu/hardware_sampler.hpp | 10 +++++----- .../hardware_sampling/gpu_amd/hardware_sampler.hpp | 10 +++++----- .../hardware_sampling/gpu_intel/hardware_sampler.hpp | 10 +++++----- .../gpu_nvidia/hardware_sampler.hpp | 10 +++++----- include/hardware_sampling/hardware_sampler.hpp | 12 ++++++------ 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp index 1de0a95..bc6971f 100644 --- a/include/hardware_sampling/cpu/hardware_sampler.hpp +++ b/include/hardware_sampling/cpu/hardware_sampler.hpp @@ -107,6 +107,11 @@ class cpu_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const cpu_idle_states_samples &idle_state_samples() const noexcept { return idle_state_samples_; } + /** + * @copydoc hws::hardware_sampler::device_identification + */ + [[nodiscard]] std::string device_identification() const final; + /** * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const */ @@ -118,11 +123,6 @@ class cpu_hardware_sampler : public hardware_sampler { */ void sampling_loop() final; - /** - * @copydoc hws::hardware_sampler::device_identification - */ - [[nodiscard]] std::string device_identification() const final; - /// The general CPU samples. cpu_general_samples general_samples_{}; /// The clock related CPU samples. 
diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp index 59eb1fc..308ca91 100644 --- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp @@ -116,6 +116,11 @@ class gpu_amd_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const rocm_smi_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } + /** + * @copydoc hws::hardware_sampler::device_identification + */ + [[nodiscard]] std::string device_identification() const final; + /** * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const */ @@ -127,11 +132,6 @@ class gpu_amd_hardware_sampler : public hardware_sampler { */ void sampling_loop() final; - /** - * @copydoc hws::hardware_sampler::device_identification - */ - [[nodiscard]] std::string device_identification() const final; - /// The ID of the device to sample. std::uint32_t device_id_{}; diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp index 0077861..6841c7b 100644 --- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp @@ -116,6 +116,11 @@ class gpu_intel_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const level_zero_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } + /** + * @copydoc hws::hardware_sampler::device_identification + */ + std::string device_identification() const final; + /** * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const */ @@ -127,11 +132,6 @@ class gpu_intel_hardware_sampler : public hardware_sampler { */ void sampling_loop() final; - /** - * @copydoc hws::hardware_sampler::device_identification - */ - std::string device_identification() const final; - /// The device handle for the device to sample. 
detail::level_zero_device_handle device_; diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp index 6716e63..4180f95 100644 --- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp @@ -117,6 +117,11 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const nvml_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } + /** + * @copydoc hws::hardware_sampler::device_identification + */ + [[nodiscard]] std::string device_identification() const final; + /** * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const */ @@ -128,11 +133,6 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler { */ void sampling_loop() final; - /** - * @copydoc hws::hardware_sampler::device_identification - */ - [[nodiscard]] std::string device_identification() const final; - /// The device handle for the device to sample. detail::nvml_device_handle device_{}; diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp index 443f5bb..a44fdce 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hardware_sampling/hardware_sampler.hpp @@ -160,6 +160,12 @@ class hardware_sampler { */ void dump_yaml(const std::filesystem::path &filename) const; + /** + * @brief Return the unique device identification. Can be used as unique key in the YAML string. + * @return the unique device identification (`[[nodiscard]]`) + */ + [[nodiscard]] virtual std::string device_identification() const = 0; + /** * @brief Return the hardware samples as well as events and time points as YAML string. * @return the YAML content as string (`[[nodiscard]]`) @@ -178,12 +184,6 @@ class hardware_sampler { */ virtual void sampling_loop() = 0; - /** - * @brief Return the unique device identification. 
Can be used as unique key in the YAML string. - * @return the unique device identification (`[[nodiscard]]`) - */ - [[nodiscard]] virtual std::string device_identification() const = 0; - /** * @brief Add a new time point to this hardware sampler. Called during the sampling loop. * @param time_point the new time point to add From d5395602d9d9d6a477c00afc155a10c83d588864 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Fri, 27 Sep 2024 16:41:40 +0200 Subject: [PATCH 54/69] Add alias targets. --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index f3ba9df..d28e999 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,8 @@ set(HWS_SOURCES # create hardware sampling library set(HWS_LIBRARY_NAME hardware_sampling) add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES}) +add_library(hws ALIAS ${HWS_LIBRARY_NAME}) +add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME}) # set install target set(HWS_TARGETS_TO_INSTALL ${HWS_LIBRARY_NAME}) From a47e8feba19b6ca1f75ee28973fbc446abfcfb94 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 30 Sep 2024 10:05:08 +0200 Subject: [PATCH 55/69] Fix turbostat logic related output bug. 
--- .../cpu/hardware_sampler.cpp | 311 ++++++++++++++---- 1 file changed, 241 insertions(+), 70 deletions(-) diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp index 7d20b49..b996f96 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hardware_sampling/cpu/hardware_sampler.cpp @@ -175,109 +175,193 @@ void cpu_hardware_sampler::sampling_loop() { const std::vector values = detail::split(data[1], '\t'); for (std::size_t i = 0; i < header.size(); ++i) { - if (this->sample_category_enabled(sample_category::general)) { - if (header[i] == "Busy%") { + // general samples + if (header[i] == "Busy%") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.compute_utilization_)::value_type; general_samples_.compute_utilization_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "IPC") { + } + continue; + } else if (header[i] == "IPC") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.ipc_)::value_type; general_samples_.ipc_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "IRQ") { + } + continue; + } else if (header[i] == "IRQ") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.irq_)::value_type; general_samples_.irq_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "SMI") { + } + continue; + } else if (header[i] == "SMI") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.smi_)::value_type; general_samples_.smi_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "POLL") { + } + } else if (header[i] == "POLL") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.poll_)::value_type; general_samples_.poll_ = 
vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "POLL%") { + } + continue; + } else if (header[i] == "POLL%") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.poll_percent_)::value_type; general_samples_.poll_percent_ = vector_type{ detail::convert_to(values[i]) }; } + continue; } - if (this->sample_category_enabled(sample_category::clock)) { - if (header[i] == "Avg_MHz") { + + // clock related samples + if (header[i] == "Avg_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { using vector_type = decltype(clock_samples_.clock_frequency_)::value_type; clock_samples_.clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Bzy_MHz") { + } + continue; + } else if (header[i] == "Bzy_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "TSC_MHz") { + } + continue; + } else if (header[i] == "TSC_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to(values[i]) }; } + continue; } - if (this->sample_category_enabled(sample_category::power)) { - if (header[i] == "PkgWatt") { + + // power related samples + if (header[i] == "PkgWatt") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = decltype(power_samples_.power_usage_)::value_type; power_samples_.power_usage_ = vector_type{ detail::convert_to(values[i]) }; power_samples_.power_measurement_type_ = "current/instant"; power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 }; - } else if (header[i] == 
"CorWatt") { + } + continue; + } else if (header[i] == "CorWatt") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = decltype(power_samples_.core_watt_)::value_type; power_samples_.core_watt_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "RAMWatt") { + } + continue; + } else if (header[i] == "RAMWatt") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = decltype(power_samples_.ram_watt_)::value_type; power_samples_.ram_watt_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "PKG_%") { + } + continue; + } else if (header[i] == "PKG_%") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "RAM_%") { + } + continue; + } else if (header[i] == "RAM_%") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; } + continue; } - if (this->sample_category_enabled(sample_category::temperature)) { - if (header[i] == "CoreTmp") { + + // temperature related samples + if (header[i] == "CoreTmp") { + if (this->sample_category_enabled(sample_category::temperature)) { using vector_type = decltype(temperature_samples_.core_temperature_)::value_type; temperature_samples_.core_temperature_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CoreThr") { + } + continue; + } else if (header[i] == "CoreThr") { + if (this->sample_category_enabled(sample_category::temperature)) { using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to(values[i]) 
}; - } else if (header[i] == "PkgTmp") { + } + continue; + } else if (header[i] == "PkgTmp") { + if (this->sample_category_enabled(sample_category::temperature)) { using vector_type = decltype(temperature_samples_.temperature_)::value_type; temperature_samples_.temperature_ = vector_type{ detail::convert_to(values[i]) }; } + continue; } - if (this->sample_category_enabled(sample_category::gfx)) { - if (header[i] == "GFX%rc6") { + + // gfx (iGPU) related samples + if (header[i] == "GFX%rc6") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFXMHz") { + } + continue; + } else if (header[i] == "GFXMHz") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFXAMHz") { + } + continue; + } else if (header[i] == "GFXAMHz") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFX%C0") { + } + continue; + } else if (header[i] == "GFX%C0") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CPUGFX%") { + } + continue; + } else if (header[i] == "CPUGFX%") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if 
(header[i] == "GFXWatt") { + } + continue; + } else if (header[i] == "GFXWatt") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to(values[i]) }; } + continue; } - if (this->sample_category_enabled(sample_category::idle_state)) { - if (header[i] == "Totl%C0") { + + // idle state related samples + if (header[i] == "Totl%C0") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Any%C0") { + } + continue; + } else if (header[i] == "Any%C0") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CPU%LPI") { + } + continue; + } else if (header[i] == "CPU%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "SYS%LPI") { + } + continue; + } else if (header[i] == "SYS%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Pkg%LPI") { + } + continue; + } else if (header[i] == "Pkg%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = 
decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else { + } + continue; + } else { + if (this->sample_category_enabled(sample_category::idle_state)) { // test against regex const std::string header_str{ header[i] }; const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended }; @@ -291,6 +375,7 @@ void cpu_hardware_sampler::sampling_loop() { idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to(values[i]) }; } } + continue; } } } @@ -339,41 +424,69 @@ void cpu_hardware_sampler::sampling_loop() { // add values to the respective sample entries for (std::size_t i = 0; i < header.size(); ++i) { - if (this->sample_category_enabled(sample_category::general)) { - if (header[i] == "Busy%") { + // general samples + if (header[i] == "Busy%") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.compute_utilization_)::value_type; general_samples_.compute_utilization_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "IPC") { + } + continue; + } else if (header[i] == "IPC") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.ipc_)::value_type; general_samples_.ipc_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "IRQ") { + } + continue; + } else if (header[i] == "IRQ") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.irq_)::value_type; general_samples_.irq_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "SMI") { + } + continue; + } else if (header[i] == "SMI") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = 
decltype(general_samples_.smi_)::value_type; general_samples_.smi_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "POLL") { + } + continue; + } else if (header[i] == "POLL") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.poll_)::value_type; general_samples_.poll_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "POLL%") { + } + continue; + } else if (header[i] == "POLL%") { + if (this->sample_category_enabled(sample_category::general)) { using vector_type = decltype(general_samples_.poll_percent_)::value_type; general_samples_.poll_percent_->push_back(detail::convert_to(values[i])); } + continue; } - if (this->sample_category_enabled(sample_category::clock)) { - if (header[i] == "Avg_MHz") { + + // clock related samples + if (header[i] == "Avg_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { using vector_type = decltype(clock_samples_.clock_frequency_)::value_type; clock_samples_.clock_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Bzy_MHz") { + } + continue; + } else if (header[i] == "Bzy_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "TSC_MHz") { + } + continue; + } else if (header[i] == "TSC_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; clock_samples_.time_stamp_counter_->push_back(detail::convert_to(values[i])); } + continue; } - if (this->sample_category_enabled(sample_category::power)) { - if (header[i] == "PkgWatt") { + + // power related samples + if (header[i] == "PkgWatt") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = 
decltype(power_samples_.power_usage_)::value_type; power_samples_.power_usage_->push_back(detail::convert_to(values[i])); // calculate total energy consumption @@ -382,76 +495,134 @@ void cpu_hardware_sampler::sampling_loop() { const value_type time_difference = std::chrono::duration(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count(); const auto current = power_samples_.power_usage_->back() * time_difference; power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current); - } else if (header[i] == "CorWatt") { + } + continue; + } else if (header[i] == "CorWatt") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = decltype(power_samples_.core_watt_)::value_type; power_samples_.core_watt_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "RAMWatt") { + } + continue; + } else if (header[i] == "RAMWatt") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = decltype(power_samples_.ram_watt_)::value_type; power_samples_.ram_watt_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "PKG_%") { + } + continue; + } else if (header[i] == "PKG_%") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "RAM_%") { + } + continue; + } else if (header[i] == "RAM_%") { + if (this->sample_category_enabled(sample_category::power)) { using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); } + continue; } - if (this->sample_category_enabled(sample_category::temperature)) { - if (header[i] == "CoreTmp") { + + // temperature related samples + if 
(header[i] == "CoreTmp") { + if (this->sample_category_enabled(sample_category::temperature)) { using vector_type = decltype(temperature_samples_.core_temperature_)::value_type; temperature_samples_.core_temperature_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CoreThr") { + } + continue; + } else if (header[i] == "CoreThr") { + if (this->sample_category_enabled(sample_category::temperature)) { using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; temperature_samples_.core_throttle_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "PkgTmp") { + } + continue; + } else if (header[i] == "PkgTmp") { + if (this->sample_category_enabled(sample_category::temperature)) { using vector_type = decltype(temperature_samples_.temperature_)::value_type; temperature_samples_.temperature_->push_back(detail::convert_to(values[i])); } + continue; } - if (this->sample_category_enabled(sample_category::gfx)) { - if (header[i] == "GFX%rc6") { + + // gfx (iGPU) related samples + if (header[i] == "GFX%rc6") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFXMHz") { + } + continue; + } else if (header[i] == "GFXMHz") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; gfx_samples_.gfx_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFXAMHz") { + } + continue; + } else if (header[i] == "GFXAMHz") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFX%C0") { + } + continue; + } else if (header[i] == 
"GFX%C0") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CPUGFX%") { + } + continue; + } else if (header[i] == "CPUGFX%") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFXWatt") { + } + continue; + } else if (header[i] == "GFXWatt") { + if (this->sample_category_enabled(sample_category::gfx)) { using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; gfx_samples_.gfx_watt_->push_back(detail::convert_to(values[i])); } + continue; } - if (this->sample_category_enabled(sample_category::idle_state)) { - if (header[i] == "Totl%C0") { + + // idle state related samples + if (header[i] == "Totl%C0") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Any%C0") { + } + continue; + } else if (header[i] == "Any%C0") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CPU%LPI") { + } + continue; + } else if (header[i] == "CPU%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "SYS%LPI") { + } + 
continue; + } else if (header[i] == "SYS%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Pkg%LPI") { + } + continue; + } else if (header[i] == "Pkg%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); - } else { + } + continue; + } else { + if (this->sample_category_enabled(sample_category::idle_state)) { const std::string header_str{ header[i] }; if (idle_state_samples_.idle_states_.value().count(header_str) > decltype(idle_state_samples_)::map_type::size_type{ 0 }) { using vector_type = cpu_idle_states_samples::map_type::mapped_type; idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to(values[i])); } } + continue; } } } From f5747aec6eb20200e1d2f0cf19961fa225fed7b4 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 30 Sep 2024 10:25:17 +0200 Subject: [PATCH 56/69] Add version information using CMake configuration. 
--- .gitignore | 5 ++- CMakeLists.txt | 14 +++++- bindings/CMakeLists.txt | 1 + bindings/main.cpp | 3 ++ bindings/version.cpp | 26 +++++++++++ include/hardware_sampling/core.hpp | 1 + include/hardware_sampling/version.hpp.in | 51 ++++++++++++++++++++++ src/hardware_sampling/hardware_sampler.cpp | 7 ++- 8 files changed, 104 insertions(+), 4 deletions(-) create mode 100644 bindings/version.cpp create mode 100644 include/hardware_sampling/version.hpp.in diff --git a/.gitignore b/.gitignore index 1d90c9b..4ed094d 100644 --- a/.gitignore +++ b/.gitignore @@ -54,4 +54,7 @@ CTestTestfile.cmake # IDEs ================================ .idea/ .vscode/ -.vs/ \ No newline at end of file +.vs/ + +# auto-generated version header +include/hardware_sampling/version.hpp \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index d28e999..eea2868 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,10 +6,10 @@ cmake_minimum_required(VERSION 3.22) -project("HWS - Hardware Sampling for GPUs and CPUs" +project("hws - Hardware Sampling for GPUs and CPUs" VERSION 1.0.0 LANGUAGES CXX - DESCRIPTION "Hardware sampling (e.g., clock frequencies, memory consumption, temperatures, or energy draw) for CPUs, and GPUS.") + DESCRIPTION "Hardware sampling (e.g., clock frequencies, memory consumption, temperatures, or energy draw) for CPUs and GPUS.") # explicitly set library source files set(HWS_SOURCES @@ -90,6 +90,16 @@ else () endif () target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt) +######################################################################################################################## +## configure version header ## +######################################################################################################################## +message(STATUS "Configuring version information.") +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/include/hardware_sampling/version.hpp.in + ${CMAKE_CURRENT_SOURCE_DIR}/include/hardware_sampling/version.hpp + @ONLY 
+) + #################################################################################################################### ## CPU measurements ## #################################################################################################################### diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index ffa7984..f2ef8d8 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ -36,6 +36,7 @@ set(HWS_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/hardware_sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sample_category.cpp ${CMAKE_CURRENT_SOURCE_DIR}/system_hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/version.cpp ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ) diff --git a/bindings/main.cpp b/bindings/main.cpp index 39eb521..f3dca3f 100644 --- a/bindings/main.cpp +++ b/bindings/main.cpp @@ -24,6 +24,7 @@ void init_cpu_hardware_sampler(py::module_ &); void init_gpu_nvidia_hardware_sampler(py::module_ &); void init_gpu_amd_hardware_sampler(py::module_ &); void init_gpu_intel_hardware_sampler(py::module_ &); +void init_version(py::module_ &); PYBIND11_MODULE(HardwareSampling, m) { m.doc() = "Hardware Sampling for CPUs and GPUs"; @@ -57,4 +58,6 @@ PYBIND11_MODULE(HardwareSampling, m) { init_gpu_intel_hardware_sampler(m); #endif m.def("has_gpu_intel_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_INTEL_GPUS_ENABLED); }); + + init_version(m); } diff --git a/bindings/version.cpp b/bindings/version.cpp new file mode 100644 index 0000000..df892ca --- /dev/null +++ b/bindings/version.cpp @@ -0,0 +1,26 @@ +/** +* @author Marcel Breyer +* @copyright 2024-today All Rights Reserved +* @license This file is released under the MIT license. +* See the LICENSE.md file in the project root for full license information. 
+*/ + +#include "hardware_sampling/version.hpp" // hws::version + +#include "pybind11/pybind11.h" // py::module_ + +namespace py = pybind11; + +// dummy class +class version { }; + +void init_version(py::module_ &m) { + // bind global version information + // complexity necessary to enforce read-only + py::class_(m, "version") + .def_property_readonly_static("name", [](const py::object & /* self */) { return hws::version::name; }, "the name of the hws library") + .def_property_readonly_static("version", [](const py::object & /* self */) { return hws::version::version; }, "the used version of the hws library") + .def_property_readonly_static("major", [](const py::object & /* self */) { return hws::version::major; }, "the used major version of the hws library") + .def_property_readonly_static("minor", [](const py::object & /* self */) { return hws::version::minor; }, "the used minor version of the hws library") + .def_property_readonly_static("patch", [](const py::object & /* self */) { return hws::version::patch; }, "the used patch version of the hws library"); +} diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp index 7b259f4..44d8922 100644 --- a/include/hardware_sampling/core.hpp +++ b/include/hardware_sampling/core.hpp @@ -16,6 +16,7 @@ #include "hardware_sampling/hardware_sampler.hpp" #include "hardware_sampling/sample_category.hpp" #include "hardware_sampling/system_hardware_sampler.hpp" +#include "hardware_sampling/version.hpp" #if defined(HWS_FOR_CPUS_ENABLED) #include "hardware_sampling/cpu/cpu_samples.hpp" diff --git a/include/hardware_sampling/version.hpp.in b/include/hardware_sampling/version.hpp.in new file mode 100644 index 0000000..88d0c1e --- /dev/null +++ b/include/hardware_sampling/version.hpp.in @@ -0,0 +1,51 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. 
+ * See the LICENSE.md file in the project root for full license information. + * + * @brief Version information for the hardware sampling. + */ + +#ifndef HARDWARE_SAMPLING_VERSION_HPP_ +#define HARDWARE_SAMPLING_VERSION_HPP_ +#pragma once + +#include // std::string_view + +namespace hws::version { + +/** + * @brief The name of the library. + * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. + */ +constexpr std::string_view name = "@PROJECT_NAME@"; + +/** + * @brief The current version of the library in the form: "major.minor.patch". + * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. + */ +constexpr std::string_view version = "@PROJECT_VERSION@"; + +/** + * @brief The current major version of the library. + * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. + */ +constexpr int major = @PROJECT_VERSION_MAJOR@; + +/** + * @brief The current minor version of the library. + * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. + */ +constexpr int minor = @PROJECT_VERSION_MINOR@; + +/** + * @brief The current patch version of the library. + * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. 
+ */ +constexpr int patch = @PROJECT_VERSION_PATCH@; + +} // namespace hws::version + +#endif // HARDWARE_SAMPLING_VERSION_HPP_ diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index d2289cc..e8813d9 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -9,6 +9,7 @@ #include "hardware_sampling/event.hpp" // hws::event #include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time +#include "hardware_sampling/version.hpp" // hws::version::version #include "fmt/chrono.h" // direct formatting of std::chrono types #include "fmt/format.h" // fmt::format @@ -127,7 +128,8 @@ void hardware_sampler::dump_yaml(const char *filename) const { std::ofstream file{ filename, std::ios_base::app }; // begin a new YAML document (only with "---" multiple YAML documents in a single file are allowed) - file << "---\n\n" << this->as_yaml_string(); + file << "---\n\n" + << this->as_yaml_string(); } void hardware_sampler::dump_yaml(const std::string &filename) const { @@ -152,6 +154,8 @@ std::string hardware_sampler::as_yaml_string() const { } return fmt::format("device_identification: \"{}\"\n" + "\n" + "version: \"{}\"\n" "\n" "start_time: \"{:%Y-%m-%d %X}\"\n" "\n" @@ -171,6 +175,7 @@ std::string hardware_sampler::as_yaml_string() const { "\n" "{}\n", this->device_identification(), + version::version, start_date_time_, fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "), fmt::join(event_names, ", "), From dc71dce017d1b19e4ef103bbabbdc29412f38d11 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 30 Sep 2024 10:47:03 +0200 Subject: [PATCH 57/69] Add check that the sampling interval must not be zero. 
--- include/hardware_sampling/hardware_sampler.hpp | 1 + src/hardware_sampling/hardware_sampler.cpp | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp index a44fdce..8824ac3 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hardware_sampling/hardware_sampler.hpp @@ -34,6 +34,7 @@ class hardware_sampler { * @brief Construct a new hardware sampler with the provided @p sampling_interval. * @param[in] sampling_interval the used sampling interval * @param[in] category the sample categories that are enabled for hardware sampling + * @throws std::invalid_argument if the @p sampling_interval is zero */ hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category); diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp index e8813d9..d5ec9fc 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hardware_sampling/hardware_sampler.cpp @@ -28,7 +28,11 @@ namespace hws { hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : sampling_interval_{ sampling_interval }, - sample_category_{ category } { } + sample_category_{ category } { + if (sampling_interval == std::chrono::milliseconds{ 0 }) { + throw std::invalid_argument{ "The sampling interval must be larger than 0ms!" }; + } +} hardware_sampler::~hardware_sampler() = default; From ed858306a5c0cb4f16e681586fd5e4d1a7416b3d Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 30 Sep 2024 10:48:20 +0200 Subject: [PATCH 58/69] Update code examples. 
--- examples/cpp/main.cpp | 2 +- examples/python/main.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/cpp/main.cpp b/examples/cpp/main.cpp index 166a967..551c721 100644 --- a/examples/cpp/main.cpp +++ b/examples/cpp/main.cpp @@ -12,7 +12,7 @@ #include // std::vector int main() { - hws::cpu_hardware_sampler sampler{}; + hws::system_hardware_sampler sampler{}; // could also be, e.g., // hws::gpu_nvidia_hardware_sampler sampler{}; sampler.start_sampling(); diff --git a/examples/python/main.py b/examples/python/main.py index 7f384ca..da0809f 100644 --- a/examples/python/main.py +++ b/examples/python/main.py @@ -8,12 +8,12 @@ # See the LICENSE.md file in the project root for full license information. # ######################################################################################################################## -import HardwareSampling +import HardwareSampling as hws import numpy as np -sampler = HardwareSampling.CpuHardwareSampler() +sampler = hws.SystemHardwareSampler() # could also be, e.g., -# sampler = HardwareSampling.GpuNvidiaHardwareSampler() +# sampler = hws.GpuNvidiaHardwareSampler() sampler.start() sampler.add_event("init") From 257ca3d9cc53625437294ec5a56284cb4acba547 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 30 Sep 2024 10:52:15 +0200 Subject: [PATCH 59/69] Fix usage of wrong C++ standard in documentation string. 
--- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eea2868..a96756b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME}) # set install target set(HWS_TARGETS_TO_INSTALL ${HWS_LIBRARY_NAME}) -# use C++20 +# use C++17 target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_17) # add target include directory From 63bb80f3e7e2bb21f7e24b2c07b56005c763714c Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 30 Sep 2024 11:57:36 +0200 Subject: [PATCH 60/69] Add the possibility to generate a Doxygen documentation. --- .github/workflows/documentation.yml | 43 ++++++++++++++ .gitignore | 1 + CMakeLists.txt | 10 +++- docs/CMakeLists.txt | 56 +++++++++++++++++++ include/hardware_sampling/cpu/cpu_samples.hpp | 4 ++ .../cpu/hardware_sampler.hpp | 4 ++ include/hardware_sampling/event.hpp | 4 ++ .../gpu_amd/hardware_sampler.hpp | 4 ++ .../gpu_amd/rocm_smi_samples.hpp | 4 ++ include/hardware_sampling/gpu_amd/utility.hpp | 7 ++- .../gpu_intel/hardware_sampler.hpp | 4 ++ .../gpu_intel/level_zero_samples.hpp | 4 ++ .../gpu_nvidia/hardware_sampler.hpp | 4 ++ .../gpu_nvidia/nvml_samples.hpp | 5 ++ .../hardware_sampling/gpu_nvidia/utility.hpp | 5 ++ .../hardware_sampling/hardware_sampler.hpp | 11 ++-- .../system_hardware_sampler.hpp | 6 +- include/hardware_sampling/utility.hpp | 15 +++++ 18 files changed, 181 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/documentation.yml create mode 100644 docs/CMakeLists.txt diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 0000000..7f41715 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,43 @@ +name: Generate documentation + +# only trigger this action on specific events +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build-documentation: + runs-on: ubuntu-latest + steps: + # 
checkout repository + - name: Checkout hws + uses: actions/checkout@v4.2.0 + with: + path: hardware_sampling + # install dependencies + - name: Dependencies + run: | + sudo apt update + sudo apt-get install -y doxygen graphviz + # configure project via CMake + - name: Configure + run: | + cd hardware_sampling + mkdir build + cd build + cmake -DHWS_ENABLE_DOCUMENTATION=ON .. + # build project + - name: Generate + run: | + cd hardware_sampling/build + make doc + # deploy generated documentation using github.io + - name: Deploy + uses: peaceiris/actions-gh-pages@v4 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./hardware_sampling/docs/html \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4ed094d..36754d0 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ Prerequisites # CMake ================================ bin/ build*/ +docs/html install*/ cmake-build*/ CMakeLists.txt.user diff --git a/CMakeLists.txt b/CMakeLists.txt index a96756b..f4e50c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -282,12 +282,20 @@ endif () ## enable Python bindings ## #################################################################################################################### option(HWS_ENABLE_PYTHON_BINDINGS "Build language bindings for Python." ON) - if (HWS_ENABLE_PYTHON_BINDINGS) add_subdirectory(bindings) endif () +######################################################################################################################## +## add documentation ## +######################################################################################################################## +option(HWS_ENABLE_DOCUMENTATION "Add documentation using Doxygen." 
OFF) +if (HWS_ENABLE_DOCUMENTATION) + add_subdirectory(docs) +endif () + + ######################################################################################################################## ## add support for `make install` ## ######################################################################################################################## diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt new file mode 100644 index 0000000..1623953 --- /dev/null +++ b/docs/CMakeLists.txt @@ -0,0 +1,56 @@ +## Authors: Marcel Breyer +## Copyright (C): 2024-today All Rights Reserved +## License: This file is released under the MIT license. +## See the LICENSE.md file in the project root for full license information. +######################################################################################################################## + +######################################################################################################################## +## setup documentation generation with doxygen ## +######################################################################################################################## +## use installed doxygen +find_package(Doxygen REQUIRED OPTIONAL_COMPONENTS dot) + +## configure doxygen +set(DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/docs") +set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${PROJECT_SOURCE_DIR}/README.md") +set(DOXYGEN_FILE_PATTERNS "*.hpp;") +set(DOXYGEN_STRIP_FROM_PATH "${PROJECT_SOURCE_DIR}") +set(DOXYGEN_ABBREVIATE_BRIEF "") +set(DOXYGEN_QUIET "YES") +set(DOXYGEN_HTML_TIMESTAMP "YES") +set(DOXYGEN_NUM_PROC_THREADS 0) +set(DOXYGEN_WARN_NO_PARAMDOC "YES") +set(DOXYGEN_SORT_MEMBER_DOCS "NO") +set(DOXYGEN_INLINE_INHERITED_MEMB "YES") +set(DOXYGEN_USE_MATHJAX "YES") +set(DOXYGEN_EXCLUDE_SYMBOLS "*_HPP_") + +set(DOXYGEN_DOT_IMAGE_FORMAT "svg") +set(DOXYGEN_INTERACTIVE_SVG "YES") +set(DOXYGEN_INCLUDE_GRAPH "NO") +set(DOXYGEN_EXTRACT_PRIVATE "YES") + +## enable processing of specific attributes and macros 
+set(DOXYGEN_ENABLE_PREPROCESSING "YES") +set(DOXYGEN_MACRO_EXPANSION "YES") +set(DOXYGEN_EXPAND_ONLY_PREDEF "YES") +set(DOXYGEN_EXPAND_AS_DEFINED "YES") + +set(DOXYGEN_VERBATIM_VARS DOXYGEN_ALIASES) +set(DOXYGEN_ALIASES + [[license="\par License^^\parblock^^" ]] +) + +## add doxygen as target +doxygen_add_docs( + doc + "${PROJECT_SOURCE_DIR}/include;${PROJECT_SOURCE_DIR}/README.md;" + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + COMMENT "Generating API documentation with Doxygen." +) + +## install targets for the documentation +include(GNUInstallDirs) +install(DIRECTORY "${PROJECT_SOURCE_DIR}/docs/html" + DESTINATION "${CMAKE_INSTALL_DOCDIR}" +) diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp index 3bd8a49..1343909 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hardware_sampling/cpu/cpu_samples.hpp @@ -339,6 +339,8 @@ std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &sampl } // namespace hws +/// @cond Doxygen_suppress + template <> struct fmt::formatter : fmt::ostream_formatter { }; @@ -360,4 +362,6 @@ struct fmt::formatter : fmt::ostream_formatter { }; template <> struct fmt::formatter : fmt::ostream_formatter { }; +/// @endcond + #endif // HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_ diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp index bc6971f..4ae805f 100644 --- a/include/hardware_sampling/cpu/hardware_sampler.hpp +++ b/include/hardware_sampling/cpu/hardware_sampler.hpp @@ -150,7 +150,11 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler) } // namespace hws +/// @cond Doxygen_suppress + template <> struct fmt::formatter : fmt::ostream_formatter { }; +/// @endcond + #endif // HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/event.hpp b/include/hardware_sampling/event.hpp index 7129141..2a60581 100644 --- 
a/include/hardware_sampling/event.hpp +++ b/include/hardware_sampling/event.hpp @@ -50,7 +50,11 @@ std::ostream &operator<<(std::ostream &out, const event &e); } // namespace hws +/// @cond Doxygen_suppress + template <> struct fmt::formatter : fmt::ostream_formatter { }; +/// @endcond + #endif // HARDWARE_SAMPLING_EVENT_HPP_ diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp index 308ca91..00dc90f 100644 --- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp @@ -163,7 +163,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp } // namespace hws +/// @cond Doxygen_suppress + template <> struct fmt::formatter : fmt::ostream_formatter { }; +/// @endcond + #endif // HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp index 727e683..958aa3a 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp @@ -272,6 +272,8 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples & } // namespace hws +/// @cond Doxygen_suppress + template <> struct fmt::formatter : fmt::ostream_formatter { }; @@ -287,4 +289,6 @@ struct fmt::formatter : fmt::ostream_formatter { } template <> struct fmt::formatter : fmt::ostream_formatter { }; +/// @endcond + #endif // HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp index a277e06..aa21ba0 100644 --- a/include/hardware_sampling/gpu_amd/utility.hpp +++ b/include/hardware_sampling/gpu_amd/utility.hpp @@ -25,6 +25,11 @@ namespace hws::detail { * @brief Defines the `HWS_ROCM_SMI_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. 
* @details Throws an exception if a ROCm SMI call returns with an error. Additionally outputs a more concrete error string if possible. */ +/** + * @def HWS_HIP_ERROR_CHECK + * @brief Defines the `HWS_HIP_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. + * @details Throws an exception if a HIP call returns with an error. Additionally outputs a more concrete error string. + */ #if defined(HWS_ERROR_CHECKS_ENABLED) #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) \ { \ @@ -58,7 +63,7 @@ namespace hws::detail { /** * @brief Convert the performance level value (`rsmi_dev_perf_level_t`) to a string. - * @param[in] clocks_event_reasons the bitmask to convert to a string + * @param[in] perf_level the bitmask to convert to a string * @return all event throttle reasons (`[[nodiscard]]`) */ [[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level); diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp index 6841c7b..cf97f41 100644 --- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp @@ -163,7 +163,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa } // namespace hws +/// @cond Doxygen_suppress + template <> struct fmt::formatter : fmt::ostream_formatter { }; +/// @endcond + #endif // HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp index f53422e..2ade186 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp @@ -267,6 +267,8 @@ std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples } // namespace hws +/// @cond Doxygen_suppress + template <> struct fmt::formatter : fmt::ostream_formatter { }; 
@@ -282,4 +284,6 @@ struct fmt::formatter : fmt::ostream_formatter { template <> struct fmt::formatter : fmt::ostream_formatter { }; +/// @endcond + #endif // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp index 4180f95..5dba1ca 100644 --- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp +++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp @@ -164,7 +164,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s } // namespace hws +/// @cond Doxygen_suppress + template <> struct fmt::formatter : fmt::ostream_formatter { }; +/// @endcond + #endif // HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp index 91f4e6b..631e572 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp @@ -81,6 +81,7 @@ class nvml_clock_samples { // befriend hardware sampler class friend class gpu_nvidia_hardware_sampler; + /// The map type used to map the available clock frequencies to a specific memory frequency. 
using map_type = std::map>; public: @@ -260,6 +261,8 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp } // namespace hws +/// @cond Doxygen_suppress + template <> struct fmt::formatter : fmt::ostream_formatter { }; @@ -275,4 +278,6 @@ struct fmt::formatter : fmt::ostream_formatter { }; template <> struct fmt::formatter : fmt::ostream_formatter { }; +/// @endcond + #endif // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp index 0352915..aaf0420 100644 --- a/include/hardware_sampling/gpu_nvidia/utility.hpp +++ b/include/hardware_sampling/gpu_nvidia/utility.hpp @@ -26,6 +26,11 @@ namespace hws::detail { * @brief Defines the `HWS_NVML_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. * @details Throws an exception if an NVML call returns with an error. Additionally outputs a more concrete error string. */ +/** + * @def HWS_CUDA_ERROR_CHECK + * @brief Defines the `HWS_CUDA_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. + * @details Throws an exception if a CUDA call returns with an error. Additionally outputs a more concrete error string. + */ #if defined(HWS_ERROR_CHECKS_ENABLED) #define HWS_NVML_ERROR_CHECK(nvml_func) \ { \ diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp index 8824ac3..cc59331 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hardware_sampling/hardware_sampler.hpp @@ -123,15 +123,16 @@ class hardware_sampler { [[nodiscard]] std::size_t num_events() const noexcept { return events_.size(); } /** - * @brief Return the number of recorded events. + * @brief Return a vector of all recorded events. 
* @return the events (`[[nodiscard]]`) */ [[nodiscard]] const std::vector &get_events() const noexcept { return events_; } /** - * @brief Return the number of recorded events. + * @brief Return the event at index @p idx. + * @param[in] idx the event to return * @throws std::out_of_range the the @p idx is out of bounce - * @return the number of events (`[[nodiscard]]`) + * @return the event at index @p idx (`[[nodiscard]]`) */ [[nodiscard]] event get_event(std::size_t idx) const; @@ -153,11 +154,11 @@ class hardware_sampler { */ void dump_yaml(const char *filename) const; /** - * @copydoc hws::hardware_sampler::dump_yaml(const char *) + * @copydoc hws::hardware_sampler::dump_yaml(const char *) const */ void dump_yaml(const std::string &filename) const; /** - * @copydoc hws::hardware_sampler::dump_yaml(const char *) + * @copydoc hws::hardware_sampler::dump_yaml(const char *) const */ void dump_yaml(const std::filesystem::path &filename) const; diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp index 96bff5c..15279de 100644 --- a/include/hardware_sampling/system_hardware_sampler.hpp +++ b/include/hardware_sampling/system_hardware_sampler.hpp @@ -157,7 +157,7 @@ class system_hardware_sampler { */ [[nodiscard]] const std::unique_ptr &sampler(std::size_t idx) const; /** - * @copydoc hws::system_hardware_sampler::samplers(std::size_t idx) const + * @copydoc hws::system_hardware_sampler::sampler(std::size_t idx) const */ [[nodiscard]] std::unique_ptr &sampler(std::size_t idx); @@ -167,11 +167,11 @@ class system_hardware_sampler { */ void dump_yaml(const char *filename) const; /** - * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) + * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) const */ void dump_yaml(const std::string &filename) const; /** - * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) + * @copydoc hws::system_hardware_sampler::dump_yaml(const char 
*) const */ void dump_yaml(const std::filesystem::path &filename) const; diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp index c70b4c2..4a99a31 100644 --- a/include/hardware_sampling/utility.hpp +++ b/include/hardware_sampling/utility.hpp @@ -58,15 +58,30 @@ namespace hws::detail { /** type_traits **/ /*****************************************************************************************************/ +/** + * @brief Remove the topmost cv-qualifiers from type @p T. + */ template using remove_cvref_t = std::remove_cv_t>; +/** + * @brief The case if the type @p T isn't a std::vector. + * @tparam T the type to check + */ template struct is_vector : std::false_type { }; +/** + * @brief The case if the type @p T is a std::vector. + * @tparam T the type to check + */ template struct is_vector> : std::true_type { }; +/** + * @brief Evaluates to `true` if @p T is a std::vector, otherwise `false`. + * @tparam T the type to check + */ template constexpr bool is_vector_v = is_vector::value; From 621f50dba9392090ab474d8a66f2589979bf328e Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 30 Sep 2024 13:53:16 +0200 Subject: [PATCH 61/69] Rename hardware_sampling folder to hws and change target library name. 
--- .clang-format | 2 +- .gitignore | 2 +- CMakeLists.txt | 61 +++++++++---------- README.md | 8 +-- bindings/cpu_hardware_sampler.cpp | 8 +-- bindings/event.cpp | 2 +- bindings/gpu_amd_hardware_sampler.cpp | 8 +-- bindings/gpu_intel_hardware_sampler.cpp | 8 +-- bindings/gpu_nvidia_hardware_sampler.cpp | 8 +-- bindings/hardware_sampler.cpp | 14 ++--- bindings/relative_event.hpp | 6 +- bindings/sample_category.cpp | 2 +- bindings/system_hardware_sampler.cpp | 8 +-- bindings/version.cpp | 2 +- ...lingConfig.cmake.in => hwsConfig.cmake.in} | 4 +- examples/cpp/CMakeLists.txt | 4 +- examples/cpp/main.cpp | 2 +- include/hardware_sampling/core.hpp | 41 ------------- include/hws/core.hpp | 41 +++++++++++++ .../cpu/cpu_samples.hpp | 8 +-- .../cpu/hardware_sampler.hpp | 12 ++-- .../cpu/utility.hpp | 6 +- include/{hardware_sampling => hws}/event.hpp | 6 +- .../gpu_amd/hardware_sampler.hpp | 12 ++-- .../gpu_amd/rocm_smi_samples.hpp | 10 +-- .../gpu_amd/utility.hpp | 6 +- .../gpu_intel/hardware_sampler.hpp | 14 ++--- .../gpu_intel/level_zero_device_handle.hpp | 10 +-- .../level_zero_device_handle_impl.hpp | 10 +-- .../gpu_intel/level_zero_samples.hpp | 8 +-- .../gpu_intel/utility.hpp | 6 +- .../gpu_nvidia/hardware_sampler.hpp | 14 ++--- .../gpu_nvidia/nvml_device_handle.hpp | 10 +-- .../gpu_nvidia/nvml_device_handle_impl.hpp | 10 +-- .../gpu_nvidia/nvml_samples.hpp | 8 +-- .../gpu_nvidia/utility.hpp | 6 +- .../hardware_sampler.hpp | 10 +-- .../sample_category.hpp | 6 +- .../system_hardware_sampler.hpp | 12 ++-- .../{hardware_sampling => hws}/utility.hpp | 6 +- .../{hardware_sampling => hws}/version.hpp.in | 6 +- .../cpu/cpu_samples.cpp | 4 +- .../cpu/hardware_sampler.cpp | 12 ++-- .../cpu/utility.cpp | 4 +- src/{hardware_sampling => hws}/event.cpp | 2 +- .../gpu_amd/hardware_sampler.cpp | 12 ++-- .../gpu_amd/rocm_smi_samples.cpp | 4 +- .../gpu_amd/utility.cpp | 2 +- .../gpu_intel/hardware_sampler.cpp | 16 ++--- .../gpu_intel/level_zero_samples.cpp | 4 +- 
.../gpu_intel/utility.cpp | 2 +- .../gpu_nvidia/hardware_sampler.cpp | 16 ++--- .../gpu_nvidia/nvml_samples.cpp | 4 +- .../gpu_nvidia/utility.cpp | 2 +- .../hardware_sampler.cpp | 8 +-- .../system_hardware_sampler.cpp | 20 +++--- src/{hardware_sampling => hws}/utility.cpp | 2 +- 57 files changed, 270 insertions(+), 271 deletions(-) rename cmake/{hardware_samplingConfig.cmake.in => hwsConfig.cmake.in} (84%) delete mode 100644 include/hardware_sampling/core.hpp create mode 100644 include/hws/core.hpp rename include/{hardware_sampling => hws}/cpu/cpu_samples.hpp (98%) rename include/{hardware_sampling => hws}/cpu/hardware_sampler.hpp (92%) rename include/{hardware_sampling => hws}/cpu/utility.hpp (93%) rename include/{hardware_sampling => hws}/event.hpp (93%) rename include/{hardware_sampling => hws}/gpu_amd/hardware_sampler.hpp (93%) rename include/{hardware_sampling => hws}/gpu_amd/rocm_smi_samples.hpp (98%) rename include/{hardware_sampling => hws}/gpu_amd/utility.hpp (96%) rename include/{hardware_sampling => hws}/gpu_intel/hardware_sampler.hpp (91%) rename include/{hardware_sampling => hws}/gpu_intel/level_zero_device_handle.hpp (85%) rename include/{hardware_sampling => hws}/gpu_intel/level_zero_device_handle_impl.hpp (85%) rename include/{hardware_sampling => hws}/gpu_intel/level_zero_samples.hpp (98%) rename include/{hardware_sampling => hws}/gpu_intel/utility.hpp (95%) rename include/{hardware_sampling => hws}/gpu_nvidia/hardware_sampler.hpp (92%) rename include/{hardware_sampling => hws}/gpu_nvidia/nvml_device_handle.hpp (85%) rename include/{hardware_sampling => hws}/gpu_nvidia/nvml_device_handle_impl.hpp (74%) rename include/{hardware_sampling => hws}/gpu_nvidia/nvml_samples.hpp (98%) rename include/{hardware_sampling => hws}/gpu_nvidia/utility.hpp (96%) rename include/{hardware_sampling => hws}/hardware_sampler.hpp (96%) rename include/{hardware_sampling => hws}/sample_category.hpp (96%) rename include/{hardware_sampling => 
hws}/system_hardware_sampler.hpp (95%) rename include/{hardware_sampling => hws}/utility.hpp (99%) rename include/{hardware_sampling => hws}/version.hpp.in (92%) rename src/{hardware_sampling => hws}/cpu/cpu_samples.cpp (99%) rename src/{hardware_sampling => hws}/cpu/hardware_sampler.cpp (98%) rename src/{hardware_sampling => hws}/cpu/utility.cpp (95%) rename src/{hardware_sampling => hws}/event.cpp (93%) rename src/{hardware_sampling => hws}/gpu_amd/hardware_sampler.cpp (98%) rename src/{hardware_sampling => hws}/gpu_amd/rocm_smi_samples.cpp (99%) rename src/{hardware_sampling => hws}/gpu_amd/utility.cpp (96%) rename src/{hardware_sampling => hws}/gpu_intel/hardware_sampler.cpp (98%) rename src/{hardware_sampling => hws}/gpu_intel/level_zero_samples.cpp (99%) rename src/{hardware_sampling => hws}/gpu_intel/utility.cpp (99%) rename src/{hardware_sampling => hws}/gpu_nvidia/hardware_sampler.cpp (97%) rename src/{hardware_sampling => hws}/gpu_nvidia/nvml_samples.cpp (99%) rename src/{hardware_sampling => hws}/gpu_nvidia/utility.cpp (97%) rename src/{hardware_sampling => hws}/hardware_sampler.cpp (96%) rename src/{hardware_sampling => hws}/system_hardware_sampler.cpp (90%) rename src/{hardware_sampling => hws}/utility.cpp (97%) diff --git a/.clang-format b/.clang-format index 97d4dc9..84b7fa5 100644 --- a/.clang-format +++ b/.clang-format @@ -77,7 +77,7 @@ ForEachMacros: [ 'foreach', 'Q_FOREACH', 'BOOST_FOREACH' ] IfMacros: [ ] IncludeBlocks: Regroup IncludeCategories: - - Regex: '^"hardware_sampling/' + - Regex: '^"hws/' Priority: 1 - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess|fmt)' Priority: 2 diff --git a/.gitignore b/.gitignore index 36754d0..9f74de0 100644 --- a/.gitignore +++ b/.gitignore @@ -58,4 +58,4 @@ CTestTestfile.cmake .vs/ # auto-generated version header -include/hardware_sampling/version.hpp \ No newline at end of file +include/hws/version.hpp \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 
f4e50c8..97ccbe1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,16 +13,15 @@ project("hws - Hardware Sampling for GPUs and CPUs" # explicitly set library source files set(HWS_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/event.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/hardware_sampler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/system_hardware_sampler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/utility.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/system_hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/utility.cpp ) # create hardware sampling library -set(HWS_LIBRARY_NAME hardware_sampling) +set(HWS_LIBRARY_NAME hws) add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES}) -add_library(hws ALIAS ${HWS_LIBRARY_NAME}) add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME}) # set install target @@ -95,8 +94,8 @@ target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt) ######################################################################################################################## message(STATUS "Configuring version information.") configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/include/hardware_sampling/version.hpp.in - ${CMAKE_CURRENT_SOURCE_DIR}/include/hardware_sampling/version.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp.in + ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp @ONLY ) @@ -189,9 +188,9 @@ if (HWS_LSCPU_FOUND OR HWS_FREE_FOUND OR HWS_TURBOSTAT_EXECUTION_TYPE) # add source file to source file list target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definitions @@ -214,9 +213,9 @@ if (CUDAToolkit_FOUND) # add source file to source file list target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definition @@ -241,9 +240,9 @@ if (rocm_smi_FOUND) # add source file to source file list target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definition @@ 
-266,9 +265,9 @@ if (level_zero_FOUND) # add source file to source file list target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definition @@ -302,7 +301,7 @@ endif () include(GNUInstallDirs) ## install all necessary library targets install(TARGETS ${HWS_TARGETS_TO_INSTALL} - EXPORT hardware_sampling_Targets + EXPORT hws_Targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all files that are neither executables, shared lib or headers LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all shared lib files RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" # all executables @@ -316,28 +315,28 @@ install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/" ## manage version comparison include(CMakePackageConfigHelpers) write_basic_package_version_file( - "hardware_samplingConfigVersion.cmake" + "hwsConfigVersion.cmake" VERSION ${PROJECT_VERSION} COMPATIBILITY SameMajorVersion ) ## generate configuration file configure_package_config_file( - "${CMAKE_CURRENT_SOURCE_DIR}/cmake/hardware_samplingConfig.cmake.in" - "${PROJECT_BINARY_DIR}/hardware_samplingConfig.cmake" - INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/hwsConfig.cmake.in" + "${PROJECT_BINARY_DIR}/hwsConfig.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake ) ## create and copy install-targets file -install(EXPORT hardware_sampling_Targets - FILE hardware_samplingTargets.cmake +install(EXPORT hws_Targets + FILE hwsTargets.cmake NAMESPACE hws:: - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake ) ## create file containing the build configuration and version information install(FILES - "${PROJECT_BINARY_DIR}/hardware_samplingConfig.cmake" - "${PROJECT_BINARY_DIR}/hardware_samplingConfigVersion.cmake" - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake + "${PROJECT_BINARY_DIR}/hwsConfig.cmake" + "${PROJECT_BINARY_DIR}/hwsConfigVersion.cmake" + 
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake ) \ No newline at end of file diff --git a/README.md b/README.md index 77869b4..477738a 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ cmake --install . --prefix "/home/myuser/installdir" Afterward, the necessary exports should be performed: ```bash -export CMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/share/hardware_sampling/cmake:${CMAKE_PREFIX_PATH} +export CMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/share/hws/cmake:${CMAKE_PREFIX_PATH} export LD_LIBRARY_PATH=${CMAKE_INSTALL_PREFIX}/lib:${LD_LIBRARY_PATH} export CPLUS_INCLUDE_PATH=${CMAKE_INSTALL_PREFIX}/include:${CPLUS_INCLUDE_PATH} export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} @@ -242,15 +242,15 @@ current clock frequencies, temperatures, or memory consumption. ## Example Python usage ```python -import HardwareSampling +import HardwareSampling as hws import numpy as np import matplotlib.pyplot as plt import matplotlib.dates as mdates import datetime -sampler = HardwareSampling.CpuHardwareSampler() +sampler = hws.CpuHardwareSampler() # could also be, e.g., -# sampler = HardwareSampling.GpuNvidiaHardwareSampler() +# sampler = hws.GpuNvidiaHardwareSampler() sampler.start() sampler.add_event("init") diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp index 9dae939..8e3e104 100644 --- a/bindings/cpu_hardware_sampler.cpp +++ b/bindings/cpu_hardware_sampler.cpp @@ -5,10 +5,10 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} -#include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} +#include "hws/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds diff --git a/bindings/event.cpp b/bindings/event.cpp index 8a9696a..f19315a 100644 --- a/bindings/event.cpp +++ b/bindings/event.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/event.hpp" // hws::event +#include "hws/event.hpp" // hws::event #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // bind std::chrono types diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp index 9da321c..db846da 100644 --- a/bindings/gpu_amd_hardware_sampler.cpp +++ b/bindings/gpu_amd_hardware_sampler.cpp @@ -5,10 +5,10 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler -#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler +#include "hws/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp index 901aeed..aaae9ed 100644 --- a/bindings/gpu_intel_hardware_sampler.cpp +++ b/bindings/gpu_intel_hardware_sampler.cpp @@ -5,10 +5,10 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler -#include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler +#include "hws/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp index b049156..a32283a 100644 --- a/bindings/gpu_nvidia_hardware_sampler.cpp +++ b/bindings/gpu_nvidia_hardware_sampler.cpp @@ -5,10 +5,10 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler -#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler +#include "hws/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index 2c47046..5a12141 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -5,22 +5,22 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/event.hpp" // hws::event -#include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time +#include "hws/event.hpp" // hws::event +#include "hws/utility.hpp" // hws::detail::durations_from_reference_time #if defined(HWS_FOR_CPUS_ENABLED) - #include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler + #include "hws/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler #endif #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) - #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler + #include "hws/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler #endif #if defined(HWS_FOR_AMD_GPUS_ENABLED) - #include "hardware_sampling/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler + #include "hws/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler #endif #if defined(HWS_FOR_INTEL_GPUS_ENABLED) - #include "hardware_sampling/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler + #include "hws/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler #endif #include "fmt/format.h" // fmt::format diff --git a/bindings/relative_event.hpp b/bindings/relative_event.hpp index 2033f12..fcdd02e 100644 --- a/bindings/relative_event.hpp +++ b/bindings/relative_event.hpp @@ -8,8 +8,8 @@ * @brief Defines a struct encapsulating a single event with a relative time point. 
*/ -#ifndef HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_ -#define HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_ +#ifndef HWS_BINDINGS_RELATIVE_EVENT_HPP_ +#define HWS_BINDINGS_RELATIVE_EVENT_HPP_ #include // std::string #include // std::move @@ -37,4 +37,4 @@ struct relative_event { } // namespace hws::detail -#endif // HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_ +#endif // HWS_BINDINGS_RELATIVE_EVENT_HPP_ diff --git a/bindings/sample_category.cpp b/bindings/sample_category.cpp index 2db6563..455914c 100644 --- a/bindings/sample_category.cpp +++ b/bindings/sample_category.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/sample_category.hpp" // hws::sample_category #include "pybind11/operators.h" // operator overloading #include "pybind11/pybind11.h" // py::module_, py::overload_cast diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp index 2e25e79..d9af622 100644 --- a/bindings/system_hardware_sampler.cpp +++ b/bindings/system_hardware_sampler.cpp @@ -5,11 +5,11 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/system_hardware_sampler.hpp" // hws::system_hardware_sampler +#include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler -#include "hardware_sampling/event.hpp" // hws::event -#include "hardware_sampling/sample_category.hpp" // hws::sample_category -#include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time +#include "hws/event.hpp" // hws::event +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::detail::durations_from_reference_time #include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // bind std::chrono types diff --git a/bindings/version.cpp b/bindings/version.cpp index df892ca..e5481d1 100644 --- a/bindings/version.cpp +++ b/bindings/version.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/version.hpp" // hws::version +#include "hws/version.hpp" // hws::version #include "pybind11/pybind11.h" // py::module_ diff --git a/cmake/hardware_samplingConfig.cmake.in b/cmake/hwsConfig.cmake.in similarity index 84% rename from cmake/hardware_samplingConfig.cmake.in rename to cmake/hwsConfig.cmake.in index 53829a0..852e638 100644 --- a/cmake/hardware_samplingConfig.cmake.in +++ b/cmake/hwsConfig.cmake.in @@ -15,5 +15,5 @@ list(APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/fmt" find_dependency(fmt REQUIRED) # sanity checks -include("${CMAKE_CURRENT_LIST_DIR}/hardware_samplingTargets.cmake") -check_required_components("hardware_sampling") \ No newline at end of file +include("${CMAKE_CURRENT_LIST_DIR}/hwsTargets.cmake") +check_required_components("hws") \ No newline at end of file diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 56cff22..1ffbc0b 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -8,9 +8,9 @@ cmake_minimum_required(VERSION 3.22) project(LibraryUsageExample LANGUAGES 
CXX) -find_package(hardware_sampling REQUIRED) +find_package(hws REQUIRED) add_executable(prog main.cpp) target_compile_features(prog PUBLIC cxx_std_17) -target_link_libraries(prog PUBLIC hws::hardware_sampling) \ No newline at end of file +target_link_libraries(prog PUBLIC hws::hws) \ No newline at end of file diff --git a/examples/cpp/main.cpp b/examples/cpp/main.cpp index 551c721..63e4160 100644 --- a/examples/cpp/main.cpp +++ b/examples/cpp/main.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/core.hpp" +#include "hws/core.hpp" #include // std::size_t #include // std::iota diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp deleted file mode 100644 index 44d8922..0000000 --- a/include/hardware_sampling/core.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/** - * @file - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Core header containing all other necessary other headers. 
- */ - -#ifndef HARDWARE_SAMPLING_CORE_HPP_ -#define HARDWARE_SAMPLING_CORE_HPP_ -#pragma once - -#include "hardware_sampling/event.hpp" -#include "hardware_sampling/hardware_sampler.hpp" -#include "hardware_sampling/sample_category.hpp" -#include "hardware_sampling/system_hardware_sampler.hpp" -#include "hardware_sampling/version.hpp" - -#if defined(HWS_FOR_CPUS_ENABLED) - #include "hardware_sampling/cpu/cpu_samples.hpp" - #include "hardware_sampling/cpu/hardware_sampler.hpp" -#endif - -#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) - #include "hardware_sampling/gpu_nvidia//nvml_samples.hpp" - #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" -#endif - -#if defined(HWS_FOR_AMD_GPUS_ENABLED) - #include "hardware_sampling/gpu_amd/hardware_sampler.hpp" - #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" -#endif - -#if defined(HWS_FOR_INTEL_GPUS_ENABLED) - #include "hardware_sampling/gpu_intel/hardware_sampler.hpp" - #include "hardware_sampling/gpu_intel/level_zero_samples.hpp" -#endif - -#endif // HARDWARE_SAMPLING_CORE_HPP_ diff --git a/include/hws/core.hpp b/include/hws/core.hpp new file mode 100644 index 0000000..8c7a474 --- /dev/null +++ b/include/hws/core.hpp @@ -0,0 +1,41 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Core header containing all other necessary other headers. 
+ */ + +#ifndef HWS_CORE_HPP_ +#define HWS_CORE_HPP_ +#pragma once + +#include "hws/event.hpp" +#include "hws/hardware_sampler.hpp" +#include "hws/sample_category.hpp" +#include "hws/system_hardware_sampler.hpp" +#include "hws/version.hpp" + +#if defined(HWS_FOR_CPUS_ENABLED) + #include "hws/cpu/cpu_samples.hpp" + #include "hws/cpu/hardware_sampler.hpp" +#endif + +#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) + #include "hws/gpu_nvidia//nvml_samples.hpp" + #include "hws/gpu_nvidia/hardware_sampler.hpp" +#endif + +#if defined(HWS_FOR_AMD_GPUS_ENABLED) + #include "hws/gpu_amd/hardware_sampler.hpp" + #include "hws/gpu_amd/rocm_smi_samples.hpp" +#endif + +#if defined(HWS_FOR_INTEL_GPUS_ENABLED) + #include "hws/gpu_intel/hardware_sampler.hpp" + #include "hws/gpu_intel/level_zero_samples.hpp" +#endif + +#endif // HWS_CORE_HPP_ diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hws/cpu/cpu_samples.hpp similarity index 98% rename from include/hardware_sampling/cpu/cpu_samples.hpp rename to include/hws/cpu/cpu_samples.hpp index 1343909..bcea2d4 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hws/cpu/cpu_samples.hpp @@ -8,11 +8,11 @@ * @brief Defines the samples used with turbostat, lscpu, and free. 
*/ -#ifndef HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_ -#define HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_ +#ifndef HWS_CPU_CPU_SAMPLES_HPP_ +#define HWS_CPU_CPU_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER +#include "hws/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter @@ -364,4 +364,4 @@ struct fmt::formatter : fmt::ostream_formatter { } /// @endcond -#endif // HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_ +#endif // HWS_CPU_CPU_SAMPLES_HPP_ diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hws/cpu/hardware_sampler.hpp similarity index 92% rename from include/hardware_sampling/cpu/hardware_sampler.hpp rename to include/hws/cpu/hardware_sampler.hpp index 4ae805f..d1b4102 100644 --- a/include/hardware_sampling/cpu/hardware_sampler.hpp +++ b/include/hws/cpu/hardware_sampler.hpp @@ -8,13 +8,13 @@ * @brief Defines a hardware sampler for CPUs using the turbostat, lscpu, and free utilities (requires root). 
*/ -#ifndef HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_CPU_HARDWARE_SAMPLER_HPP_ +#define HWS_CPU_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter @@ -157,4 +157,4 @@ struct fmt::formatter : fmt::ostream_formatter { }; /// @endcond -#endif // HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_ +#endif // HWS_CPU_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/cpu/utility.hpp b/include/hws/cpu/utility.hpp similarity index 93% rename from include/hardware_sampling/cpu/utility.hpp rename to include/hws/cpu/utility.hpp index 467d4e5..9efd008 100644 --- a/include/hardware_sampling/cpu/utility.hpp +++ b/include/hws/cpu/utility.hpp @@ -8,8 +8,8 @@ * @brief Implements utility functionality for the CPU sampler. 
*/ -#ifndef HARDWARE_SAMPLING_CPU_UTILITY_HPP_ -#define HARDWARE_SAMPLING_CPU_UTILITY_HPP_ +#ifndef HWS_CPU_UTILITY_HPP_ +#define HWS_CPU_UTILITY_HPP_ #pragma once #include "fmt/format.h" // fmt::format @@ -46,4 +46,4 @@ namespace hws::detail { } // namespace hws::detail -#endif // HARDWARE_SAMPLING_CPU_UTILITY_HPP_ +#endif // HWS_CPU_UTILITY_HPP_ diff --git a/include/hardware_sampling/event.hpp b/include/hws/event.hpp similarity index 93% rename from include/hardware_sampling/event.hpp rename to include/hws/event.hpp index 2a60581..7252a75 100644 --- a/include/hardware_sampling/event.hpp +++ b/include/hws/event.hpp @@ -8,8 +8,8 @@ * @brief Defines an event type. */ -#ifndef HARDWARE_SAMPLING_EVENT_HPP_ -#define HARDWARE_SAMPLING_EVENT_HPP_ +#ifndef HWS_EVENT_HPP_ +#define HWS_EVENT_HPP_ #pragma once #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter @@ -57,4 +57,4 @@ struct fmt::formatter : fmt::ostream_formatter { }; /// @endcond -#endif // HARDWARE_SAMPLING_EVENT_HPP_ +#endif // HWS_EVENT_HPP_ diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hws/gpu_amd/hardware_sampler.hpp similarity index 93% rename from include/hardware_sampling/gpu_amd/hardware_sampler.hpp rename to include/hws/gpu_amd/hardware_sampler.hpp index 00dc90f..668cc9a 100644 --- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp +++ b/include/hws/gpu_amd/hardware_sampler.hpp @@ -8,13 +8,13 @@ * @brief Defines a hardware sampler for AMD GPUs using AMD's ROCm SMI library. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_ +#define HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter @@ -170,4 +170,4 @@ struct fmt::formatter : fmt::ostream_formatter { /// @endcond -#endif // HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_ +#endif // HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hws/gpu_amd/rocm_smi_samples.hpp similarity index 98% rename from include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp rename to include/hws/gpu_amd/rocm_smi_samples.hpp index 958aa3a..8ace761 100644 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ b/include/hws/gpu_amd/rocm_smi_samples.hpp @@ -8,11 +8,11 @@ * @brief Defines the samples used with ROCm SMI. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ -#define HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ +#ifndef HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ +#define HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER +#include "hws/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter @@ -56,7 +56,7 @@ class rocm_smi_general_samples { HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization) // the GPU compute utilization in percent HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization) // the GPU memory utilization in percent - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, performance_level) // the performance level: one of rsmi_dev_perf_level_t + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, performance_level) // the performance level: one of rsmi_dev_perf_level_t }; /** @@ -291,4 +291,4 @@ struct fmt::formatter : fmt::ostream_formatte /// @endcond -#endif // HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ +#endif // HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hws/gpu_amd/utility.hpp similarity index 96% rename from include/hardware_sampling/gpu_amd/utility.hpp rename to include/hws/gpu_amd/utility.hpp index aa21ba0..4889976 100644 --- a/include/hardware_sampling/gpu_amd/utility.hpp +++ b/include/hws/gpu_amd/utility.hpp @@ -8,8 +8,8 @@ * @brief Implements utility functionality for the AMD GPU sampler. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_ -#define HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_ +#ifndef HWS_GPU_AMD_UTILITY_HPP_ +#define HWS_GPU_AMD_UTILITY_HPP_ #pragma once #include "fmt/format.h" // fmt::format @@ -70,4 +70,4 @@ namespace hws::detail { } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_ +#endif // HWS_GPU_AMD_UTILITY_HPP_ diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hws/gpu_intel/hardware_sampler.hpp similarity index 91% rename from include/hardware_sampling/gpu_intel/hardware_sampler.hpp rename to include/hws/gpu_intel/hardware_sampler.hpp index cf97f41..db068fe 100644 --- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp +++ b/include/hws/gpu_intel/hardware_sampler.hpp @@ -8,14 +8,14 @@ * @brief Defines a hardware sampler for Intel GPUs using Intel's Level Zero. */ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_ +#define HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle -#include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle +#include "hws/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category #include 
"fmt/format.h" // fmt::formatter, fmt::ostream_formatter @@ -170,4 +170,4 @@ struct fmt::formatter : fmt::ostream_formatter /// @endcond -#endif // HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_ +#endif // HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp b/include/hws/gpu_intel/level_zero_device_handle.hpp similarity index 85% rename from include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp rename to include/hws/gpu_intel/level_zero_device_handle.hpp index f84d8a5..c05f630 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp +++ b/include/hws/gpu_intel/level_zero_device_handle.hpp @@ -8,8 +8,8 @@ * @brief Defines a pImpl class for a Level Zero device handle. */ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ +#ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ +#define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ #pragma once #include // std::size_t @@ -40,7 +40,7 @@ class level_zero_device_handle { /** * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t. - * @throws hardware_sampling_exception if `*this` has been default constructed + * @throws std::runtime_error if `*this` has been default constructed * @return the device handle (`[[nodiscard]]`) */ [[nodiscard]] level_zero_device_handle_impl &get_impl() { @@ -52,7 +52,7 @@ class level_zero_device_handle { /** * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t. 
- * @throws hardware_sampling_exception if `*this` has been default constructed + * @throws std::runtime_error if `*this` has been default constructed * @return the device handle (`[[nodiscard]]`) */ [[nodiscard]] const level_zero_device_handle_impl &get_impl() const { @@ -69,4 +69,4 @@ class level_zero_device_handle { } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ +#endif // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp b/include/hws/gpu_intel/level_zero_device_handle_impl.hpp similarity index 85% rename from include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp rename to include/hws/gpu_intel/level_zero_device_handle_impl.hpp index a0f2ccd..1c3b269 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp +++ b/include/hws/gpu_intel/level_zero_device_handle_impl.hpp @@ -8,12 +8,12 @@ * @brief Implements a pImpl class for a Level Zero device handle. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ +#ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ +#define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ #pragma once -#include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle -#include "hardware_sampling/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK +#include "hws/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle +#include "hws/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK #include "fmt/format.h" // fmt::format #include "level_zero/ze_api.h" // Level Zero runtime functions @@ -76,4 +76,4 @@ inline level_zero_device_handle::level_zero_device_handle(const std::size_t devi } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ +#endif // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hws/gpu_intel/level_zero_samples.hpp similarity index 98% rename from include/hardware_sampling/gpu_intel/level_zero_samples.hpp rename to include/hws/gpu_intel/level_zero_samples.hpp index 2ade186..dec6ec5 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ b/include/hws/gpu_intel/level_zero_samples.hpp @@ -8,11 +8,11 @@ * @brief Defines the samples used with Level Zero. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ +#ifndef HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ +#define HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER +#include "hws/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter @@ -286,4 +286,4 @@ struct fmt::formatter : fmt::ostream_format /// @endcond -#endif // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ +#endif // HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_intel/utility.hpp b/include/hws/gpu_intel/utility.hpp similarity index 95% rename from include/hardware_sampling/gpu_intel/utility.hpp rename to include/hws/gpu_intel/utility.hpp index 03f9f8d..04626a8 100644 --- a/include/hardware_sampling/gpu_intel/utility.hpp +++ b/include/hws/gpu_intel/utility.hpp @@ -8,8 +8,8 @@ * @brief Implements utility functionality for the Intel GPU sampler. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_ +#ifndef HWS_GPU_INTEL_UTILITY_HPP_ +#define HWS_GPU_INTEL_UTILITY_HPP_ #pragma once #include "fmt/format.h" // fmt::format @@ -77,4 +77,4 @@ namespace hws::detail { } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_ +#endif // HWS_GPU_INTEL_UTILITY_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hws/gpu_nvidia/hardware_sampler.hpp similarity index 92% rename from include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp rename to include/hws/gpu_nvidia/hardware_sampler.hpp index 5dba1ca..59a5e31 100644 --- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp +++ b/include/hws/gpu_nvidia/hardware_sampler.hpp @@ -8,14 +8,14 @@ * @brief Defines a hardware sampler for NVIDIA GPUs using NVIDIA's Management Library (NVML). */ -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ +#define HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp" // hws::nvml_device_handle -#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/gpu_nvidia/nvml_device_handle.hpp" // hws::nvml_device_handle +#include "hws/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category #include "fmt/format.h" // fmt::formatter, fmt::ostream_formatter @@ -171,4 +171,4 @@ 
struct fmt::formatter : fmt::ostream_formatter /// @endcond -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ +#endif // HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp b/include/hws/gpu_nvidia/nvml_device_handle.hpp similarity index 85% rename from include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp rename to include/hws/gpu_nvidia/nvml_device_handle.hpp index f52fb84..eb3da33 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp +++ b/include/hws/gpu_nvidia/nvml_device_handle.hpp @@ -8,8 +8,8 @@ * @brief Defines a pImpl class for an NVML device handle. */ -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ +#ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ +#define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ #pragma once #include // std::size_t @@ -40,7 +40,7 @@ class nvml_device_handle { /** * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t. - * @throws hardware_sampling_exception if `*this` has been default constructed + * @throws std::runtime_error if `*this` has been default constructed * @return the device handle (`[[nodiscard]]`) */ [[nodiscard]] nvml_device_handle_impl &get_impl() { @@ -52,7 +52,7 @@ class nvml_device_handle { /** * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t. 
- * @throws hardware_sampling_exception if `*this` has been default constructed + * @throws std::runtime_error if `*this` has been default constructed * @return the device handle (`[[nodiscard]]`) */ [[nodiscard]] const nvml_device_handle_impl &get_impl() const { @@ -69,4 +69,4 @@ class nvml_device_handle { } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ +#endif // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp b/include/hws/gpu_nvidia/nvml_device_handle_impl.hpp similarity index 74% rename from include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp rename to include/hws/gpu_nvidia/nvml_device_handle_impl.hpp index df6147c..7656599 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp +++ b/include/hws/gpu_nvidia/nvml_device_handle_impl.hpp @@ -8,12 +8,12 @@ * @brief Implements a pImpl class for an NVML device handle. */ -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ +#ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ +#define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ #pragma once -#include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp" // hws::detail::nvml_device_handle -#include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK +#include "hws/gpu_nvidia/nvml_device_handle.hpp" // hws::detail::nvml_device_handle +#include "hws/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK #include "nvml.h" // nvmlDevice_t @@ -44,4 +44,4 @@ inline nvml_device_handle::nvml_device_handle(const std::size_t device_id) : } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ +#endif // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hws/gpu_nvidia/nvml_samples.hpp similarity index 98% rename from 
include/hardware_sampling/gpu_nvidia/nvml_samples.hpp rename to include/hws/gpu_nvidia/nvml_samples.hpp index 631e572..0ddd6ae 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hws/gpu_nvidia/nvml_samples.hpp @@ -8,11 +8,11 @@ * @brief Defines the samples used with NVML. */ -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_ +#ifndef HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_ +#define HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER +#include "hws/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter @@ -280,4 +280,4 @@ struct fmt::formatter : fmt::ostream_formatter { /// @endcond -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_ +#endif // HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hws/gpu_nvidia/utility.hpp similarity index 96% rename from include/hardware_sampling/gpu_nvidia/utility.hpp rename to include/hws/gpu_nvidia/utility.hpp index aaf0420..c405386 100644 --- a/include/hardware_sampling/gpu_nvidia/utility.hpp +++ b/include/hws/gpu_nvidia/utility.hpp @@ -8,8 +8,8 @@ * @brief Implements utility functionality for the NVIDIA GPU sampler. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ +#ifndef HWS_GPU_NVIDIA_UTILITY_HPP_ +#define HWS_GPU_NVIDIA_UTILITY_HPP_ #pragma once #include "cuda_runtime_api.h" // CUDA runtime functions @@ -61,4 +61,4 @@ namespace hws::detail { } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ +#endif // HWS_GPU_NVIDIA_UTILITY_HPP_ diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hws/hardware_sampler.hpp similarity index 96% rename from include/hardware_sampling/hardware_sampler.hpp rename to include/hws/hardware_sampler.hpp index cc59331..326eb7e 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hws/hardware_sampler.hpp @@ -8,12 +8,12 @@ * @brief Defines the base class for all hardware samplers. */ -#ifndef HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_HARDWARE_SAMPLER_HPP_ +#define HWS_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/event.hpp" // hws::event -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/event.hpp" // hws::event +#include "hws/sample_category.hpp" // hws::sample_category #include // std::atomic #include // std::chrono::{system_clock::time_point, steady_clock::time_point, milliseconds} @@ -228,4 +228,4 @@ class hardware_sampler { } // namespace hws -#endif // HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_ +#endif // HWS_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/sample_category.hpp b/include/hws/sample_category.hpp similarity index 96% rename from include/hardware_sampling/sample_category.hpp rename to include/hws/sample_category.hpp index e740544..0ec500b 100644 --- a/include/hardware_sampling/sample_category.hpp +++ b/include/hws/sample_category.hpp @@ -8,8 +8,8 @@ * @brief Defines an enum class with all sample categories to be able to only selectively enable some samples. 
*/ -#ifndef HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_ -#define HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_ +#ifndef HWS_SAMPLE_CATEGORY_HPP_ +#define HWS_SAMPLE_CATEGORY_HPP_ #pragma once namespace hws { @@ -114,4 +114,4 @@ constexpr sample_category &operator^=(sample_category &lhs, const sample_categor } // namespace hws -#endif // HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_ +#endif // HWS_SAMPLE_CATEGORY_HPP_ diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hws/system_hardware_sampler.hpp similarity index 95% rename from include/hardware_sampling/system_hardware_sampler.hpp rename to include/hws/system_hardware_sampler.hpp index 15279de..42924ac 100644 --- a/include/hardware_sampling/system_hardware_sampler.hpp +++ b/include/hws/system_hardware_sampler.hpp @@ -8,12 +8,12 @@ * @brief Defines a hardware sampler for the whole system, i.e., automatically creates CPU and GPU hardware samples if the respective sampler and hardware are available. */ -#ifndef HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_SYSTEM_HARDWARE_SAMPLER_HPP_ +#define HWS_SYSTEM_HARDWARE_SAMPLER_HPP_ -#include "hardware_sampling/event.hpp" // hws::event -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/event.hpp" // hws::event +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category #include // std::chrono::{milliseconds, steady_clock::time_point} #include // std::size_t @@ -194,4 +194,4 @@ class system_hardware_sampler { } // namespace hws -#endif // HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_ +#endif // HWS_SYSTEM_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/utility.hpp b/include/hws/utility.hpp similarity index 99% rename from include/hardware_sampling/utility.hpp rename to include/hws/utility.hpp index 
4a99a31..db37390 100644 --- a/include/hardware_sampling/utility.hpp +++ b/include/hws/utility.hpp @@ -8,8 +8,8 @@ * @brief Utility functions for the hardware sampling. */ -#ifndef HARDWARE_SAMPLING_UTILITY_HPP_ -#define HARDWARE_SAMPLING_UTILITY_HPP_ +#ifndef HWS_UTILITY_HPP_ +#define HWS_UTILITY_HPP_ #pragma once #include "fmt/format.h" // fmt::format @@ -307,4 +307,4 @@ template } // namespace hws::detail -#endif // HARDWARE_SAMPLING_UTILITY_HPP_ +#endif // HWS_UTILITY_HPP_ diff --git a/include/hardware_sampling/version.hpp.in b/include/hws/version.hpp.in similarity index 92% rename from include/hardware_sampling/version.hpp.in rename to include/hws/version.hpp.in index 88d0c1e..225072f 100644 --- a/include/hardware_sampling/version.hpp.in +++ b/include/hws/version.hpp.in @@ -8,8 +8,8 @@ * @brief Version information for the hardware sampling. */ -#ifndef HARDWARE_SAMPLING_VERSION_HPP_ -#define HARDWARE_SAMPLING_VERSION_HPP_ +#ifndef HWS_VERSION_HPP_ +#define HWS_VERSION_HPP_ #pragma once #include // std::string_view @@ -48,4 +48,4 @@ constexpr int patch = @PROJECT_VERSION_PATCH@; } // namespace hws::version -#endif // HARDWARE_SAMPLING_VERSION_HPP_ +#endif // HWS_VERSION_HPP_ diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hws/cpu/cpu_samples.cpp similarity index 99% rename from src/hardware_sampling/cpu/cpu_samples.cpp rename to src/hws/cpu/cpu_samples.cpp index 3ef3ad7..e5690d2 100644 --- a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hws/cpu/cpu_samples.cpp @@ -5,9 +5,9 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/cpu/cpu_samples.hpp" +#include "hws/cpu/cpu_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, quote} +#include "hws/utility.hpp" // hws::detail::{value_or_default, quote} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hws/cpu/hardware_sampler.cpp similarity index 98% rename from src/hardware_sampling/cpu/hardware_sampler.cpp rename to src/hws/cpu/hardware_sampler.cpp index b996f96..505e0bb 100644 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ b/src/hws/cpu/hardware_sampler.cpp @@ -5,13 +5,13 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/cpu/hardware_sampler.hpp" +#include "hws/cpu/hardware_sampler.hpp" -#include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} -#include "hardware_sampling/cpu/utility.hpp" // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess -#include "hardware_sampling/hardware_sampler.hpp" // hws::tracking::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category -#include "hardware_sampling/utility.hpp" // hws::detail::{split, split_as, trim, convert_to, starts_with} +#include "hws/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} +#include "hws/cpu/utility.hpp" // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess +#include "hws/hardware_sampler.hpp" // hws::tracking::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::detail::{split, split_as, trim, convert_to, starts_with} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join diff --git 
a/src/hardware_sampling/cpu/utility.cpp b/src/hws/cpu/utility.cpp similarity index 95% rename from src/hardware_sampling/cpu/utility.cpp rename to src/hws/cpu/utility.cpp index 3a17995..7bb6b3d 100644 --- a/src/hardware_sampling/cpu/utility.cpp +++ b/src/hws/cpu/utility.cpp @@ -5,9 +5,9 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/cpu/utility.hpp" +#include "hws/cpu/utility.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::split_as +#include "hws/utility.hpp" // hws::detail::split_as #include "fmt/format.h" // fmt::format #include "subprocess.h" // subprocess_s, subprocess_create, subprocess_join, subprocess_stdout, subprocess_option_e diff --git a/src/hardware_sampling/event.cpp b/src/hws/event.cpp similarity index 93% rename from src/hardware_sampling/event.cpp rename to src/hws/event.cpp index e21c715..373990e 100644 --- a/src/hardware_sampling/event.cpp +++ b/src/hws/event.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/event.hpp" +#include "hws/event.hpp" #include "fmt/format.h" // fmt::format diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hws/gpu_amd/hardware_sampler.cpp similarity index 98% rename from src/hardware_sampling/gpu_amd/hardware_sampler.cpp rename to src/hws/gpu_amd/hardware_sampler.cpp index b205718..6d52e03 100644 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ b/src/hws/gpu_amd/hardware_sampler.cpp @@ -5,13 +5,13 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/gpu_amd/hardware_sampler.hpp" +#include "hws/gpu_amd/hardware_sampler.hpp" -#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} -#include "hardware_sampling/gpu_amd/utility.hpp" // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category -#include "hardware_sampling/utility.hpp" // hws::detail::time_points_to_epoch +#include "hws/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} +#include "hws/gpu_amd/utility.hpp" // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::detail::time_points_to_epoch #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hws/gpu_amd/rocm_smi_samples.cpp similarity index 99% rename from src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp rename to src/hws/gpu_amd/rocm_smi_samples.cpp index e93c36b..f149c4e 100644 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ b/src/hws/gpu_amd/rocm_smi_samples.cpp @@ -5,9 +5,9 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" +#include "hws/gpu_amd/rocm_smi_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, quote} +#include "hws/utility.hpp" // hws::detail::{value_or_default, quote} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join diff --git a/src/hardware_sampling/gpu_amd/utility.cpp b/src/hws/gpu_amd/utility.cpp similarity index 96% rename from src/hardware_sampling/gpu_amd/utility.cpp rename to src/hws/gpu_amd/utility.cpp index 35d375c..a88969a 100644 --- a/src/hardware_sampling/gpu_amd/utility.cpp +++ b/src/hws/gpu_amd/utility.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/gpu_amd/utility.hpp" +#include "hws/gpu_amd/utility.hpp" #include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hws/gpu_intel/hardware_sampler.cpp similarity index 98% rename from src/hardware_sampling/gpu_intel/hardware_sampler.cpp rename to src/hws/gpu_intel/hardware_sampler.cpp index ed3aed7..3054e22 100644 --- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hws/gpu_intel/hardware_sampler.cpp @@ -5,14 +5,14 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/gpu_intel/hardware_sampler.hpp" - -#include "hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp" // hws::level_zero_device_handle implementation -#include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} -#include "hardware_sampling/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category -#include "hardware_sampling/utility.hpp" // hws::{durations_from_reference_time, join} +#include "hws/gpu_intel/hardware_sampler.hpp" + +#include "hws/gpu_intel/level_zero_device_handle_impl.hpp" // hws::level_zero_device_handle implementation +#include "hws/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} +#include "hws/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::{durations_from_reference_time, join} #include "fmt/format.h" // fmt::format #include "level_zero/ze_api.h" // Level Zero runtime functions diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hws/gpu_intel/level_zero_samples.cpp similarity index 99% rename from src/hardware_sampling/gpu_intel/level_zero_samples.cpp rename to src/hws/gpu_intel/level_zero_samples.cpp index ab749fb..e296cab 100644 --- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp +++ b/src/hws/gpu_intel/level_zero_samples.cpp @@ -5,9 +5,9 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/gpu_intel/level_zero_samples.hpp" +#include "hws/gpu_intel/level_zero_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, remove_cvref_t} +#include "hws/utility.hpp" // hws::detail::{value_or_default, remove_cvref_t} #include <ostream> // std::ostream #include <string> // std::string diff --git a/src/hardware_sampling/gpu_intel/utility.cpp b/src/hws/gpu_intel/utility.cpp similarity index 99% rename from src/hardware_sampling/gpu_intel/utility.cpp rename to src/hws/gpu_intel/utility.cpp index 635b5c1..5a29eee 100644 --- a/src/hardware_sampling/gpu_intel/utility.cpp +++ b/src/hws/gpu_intel/utility.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/gpu_intel/utility.hpp" +#include "hws/gpu_intel/utility.hpp" #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hws/gpu_nvidia/hardware_sampler.cpp similarity index 97% rename from src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp rename to src/hws/gpu_nvidia/hardware_sampler.cpp index 2ffb6e8..9c2a927 100644 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ b/src/hws/gpu_nvidia/hardware_sampler.cpp @@ -5,14 +5,14 @@ * See the LICENSE.md file in the project root for full license information.
*/ -#include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" - -#include "hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp" // hws::detail::nvml_device_handle implementation -#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} -#include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/sample_category.hpp" // hws::sample_category -#include "hardware_sampling/utility.hpp" // hws::detail::time_points_to_epoch +#include "hws/gpu_nvidia/hardware_sampler.hpp" + +#include "hws/gpu_nvidia/nvml_device_handle_impl.hpp" // hws::detail::nvml_device_handle implementation +#include "hws/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} +#include "hws/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::detail::time_points_to_epoch #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hws/gpu_nvidia/nvml_samples.cpp similarity index 99% rename from src/hardware_sampling/gpu_nvidia/nvml_samples.cpp rename to src/hws/gpu_nvidia/nvml_samples.cpp index b07c7d2..3ce65c3 100644 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ b/src/hws/gpu_nvidia/nvml_samples.cpp @@ -5,9 +5,9 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" +#include "hws/gpu_nvidia/nvml_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, map_entry_to_string, quote} +#include "hws/utility.hpp" // hws::detail::{value_or_default, map_entry_to_string, quote} #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join diff --git a/src/hardware_sampling/gpu_nvidia/utility.cpp b/src/hws/gpu_nvidia/utility.cpp similarity index 97% rename from src/hardware_sampling/gpu_nvidia/utility.cpp rename to src/hws/gpu_nvidia/utility.cpp index 70883e6..7c1b9f7 100644 --- a/src/hardware_sampling/gpu_nvidia/utility.cpp +++ b/src/hws/gpu_nvidia/utility.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/gpu_nvidia/utility.hpp" +#include "hws/gpu_nvidia/utility.hpp" #include "fmt/format.h" // fmt::format #include "fmt/ranges.h" // fmt::join diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hws/hardware_sampler.cpp similarity index 96% rename from src/hardware_sampling/hardware_sampler.cpp rename to src/hws/hardware_sampler.cpp index d5ec9fc..abd907d 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hws/hardware_sampler.cpp @@ -5,11 +5,11 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/hardware_sampler.hpp" +#include "hws/hardware_sampler.hpp" -#include "hardware_sampling/event.hpp" // hws::event -#include "hardware_sampling/utility.hpp" // hws::detail::durations_from_reference_time -#include "hardware_sampling/version.hpp" // hws::version::version +#include "hws/event.hpp" // hws::event +#include "hws/utility.hpp" // hws::detail::durations_from_reference_time +#include "hws/version.hpp" // hws::version::version #include "fmt/chrono.h" // direct formatting of std::chrono types #include "fmt/format.h" // fmt::format diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hws/system_hardware_sampler.cpp similarity index 90% rename from src/hardware_sampling/system_hardware_sampler.cpp rename to src/hws/system_hardware_sampler.cpp index 51a2fed..14e75de 100644 --- a/src/hardware_sampling/system_hardware_sampler.cpp +++ b/src/hws/system_hardware_sampler.cpp @@ -5,29 +5,29 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/system_hardware_sampler.hpp" +#include "hws/system_hardware_sampler.hpp" -#include "hardware_sampling/event.hpp" // hws::event -#include "hardware_sampling/sample_category.hpp" // hws::sample_category +#include "hws/event.hpp" // hws::event +#include "hws/sample_category.hpp" // hws::sample_category #if defined(HWS_FOR_CPUS_ENABLED) - #include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler + #include "hws/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler #endif #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) - #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler - #include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_CUDA_ERROR_CHECK + #include "hws/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler + #include "hws/gpu_nvidia/utility.hpp" // HWS_CUDA_ERROR_CHECK #include "cuda_runtime.h" // cudaGetDeviceCount #endif #if defined(HWS_FOR_AMD_GPUS_ENABLED) - #include "hardware_sampling/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler - #include "hardware_sampling/gpu_amd/utility.hpp" // HWS_HIP_ERROR_CHECK + #include "hws/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler + #include "hws/gpu_amd/utility.hpp" // HWS_HIP_ERROR_CHECK #include "hip/hip_runtime.h" // hipGetDeviceCount #endif #if defined(HWS_FOR_INTEL_GPUS_ENABLED) - #include "hardware_sampling/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler - #include "hardware_sampling/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK + #include "hws/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler + #include "hws/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK #endif #include "fmt/format.h" // fmt::format diff --git a/src/hardware_sampling/utility.cpp b/src/hws/utility.cpp similarity index 97% rename from src/hardware_sampling/utility.cpp rename to src/hws/utility.cpp index 9e2dbc2..6651763 100644 --- 
a/src/hardware_sampling/utility.cpp +++ b/src/hws/utility.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/utility.hpp" +#include "hws/utility.hpp" #include <algorithm> // std::min, std::transform #include <cctype> // std::tolower From 5c21328789eba5d2309d46b83f95513c978e6ff7 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 7 Oct 2024 09:46:53 +0200 Subject: [PATCH 62/69] Add {fmt} to the install targets. --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 97ccbe1..8f48d0f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,7 +76,7 @@ else () set(FMT_FUZZ OFF CACHE INTERNAL "" FORCE) set(FMT_CUDA_TEST OFF CACHE INTERNAL "" FORCE) set(FMT_MODULE OFF CACHE INTERNAL "" FORCE) - set(FMT_SYSTEM_HEADERS ON CACHE INTERNAL "" FORCE) + set(FMT_SYSTEM_HEADERS OFF CACHE INTERNAL "" FORCE) # fetch string formatting library fmt FetchContent_Declare(fmt GIT_REPOSITORY https://github.com/fmtlib/fmt.git @@ -328,7 +328,7 @@ configure_package_config_file( ) ## create and copy install-targets file -install(EXPORT hws_Targets +install(EXPORT hws_Targets fmt FILE hwsTargets.cmake NAMESPACE hws:: DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake From f8b4427ff19dab1b8671acfad87cf76be65b6c3d Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Mon, 7 Oct 2024 10:05:02 +0200 Subject: [PATCH 63/69] Undo last commit.
--- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f48d0f..aa5ae22 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -306,6 +306,7 @@ install(TARGETS ${HWS_TARGETS_TO_INSTALL} LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all shared lib files RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" # all executables ) +install(TARGETS fmt) ## mark header to install via 'make install' install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/" @@ -328,7 +329,7 @@ configure_package_config_file( ) ## create and copy install-targets file -install(EXPORT hws_Targets fmt +install(EXPORT hws_Targets FILE hwsTargets.cmake NAMESPACE hws:: DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake From 55e2936574ba76564285cfd8da11df3e82a1baf9 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 8 Oct 2024 11:30:23 +0200 Subject: [PATCH 64/69] Fix compilation error in the level zero error check function. --- include/hws/gpu_intel/utility.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/hws/gpu_intel/utility.hpp b/include/hws/gpu_intel/utility.hpp index 04626a8..76e15a1 100644 --- a/include/hws/gpu_intel/utility.hpp +++ b/include/hws/gpu_intel/utility.hpp @@ -36,12 +36,12 @@ namespace hws::detail { * @details Throws an exception if a Level Zero call returns with an error. Additionally outputs a more concrete custom error string. 
*/ #if defined(HWS_ERROR_CHECKS_ENABLED) - #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) \ - { \ - const ze_result_t errc = level_zero_func; \ - if (errc != ZE_RESULT_SUCCESS) { \ - throw std::runtime_error{ fmt::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, to_result_string(errc)) }; \ - } \ + #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) \ + { \ + const ze_result_t errc = level_zero_func; \ + if (errc != ZE_RESULT_SUCCESS) { \ + throw std::runtime_error{ fmt::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, ::hws::detail::to_result_string(errc)) }; \ + } \ } #else #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) level_zero_func; From d1e878ea2080a4545e633ba1ec1ebd1496323942 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 8 Oct 2024 11:30:37 +0200 Subject: [PATCH 65/69] Add missing comparison to ZE_RESULT_SUCCESS. --- src/hws/gpu_intel/hardware_sampler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hws/gpu_intel/hardware_sampler.cpp b/src/hws/gpu_intel/hardware_sampler.cpp index 3054e22..9a44369 100644 --- a/src/hws/gpu_intel/hardware_sampler.cpp +++ b/src/hws/gpu_intel/hardware_sampler.cpp @@ -146,7 +146,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { for (zes_freq_handle_t handle : frequency_handles) { // get frequency properties zes_freq_properties_t prop{}; - if (zesFrequencyGetProperties(handle, &prop)) { + if (zesFrequencyGetProperties(handle, &prop) == ZE_RESULT_SUCCESS) { // determine the frequency domain (e.g. GPU, memory, etc) switch (prop.type) { case ZES_FREQ_DOMAIN_GPU: From 751adee8bfbe6326bcbf10f8b6bf2c057e7c1ce9 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 8 Oct 2024 11:31:24 +0200 Subject: [PATCH 66/69] Fix power related wrong units and values. 
--- src/hws/gpu_intel/hardware_sampler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hws/gpu_intel/hardware_sampler.cpp b/src/hws/gpu_intel/hardware_sampler.cpp index 9a44369..0be124e 100644 --- a/src/hws/gpu_intel/hardware_sampler.cpp +++ b/src/hws/gpu_intel/hardware_sampler.cpp @@ -272,7 +272,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { break; } - power_samples_.power_enforced_limit_ = static_cast(desc.limit); + power_samples_.power_enforced_limit_ = static_cast(desc.limit) / 1000.0; } // get total power consumption @@ -581,7 +581,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { // calculate current power draw as (Energy Difference [J]) / (Time Difference [s]) const std::size_t last_index = this->sampling_time_points().size() - 1; - const double power_usage = (power_consumption - power_samples_.power_total_energy_consumption_->back()) / (std::chrono::duration(this->sampling_time_points()[last_index] - this->sampling_time_points()[last_index - 1]).count()); + const double power_usage = ((power_consumption - initial_total_power_consumption) - power_samples_.power_total_energy_consumption_->back()) / (std::chrono::duration(this->sampling_time_points()[last_index] - this->sampling_time_points()[last_index - 1]).count()); power_samples_.power_usage_->push_back(power_usage); // add power consumption last to be able to use the std::vector::back() function From 13daaa8764401fdbcc0e15aedd6f5a55dc2a16c1 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 8 Oct 2024 11:39:35 +0200 Subject: [PATCH 67/69] Correctly init level zero driver. 
--- src/hws/system_hardware_sampler.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/hws/system_hardware_sampler.cpp b/src/hws/system_hardware_sampler.cpp index 14e75de..1c08762 100644 --- a/src/hws/system_hardware_sampler.cpp +++ b/src/hws/system_hardware_sampler.cpp @@ -73,6 +73,9 @@ system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds #endif #if defined(HWS_FOR_INTEL_GPUS_ENABLED) { + // init level zero driver + HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY)) + // discover the number of drivers std::uint32_t driver_count{ 0 }; HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr)) From 12da0d9bfbb36d844e0bd2e7255f7e3e06b22101 Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 8 Oct 2024 11:39:54 +0200 Subject: [PATCH 68/69] Try fixing installation issues. --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa5ae22..97ccbe1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,7 +76,7 @@ else () set(FMT_FUZZ OFF CACHE INTERNAL "" FORCE) set(FMT_CUDA_TEST OFF CACHE INTERNAL "" FORCE) set(FMT_MODULE OFF CACHE INTERNAL "" FORCE) - set(FMT_SYSTEM_HEADERS OFF CACHE INTERNAL "" FORCE) + set(FMT_SYSTEM_HEADERS ON CACHE INTERNAL "" FORCE) # fetch string formatting library fmt FetchContent_Declare(fmt GIT_REPOSITORY https://github.com/fmtlib/fmt.git @@ -306,7 +306,6 @@ install(TARGETS ${HWS_TARGETS_TO_INSTALL} LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all shared lib files RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" # all executables ) -install(TARGETS fmt) ## mark header to install via 'make install' install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/" From 7c0ce1eecccfdb7c108f35769b439f1b32a9356c Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 8 Oct 2024 11:45:15 +0200 Subject: [PATCH 69/69] Update README. 
--- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 477738a..f21b0eb 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,8 @@ export CPLUS_INCLUDE_PATH=${CMAKE_INSTALL_PREFIX}/include:${CPLUS_INCLUDE_PATH} export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ``` +Note: if segmentation faults are encountered in calls to `zes` functions when using Intel GPUs, it may be necessary to set `export ZES_ENABLE_SYSMAN=1`. + ## Available samples The sampling type `fixed` denotes samples that are gathered once per hardware samples like maximum clock frequencies or