From 7618926b257d2fc7c4d55ada11cbac70c4232f00 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <breyer.marcel@web.de>
Date: Tue, 6 Aug 2024 22:02:16 +0200
Subject: [PATCH 01/69] Add architecture query to GPU NVIDIA hardware sampler.

---
 .../gpu_nvidia/nvml_samples.hpp               |  7 ++-
 .../gpu_nvidia/hardware_sampler.cpp           | 58 +++++++++++++++++++
 .../gpu_nvidia/nvml_samples.cpp               | 11 +++-
 3 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index 29b1d5a..89bc57c 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -41,9 +41,10 @@ class nvml_general_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)        // the name of the device
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode)   // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)  // the number of CUDA cores
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)  // the architecture name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)          // the name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode)     // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)    // the number of CUDA cores
 
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state)         // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_gpu)  // the GPU compute utilization in percent
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 8e4729f..0ea5b8c 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -89,6 +89,64 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
     // retrieve initial general information
     {
         // fixed information -> only retrieved once
+        nvmlDeviceArchitecture_t device_arch{};
+        if (nvmlDeviceGetArchitecture(device, &device_arch) == NVML_SUCCESS) {
+            switch (device_arch) {
+#if defined(NVML_DEVICE_ARCH_KEPLER)
+                case NVML_DEVICE_ARCH_KEPLER:
+                    general_samples_.architecture_ = "Kepler";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_MAXWELL)
+                case NVML_DEVICE_ARCH_MAXWELL:
+                    general_samples_.architecture_ = "Maxwell";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_PASCAL)
+                case NVML_DEVICE_ARCH_PASCAL:
+                    general_samples_.architecture_ = "Pascal";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_VOLTA)
+                case NVML_DEVICE_ARCH_VOLTA:
+                    general_samples_.architecture_ = "Volta";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_TURING)
+                case NVML_DEVICE_ARCH_TURING:
+                    general_samples_.architecture_ = "Turing";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_AMPERE)
+                case NVML_DEVICE_ARCH_AMPERE:
+                    general_samples_.architecture_ = "Ampere";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_ADA)
+                case NVML_DEVICE_ARCH_ADA:
+                    general_samples_.architecture_ = "Ada";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_HOPPER)
+                case NVML_DEVICE_ARCH_HOPPER:
+                    general_samples_.architecture_ = "Hopper";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_BLACKWELL)
+                case NVML_DEVICE_ARCH_BLACKWELL:
+                    general_samples_.architecture_ = "Blackwell";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_T23X)
+                case NVML_DEVICE_ARCH_T23X:
+                    general_samples_.architecture_ = "Orin";
+                    break;
+#endif
+                default:
+                    break;
+            }
+        }
+
         std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0');
         if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) {
             general_samples_.name_ = name.substr(0, name.find_first_of('\0'));
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 76ffe47..878877f 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -24,6 +24,13 @@ namespace hws {
 std::string nvml_general_samples::generate_yaml_string() const {
     std::string str{ "general:\n" };
 
+    // device architecture
+    if (this->architecture_.has_value()) {
+        str += std::format("  architecture:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->architecture_.value());
+    }
     // device name
     if (this->name_.has_value()) {
         str += std::format("  name:\n"
@@ -76,12 +83,14 @@ std::string nvml_general_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) {
-    return out << std::format("name [string]: {}\n"
+    return out << std::format("architecture [string]: {}\n"
+                              "name [string]: {}\n"
                               "persistence_mode [bool]: {}\n"
                               "num_cores [int]: {}\n"
                               "performance_state [int]: [{}]\n"
                               "utilization_gpu [%]: [{}]\n"
                               "utilization_mem [%]: [{}]",
+                              detail::value_or_default(samples.get_architecture()),
                               detail::value_or_default(samples.get_name()),
                               detail::value_or_default(samples.get_persistence_mode()),
                               detail::value_or_default(samples.get_num_cores()),

From 8618468b3d16d31a558cce7a3eb4e28eacaaff87 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <breyer.marcel@web.de>
Date: Tue, 6 Aug 2024 22:47:43 +0200
Subject: [PATCH 02/69] Add endianness queries for all GPUs.

---
 .../hardware_sampling/gpu_amd/rocm_smi_samples.hpp    |  3 ++-
 .../gpu_intel/level_zero_samples.hpp                  |  1 +
 include/hardware_sampling/gpu_nvidia/nvml_samples.hpp |  1 +
 src/hardware_sampling/gpu_amd/hardware_sampler.cpp    |  3 +++
 src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp    | 11 ++++++++++-
 src/hardware_sampling/gpu_intel/hardware_sampler.cpp  |  3 +++
 .../gpu_intel/level_zero_samples.cpp                  | 11 ++++++++++-
 src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp |  3 +++
 src/hardware_sampling/gpu_nvidia/nvml_samples.cpp     |  9 +++++++++
 9 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index 8f5d120..0b97bd0 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -42,7 +42,8 @@ class rocm_smi_general_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)  // the name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)  // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)        // the name of the device
 
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level)          // the performance level: one of rsmi_dev_perf_level_t
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_gpu)  // the GPU compute utilization in percent
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
index 7d0f713..94d7d3a 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
@@ -43,6 +43,7 @@ class level_zero_general_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)            // the byte order (e.g., little/big endian)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                  // the model name of the device
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode)          // the enabled standby mode (power saving or never)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu)  // the number of threads per EU unit
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index 89bc57c..3de4053 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -42,6 +42,7 @@ class nvml_general_samples {
     [[nodiscard]] std::string generate_yaml_string() const;
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)  // the architecture name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)    // the byte order (e.g., little/big endian)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)          // the name of the device
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode)     // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)    // the number of CUDA cores
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 3e20ba8..4586c8d 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -84,6 +84,9 @@ void gpu_amd_hardware_sampler::sampling_loop() {
     // retrieve initial general information
     {
         // fixed information -> only retrieved once
+        // the byte order is given by AMD directly
+        general_samples_.byte_order_ = "Little Endian";
+
         std::string name(static_cast<std::string::size_type>(1024), '\0');
         if (rsmi_dev_name_get(device_id_, name.data(), name.size()) == RSMI_STATUS_SUCCESS) {
             general_samples_.name_ = name.substr(0, name.find_first_of('\0'));
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index 52a1ae8..de9f77e 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -24,6 +24,13 @@ namespace hws {
 std::string rocm_smi_general_samples::generate_yaml_string() const {
     std::string str{ "general:\n" };
 
+    // device byte order
+    if (this->byte_order_.has_value()) {
+        str += std::format("  byte_order:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->byte_order_.value());
+    }
     // device name
     if (this->name_.has_value()) {
         str += std::format("  name:\n"
@@ -61,10 +68,12 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) {
-    return out << std::format("name [string]: {}\n"
+    return out << std::format("byte_order [string]: {}\n"
+                              "name [string]: {}\n"
                               "performance_level [int]: [{}]\n"
                               "utilization_gpu [%]: [{}]\n"
                               "utilization_mem [%]: [{}]",
+                              detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_name()),
                               detail::join(detail::value_or_default(samples.get_performance_level()), ", "),
                               detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "),
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
index 7bfa1c6..1583bf0 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
@@ -88,6 +88,9 @@ void gpu_intel_hardware_sampler::sampling_loop() {
 
     // retrieve initial general information
     {
+        // the byte order is given by Intel directly
+        general_samples_.byte_order_ = "Little Endian";
+
         ze_device_properties_t ze_device_prop{};
         if (zeDeviceGetProperties(device, &ze_device_prop) == ZE_RESULT_SUCCESS) {
             general_samples_.num_threads_per_eu_ = ze_device_prop.numThreadsPerEU;
diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
index ea564e2..096a6ba 100644
--- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
+++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
@@ -51,6 +51,13 @@ void append_map_values(std::string &str, const std::string_view entry_name, cons
 std::string level_zero_general_samples::generate_yaml_string() const {
     std::string str{ "general:\n" };
 
+    // device byte order
+    if (this->byte_order_.has_value()) {
+        str += std::format("  byte_order:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->byte_order_.value());
+    }
     // the model name
     if (this->name_.has_value()) {
         str += std::format("  model_name:\n"
@@ -87,10 +94,12 @@ std::string level_zero_general_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) {
-    return out << std::format("name [string]: {}\n"
+    return out << std::format("byte_order [string]: {}\n"
+                              "name [string]: {}\n"
                               "standby_mode [string]: {}\n"
                               "num_threads_per_eu [int]: {}\n"
                               "eu_simd_width [int]: {}",
+                              detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_name()),
                               detail::value_or_default(samples.get_standby_mode()),
                               detail::value_or_default(samples.get_num_threads_per_eu()),
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 0ea5b8c..003b34b 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -147,6 +147,9 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             }
         }
 
+        // the byte order is given by the NVIDIA CUDA guide
+        general_samples_.byte_order_ = "Little Endian";
+
         std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0');
         if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) {
             general_samples_.name_ = name.substr(0, name.find_first_of('\0'));
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 878877f..e492b07 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -31,6 +31,13 @@ std::string nvml_general_samples::generate_yaml_string() const {
                            "    values: \"{}\"\n",
                            this->architecture_.value());
     }
+    // device byte order
+    if (this->byte_order_.has_value()) {
+        str += std::format("  byte_order:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->byte_order_.value());
+    }
     // device name
     if (this->name_.has_value()) {
         str += std::format("  name:\n"
@@ -84,6 +91,7 @@ std::string nvml_general_samples::generate_yaml_string() const {
 
 std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) {
     return out << std::format("architecture [string]: {}\n"
+                              "byte_order [string]: {}\n"
                               "name [string]: {}\n"
                               "persistence_mode [bool]: {}\n"
                               "num_cores [int]: {}\n"
@@ -91,6 +99,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples)
                               "utilization_gpu [%]: [{}]\n"
                               "utilization_mem [%]: [{}]",
                               detail::value_or_default(samples.get_architecture()),
+                              detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_name()),
                               detail::value_or_default(samples.get_persistence_mode()),
                               detail::value_or_default(samples.get_num_cores()),

From 8ed1bbbb5193d73b09c8fc0a9bbd2ae1366fe68c Mon Sep 17 00:00:00 2001
From: Marcel Breyer <breyer.marcel@web.de>
Date: Tue, 6 Aug 2024 23:10:06 +0200
Subject: [PATCH 03/69] Add vendor_id queries for all GPUs.

---
 include/hardware_sampling/cpu/cpu_samples.hpp         |  2 +-
 .../hardware_sampling/gpu_amd/rocm_smi_samples.hpp    |  1 +
 .../gpu_intel/level_zero_samples.hpp                  |  1 +
 include/hardware_sampling/gpu_nvidia/nvml_samples.hpp |  1 +
 src/hardware_sampling/gpu_amd/hardware_sampler.cpp    |  5 +++++
 src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp    |  9 +++++++++
 src/hardware_sampling/gpu_intel/hardware_sampler.cpp  |  1 +
 .../gpu_intel/level_zero_samples.cpp                  | 11 ++++++++++-
 src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp |  3 +++
 src/hardware_sampling/gpu_nvidia/nvml_samples.cpp     |  9 +++++++++
 10 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index da08f84..aa4fa0c 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -49,7 +49,7 @@ class cpu_general_samples {
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, cores_per_socket)   // the number of physical cores per socket
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_sockets)        // the number of sockets
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, numa_nodes)         // the number of NUMA nodes
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)           // the vendor ID (e.g. GenuineIntel)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)           // the vendor ID (e.g., GenuineIntel)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                // the name of the CPU
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, flags)  // potential CPU flags (e.g., sse4_1, avx, avx, etc)
 
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index 0b97bd0..7f1211a 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -43,6 +43,7 @@ class rocm_smi_general_samples {
     [[nodiscard]] std::string generate_yaml_string() const;
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)  // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)   // the vendor ID
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)        // the name of the device
 
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level)          // the performance level: one of rsmi_dev_perf_level_t
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
index 94d7d3a..1510199 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
@@ -44,6 +44,7 @@ class level_zero_general_samples {
     [[nodiscard]] std::string generate_yaml_string() const;
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)            // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)             // the vendor ID
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                  // the model name of the device
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode)          // the enabled standby mode (power saving or never)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu)  // the number of threads per EU unit
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index 3de4053..189c66c 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -43,6 +43,7 @@ class nvml_general_samples {
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)  // the architecture name of the device
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)    // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)     // the vendor ID
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)          // the name of the device
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode)     // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)    // the number of CUDA cores
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 4586c8d..af2e98f 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -87,6 +87,11 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         // the byte order is given by AMD directly
         general_samples_.byte_order_ = "Little Endian";
 
+        std::string vendor_id(static_cast<std::string::size_type>(1024), '\0');
+        if (rsmi_dev_vendor_name_get(device_id_, vendor_id.data(), vendor_id.size()) == RSMI_STATUS_SUCCESS) {
+            general_samples_.vendor_id_ = vendor_id.substr(0, vendor_id.find_first_of('\0'));
+        }
+
         std::string name(static_cast<std::string::size_type>(1024), '\0');
         if (rsmi_dev_name_get(device_id_, name.data(), name.size()) == RSMI_STATUS_SUCCESS) {
             general_samples_.name_ = name.substr(0, name.find_first_of('\0'));
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index de9f77e..27b16b4 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -31,6 +31,13 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
                            "    values: \"{}\"\n",
                            this->byte_order_.value());
     }
+    // the vendor specific ID
+    if (this->vendor_id_.has_value()) {
+        str += std::format("  vendor_id:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->vendor_id_.value());
+    }
     // device name
     if (this->name_.has_value()) {
         str += std::format("  name:\n"
@@ -69,11 +76,13 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) {
     return out << std::format("byte_order [string]: {}\n"
+                              "vendor_id [string]: {}\n"
                               "name [string]: {}\n"
                               "performance_level [int]: [{}]\n"
                               "utilization_gpu [%]: [{}]\n"
                               "utilization_mem [%]: [{}]",
                               detail::value_or_default(samples.get_byte_order()),
+                              detail::value_or_default(samples.get_vendor_id()),
                               detail::value_or_default(samples.get_name()),
                               detail::join(detail::value_or_default(samples.get_performance_level()), ", "),
                               detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "),
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
index 1583bf0..3fd9a1e 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
@@ -93,6 +93,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
 
         ze_device_properties_t ze_device_prop{};
         if (zeDeviceGetProperties(device, &ze_device_prop) == ZE_RESULT_SUCCESS) {
+            general_samples_.vendor_id_ = std::format("{:x}", ze_device_prop.vendorId);  // TODO: PCI configuration ID to name?
             general_samples_.num_threads_per_eu_ = ze_device_prop.numThreadsPerEU;
             general_samples_.eu_simd_width_ = ze_device_prop.physicalEUSimdWidth;
         }
diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
index 096a6ba..e88c7ab 100644
--- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
+++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
@@ -58,7 +58,14 @@ std::string level_zero_general_samples::generate_yaml_string() const {
                            "    values: \"{}\"\n",
                            this->byte_order_.value());
     }
-    // the model name
+    // the vendor specific ID
+    if (this->vendor_id_.has_value()) {
+        str += std::format("  vendor_id:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->vendor_id_.value());
+    }
+    // device name
     if (this->name_.has_value()) {
         str += std::format("  model_name:\n"
                            "    unit: \"string\"\n"
@@ -95,11 +102,13 @@ std::string level_zero_general_samples::generate_yaml_string() const {
 
 std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) {
     return out << std::format("byte_order [string]: {}\n"
+                              "vendor_id [string]: {}\n"
                               "name [string]: {}\n"
                               "standby_mode [string]: {}\n"
                               "num_threads_per_eu [int]: {}\n"
                               "eu_simd_width [int]: {}",
                               detail::value_or_default(samples.get_byte_order()),
+                              detail::value_or_default(samples.get_vendor_id()),
                               detail::value_or_default(samples.get_name()),
                               detail::value_or_default(samples.get_standby_mode()),
                               detail::value_or_default(samples.get_num_threads_per_eu()),
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 003b34b..9ba8125 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -150,6 +150,9 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
         // the byte order is given by the NVIDIA CUDA guide
         general_samples_.byte_order_ = "Little Endian";
 
+        // the vendor ID is fixed for NVIDIA GPUs
+        general_samples_.vendor_id_ = "NVIDIA";
+
         std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0');
         if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) {
             general_samples_.name_ = name.substr(0, name.find_first_of('\0'));
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index e492b07..c8f17a8 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -38,6 +38,13 @@ std::string nvml_general_samples::generate_yaml_string() const {
                            "    values: \"{}\"\n",
                            this->byte_order_.value());
     }
+    // the vendor specific ID
+    if (this->vendor_id_.has_value()) {
+        str += std::format("  vendor_id:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->vendor_id_.value());
+    }
     // device name
     if (this->name_.has_value()) {
         str += std::format("  name:\n"
@@ -92,6 +99,7 @@ std::string nvml_general_samples::generate_yaml_string() const {
 std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) {
     return out << std::format("architecture [string]: {}\n"
                               "byte_order [string]: {}\n"
+                              "vendor_id [string]: {}\n"
                               "name [string]: {}\n"
                               "persistence_mode [bool]: {}\n"
                               "num_cores [int]: {}\n"
@@ -100,6 +108,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples)
                               "utilization_mem [%]: [{}]",
                               detail::value_or_default(samples.get_architecture()),
                               detail::value_or_default(samples.get_byte_order()),
+                              detail::value_or_default(samples.get_vendor_id()),
                               detail::value_or_default(samples.get_name()),
                               detail::value_or_default(samples.get_persistence_mode()),
                               detail::value_or_default(samples.get_num_cores()),

From 9d0093d00a9b5443095ae23a20cafb9c9f2b7fdd Mon Sep 17 00:00:00 2001
From: Marcel Breyer <breyer.marcel@web.de>
Date: Tue, 6 Aug 2024 23:10:31 +0200
Subject: [PATCH 04/69] Use same YAML entry for all samplers.

---
 src/hardware_sampling/gpu_intel/level_zero_samples.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
index e88c7ab..70f1016 100644
--- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
+++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
@@ -67,7 +67,7 @@ std::string level_zero_general_samples::generate_yaml_string() const {
     }
     // device name
     if (this->name_.has_value()) {
-        str += std::format("  model_name:\n"
+        str += std::format("  name:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->name_.value());

From 62dfc334d5880a25739bbc69bf91023446215bc5 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <breyer.marcel@web.de>
Date: Wed, 7 Aug 2024 20:12:11 +0200
Subject: [PATCH 05/69] Fix typo.

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c1bea7f..272d1c0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -237,7 +237,7 @@ endif ()
 ####################################################################################################################
 ##                                             enable Python bindings                                             ##
 ####################################################################################################################
-option(HWS_ENABLE_PYTHON_BINDINGS "Build langauge bindings for Python." ON)
+option(HWS_ENABLE_PYTHON_BINDINGS "Build language bindings for Python." ON)
 
 if (HWS_ENABLE_PYTHON_BINDINGS)
     add_subdirectory(bindings)

From 473eae69d4c9657d0b349f2bef882d9732968ad4 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <breyer.marcel@web.de>
Date: Wed, 7 Aug 2024 20:42:27 +0200
Subject: [PATCH 06/69] Rename utilization samples.

---
 include/hardware_sampling/cpu/cpu_samples.hpp | 12 +++---
 .../gpu_amd/rocm_smi_samples.hpp              |  6 +--
 .../gpu_nvidia/nvml_samples.hpp               |  6 +--
 src/hardware_sampling/cpu/cpu_samples.cpp     | 10 ++---
 .../cpu/hardware_sampler.cpp                  |  8 ++--
 .../gpu_amd/hardware_sampler.cpp              | 20 +++++-----
 .../gpu_amd/rocm_smi_samples.cpp              | 38 +++++++++---------
 .../gpu_nvidia/hardware_sampler.cpp           | 10 ++---
 .../gpu_nvidia/nvml_samples.cpp               | 40 +++++++++----------
 9 files changed, 75 insertions(+), 75 deletions(-)

diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index aa4fa0c..b422ff3 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -53,12 +53,12 @@ class cpu_general_samples {
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                // the name of the CPU
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, flags)  // potential CPU flags (e.g., sse4_1, avx, avx, etc)
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, busy_percent)  // the percent the CPU was busy doing work
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ipc)           // the instructions-per-cycle count
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, irq)     // the number of interrupts
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, smi)     // the number of system management interrupts
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, poll)    // the number of times the CPU was in the polling state
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, poll_percent)  // the percent of the CPU was in the polling state
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, compute_utilization)  // the percent the CPU was busy doing work
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ipc)                  // the instructions-per-cycle count
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, irq)            // the number of interrupts
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, smi)            // the number of system management interrupts
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, poll)           // the number of times the CPU was in the polling state
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, poll_percent)         // the percent of the CPU was in the polling state
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index 7f1211a..a301aef 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -46,9 +46,9 @@ class rocm_smi_general_samples {
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)   // the vendor ID
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)        // the name of the device
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level)          // the performance level: one of rsmi_dev_perf_level_t
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_gpu)  // the GPU compute utilization in percent
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_mem)  // the GPU memory utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization)  // the GPU compute utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization)   // the GPU memory utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level)              // the performance level: one of rsmi_dev_perf_level_t
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index 189c66c..bc5a9d1 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -48,9 +48,9 @@ class nvml_general_samples {
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode)     // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)    // the number of CUDA cores
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state)         // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_gpu)  // the GPU compute utilization in percent
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_mem)  // the GPU memory utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, compute_utilization)  // the GPU compute utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, memory_utilization)   // the GPU memory utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state)             // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance
 };
 
 /**
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index ef5a3b9..2bfb12b 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -99,12 +99,12 @@ std::string cpu_general_samples::generate_yaml_string() const {
     }
 
     // the percent the CPU was busy
-    if (this->busy_percent_.has_value()) {
-        str += std::format("  utilization:\n"
+    if (this->compute_utilization_.has_value()) {
+        str += std::format("  compute_utilization:\n"
                            "    turbostat_name: \"Busy%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->busy_percent_.value(), ", "));
+                           detail::join(this->compute_utilization_.value(), ", "));
     }
     // the instructions per cycle count
     if (this->ipc_.has_value()) {
@@ -164,7 +164,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples)
                                   "vendor_id [string]: {}\n"
                                   "name [string]: {}\n"
                                   "flags [string]: [{}]\n"
-                                  "busy_percent [%]: [{}]\n"
+                                  "compute_utilization [%]: [{}]\n"
                                   "ipc [float]: [{}]\n"
                                   "irq [int]: [{}]\n"
                                   "smi [int]: [{}]\n"
@@ -180,7 +180,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples)
                                   detail::value_or_default(samples.get_vendor_id()),
                                   detail::value_or_default(samples.get_name()),
                                   detail::join(detail::value_or_default(samples.get_flags()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_busy_percent()), ", "),
+                                  detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
                                   detail::join(detail::value_or_default(samples.get_ipc()), ", "),
                                   detail::join(detail::value_or_default(samples.get_irq()), ", "),
                                   detail::join(detail::value_or_default(samples.get_smi()), ", "),
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 6c967da..3101428 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -161,8 +161,8 @@ void cpu_hardware_sampler::sampling_loop() {
                 using vector_type = decltype(clock_samples_.average_frequency_)::value_type;
                 clock_samples_.average_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
             } else if (header[i] == "Busy%") {
-                using vector_type = decltype(general_samples_.busy_percent_)::value_type;
-                general_samples_.busy_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
+                general_samples_.compute_utilization_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
             } else if (header[i] == "Bzy_MHz") {
                 using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type;
                 clock_samples_.average_non_idle_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
@@ -306,8 +306,8 @@ void cpu_hardware_sampler::sampling_loop() {
                         using vector_type = decltype(clock_samples_.average_frequency_)::value_type;
                         clock_samples_.average_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                     } else if (header[i] == "Busy%") {
-                        using vector_type = decltype(general_samples_.busy_percent_)::value_type;
-                        general_samples_.busy_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
+                        general_samples_.compute_utilization_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                     } else if (header[i] == "Bzy_MHz") {
                         using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type;
                         clock_samples_.average_non_idle_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index af2e98f..15592af 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -103,14 +103,14 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate) };
         }
 
-        decltype(general_samples_.utilization_gpu_)::value_type::value_type utilization_gpu{};
+        decltype(general_samples_.compute_utilization_)::value_type::value_type utilization_gpu{};
         if (rsmi_dev_busy_percent_get(device_id_, &utilization_gpu) == RSMI_STATUS_SUCCESS) {
-            general_samples_.utilization_gpu_ = decltype(general_samples_.utilization_gpu_)::value_type{ utilization_gpu };
+            general_samples_.compute_utilization_ = decltype(general_samples_.compute_utilization_)::value_type{ utilization_gpu };
         }
 
-        decltype(general_samples_.utilization_mem_)::value_type::value_type utilization_mem{};
+        decltype(general_samples_.memory_utilization_)::value_type::value_type utilization_mem{};
         if (rsmi_dev_memory_busy_percent_get(device_id_, &utilization_mem) == RSMI_STATUS_SUCCESS) {
-            general_samples_.utilization_mem_ = decltype(general_samples_.utilization_mem_)::value_type{ utilization_mem };
+            general_samples_.memory_utilization_ = decltype(general_samples_.memory_utilization_)::value_type{ utilization_mem };
         }
     }
 
@@ -441,16 +441,16 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     general_samples_.performance_level_->push_back(static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate));
                 }
 
-                if (general_samples_.utilization_gpu_.has_value()) {
-                    decltype(general_samples_.utilization_gpu_)::value_type::value_type value{};
+                if (general_samples_.compute_utilization_.has_value()) {
+                    decltype(general_samples_.compute_utilization_)::value_type::value_type value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value));
-                    general_samples_.utilization_gpu_->push_back(value);
+                    general_samples_.compute_utilization_->push_back(value);
                 }
 
-                if (general_samples_.utilization_mem_.has_value()) {
-                    decltype(general_samples_.utilization_mem_)::value_type::value_type value{};
+                if (general_samples_.memory_utilization_.has_value()) {
+                    decltype(general_samples_.memory_utilization_)::value_type::value_type value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value));
-                    general_samples_.utilization_mem_->push_back(value);
+                    general_samples_.memory_utilization_->push_back(value);
                 }
             }
 
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index 27b16b4..0c43f95 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -46,26 +46,26 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
                            this->name_.value());
     }
 
-    // performance state
-    if (this->performance_level_.has_value()) {
-        str += std::format("  performance_state:\n"
-                           "    unit: \"int - see rsmi_dev_perf_level_t\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->performance_level_.value(), ", "));
-    }
     // device compute utilization
-    if (this->utilization_gpu_.has_value()) {
-        str += std::format("  utilization_gpu:\n"
+    if (this->compute_utilization_.has_value()) {
+        str += std::format("  compute_utilization:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->utilization_gpu_.value(), ", "));
+                           detail::join(this->compute_utilization_.value(), ", "));
     }
     // device memory utilization
-    if (this->utilization_mem_.has_value()) {
-        str += std::format("  utilization_mem:\n"
+    if (this->memory_utilization_.has_value()) {
+        str += std::format("  memory_utilization:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->utilization_mem_.value(), ", "));
+                           detail::join(this->memory_utilization_.value(), ", "));
+    }
+    // performance state
+    if (this->performance_level_.has_value()) {
+        str += std::format("  performance_state:\n"
+                           "    unit: \"int - see rsmi_dev_perf_level_t\"\n"
+                           "    values: [{}]\n",
+                           detail::join(this->performance_level_.value(), ", "));
     }
 
     // remove last newline
@@ -78,15 +78,15 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samp
     return out << std::format("byte_order [string]: {}\n"
                               "vendor_id [string]: {}\n"
                               "name [string]: {}\n"
-                              "performance_level [int]: [{}]\n"
-                              "utilization_gpu [%]: [{}]\n"
-                              "utilization_mem [%]: [{}]",
+                              "compute_utilization [%]: [{}]\n"
+                              "memory_utilization [%]: [{}]\n"
+                              "performance_level [int]: [{}]",
                               detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_vendor_id()),
                               detail::value_or_default(samples.get_name()),
-                              detail::join(detail::value_or_default(samples.get_performance_level()), ", "),
-                              detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_utilization_mem()), ", "));
+                              detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
+                              detail::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
+                              detail::join(detail::value_or_default(samples.get_performance_level()), ", "));
 }
 
 //*************************************************************************************************************************************//
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 9ba8125..6590402 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -176,8 +176,8 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 
         nvmlUtilization_t util{};
         if (nvmlDeviceGetUtilizationRates(device, &util) == NVML_SUCCESS) {
-            general_samples_.utilization_gpu_ = decltype(general_samples_.utilization_gpu_)::value_type{ util.gpu };
-            general_samples_.utilization_mem_ = decltype(general_samples_.utilization_gpu_)::value_type{ util.memory };
+            general_samples_.compute_utilization_ = decltype(general_samples_.compute_utilization_)::value_type{ util.gpu };
+            general_samples_.memory_utilization_ = decltype(general_samples_.memory_utilization_)::value_type{ util.memory };
         }
     }
 
@@ -380,11 +380,11 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
                     general_samples_.performance_state_->push_back(static_cast<decltype(general_samples_.performance_state_)::value_type::value_type>(pstate));
                 }
 
-                if (general_samples_.utilization_gpu_.has_value() && general_samples_.utilization_mem_.has_value()) {
+                if (general_samples_.compute_utilization_.has_value() && general_samples_.memory_utilization_.has_value()) {
                     nvmlUtilization_t util{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util));
-                    general_samples_.utilization_gpu_->push_back(util.gpu);
-                    general_samples_.utilization_mem_->push_back(util.memory);
+                    general_samples_.compute_utilization_->push_back(util.gpu);
+                    general_samples_.memory_utilization_->push_back(util.memory);
                 }
             }
 
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index c8f17a8..24329ad 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -67,27 +67,27 @@ std::string nvml_general_samples::generate_yaml_string() const {
                            this->num_cores_.value());
     }
 
-    // performance state
-    if (this->performance_state_.has_value()) {
-        str += std::format("  performance_state:\n"
-                           "    unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->performance_state_.value(), ", "));
-    }
     // device compute utilization
-    if (this->utilization_gpu_.has_value()) {
-        str += std::format("  utilization_gpu:\n"
+    if (this->compute_utilization_.has_value()) {
+        str += std::format("  compute_utilization:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->utilization_gpu_.value(), ", "));
+                           detail::join(this->compute_utilization_.value(), ", "));
     }
 
-    // device compute utilization
-    if (this->utilization_mem_.has_value()) {
-        str += std::format("  utilization_mem:\n"
+    // device memory utilization
+    if (this->memory_utilization_.has_value()) {
+        str += std::format("  memory_utilization:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->utilization_mem_.value(), ", "));
+                           detail::join(this->memory_utilization_.value(), ", "));
+    }
+    // performance state
+    if (this->performance_state_.has_value()) {
+        str += std::format("  performance_state:\n"
+                           "    unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n"
+                           "    values: [{}]\n",
+                           detail::join(this->performance_state_.value(), ", "));
     }
 
     // remove last newline
@@ -103,18 +103,18 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples)
                               "name [string]: {}\n"
                               "persistence_mode [bool]: {}\n"
                               "num_cores [int]: {}\n"
-                              "performance_state [int]: [{}]\n"
-                              "utilization_gpu [%]: [{}]\n"
-                              "utilization_mem [%]: [{}]",
+                              "compute_utilization [%]: [{}]\n"
+                              "memory_utilization [%]: [{}]\n"
+                              "performance_state [int]: [{}]",
                               detail::value_or_default(samples.get_architecture()),
                               detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_vendor_id()),
                               detail::value_or_default(samples.get_name()),
                               detail::value_or_default(samples.get_persistence_mode()),
                               detail::value_or_default(samples.get_num_cores()),
-                              detail::join(detail::value_or_default(samples.get_performance_state()), ", "),
-                              detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_utilization_mem()), ", "));
+                              detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
+                              detail::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
+                              detail::join(detail::value_or_default(samples.get_performance_state()), ", "));
 }
 
 //*************************************************************************************************************************************//

From 11f98317a2cd63367a074c944a9c453335bb19ba Mon Sep 17 00:00:00 2001
From: Marcel Breyer <breyer.marcel@web.de>
Date: Wed, 7 Aug 2024 21:19:05 +0200
Subject: [PATCH 07/69] Fix usage of wrong variable type.

---
 src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 6590402..43af02a 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -271,7 +271,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
         // queried samples -> retrieved every iteration if available
         nvmlPstates_t pstate{};
         if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) {
-            power_samples_.power_state_ = decltype(general_samples_.performance_state_)::value_type{ static_cast<decltype(power_samples_.power_state_)::value_type::value_type>(pstate) };
+            power_samples_.power_state_ = decltype(power_samples_.power_state_)::value_type{ static_cast<decltype(power_samples_.power_state_)::value_type::value_type>(pstate) };
         }
 
         decltype(power_samples_.power_usage_)::value_type::value_type power_usage{};

From e7cc2b234c8599ac729299576dc225b72677bc4a Mon Sep 17 00:00:00 2001
From: Marcel Breyer <breyer.marcel@web.de>
Date: Wed, 7 Aug 2024 21:19:33 +0200
Subject: [PATCH 08/69] Rename performance_state to performance_level.

---
 include/hardware_sampling/gpu_nvidia/nvml_samples.hpp |  2 +-
 src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp |  6 +++---
 src/hardware_sampling/gpu_nvidia/nvml_samples.cpp     | 10 +++++-----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index bc5a9d1..3c973f4 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -50,7 +50,7 @@ class nvml_general_samples {
 
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, compute_utilization)  // the GPU compute utilization in percent
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, memory_utilization)   // the GPU memory utilization in percent
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state)             // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level)             // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance
 };
 
 /**
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 43af02a..c971725 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -171,7 +171,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
         // queried samples -> retrieved every iteration if available
         nvmlPstates_t pstate{};
         if (nvmlDeviceGetPerformanceState(device, &pstate) == NVML_SUCCESS) {
-            general_samples_.performance_state_ = decltype(general_samples_.performance_state_)::value_type{ static_cast<decltype(general_samples_.performance_state_)::value_type::value_type>(pstate) };
+            general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate) };
         }
 
         nvmlUtilization_t util{};
@@ -374,10 +374,10 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 
             // retrieve general samples
             {
-                if (general_samples_.performance_state_.has_value()) {
+                if (general_samples_.performance_level_.has_value()) {
                     nvmlPstates_t pstate{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate));
-                    general_samples_.performance_state_->push_back(static_cast<decltype(general_samples_.performance_state_)::value_type::value_type>(pstate));
+                    general_samples_.performance_level_->push_back(static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate));
                 }
 
                 if (general_samples_.compute_utilization_.has_value() && general_samples_.memory_utilization_.has_value()) {
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 24329ad..64b9a05 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -83,11 +83,11 @@ std::string nvml_general_samples::generate_yaml_string() const {
                            detail::join(this->memory_utilization_.value(), ", "));
     }
     // performance state
-    if (this->performance_state_.has_value()) {
-        str += std::format("  performance_state:\n"
+    if (this->performance_level_.has_value()) {
+        str += std::format("  performance_level:\n"
                            "    unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->performance_state_.value(), ", "));
+                           detail::join(this->performance_level_.value(), ", "));
     }
 
     // remove last newline
@@ -105,7 +105,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples)
                               "num_cores [int]: {}\n"
                               "compute_utilization [%]: [{}]\n"
                               "memory_utilization [%]: [{}]\n"
-                              "performance_state [int]: [{}]",
+                              "performance_level [int]: [{}]",
                               detail::value_or_default(samples.get_architecture()),
                               detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_vendor_id()),
@@ -114,7 +114,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples)
                               detail::value_or_default(samples.get_num_cores()),
                               detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
                               detail::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
-                              detail::join(detail::value_or_default(samples.get_performance_state()), ", "));
+                              detail::join(detail::value_or_default(samples.get_performance_level()), ", "));
 }
 
 //*************************************************************************************************************************************//

From e5f73374cad885a999c12d038f922c90ad7fbe2e Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Fri, 16 Aug 2024 12:25:27 +0200
Subject: [PATCH 09/69] Add samples to README (including TODOs).

---
 README.md | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)

diff --git a/README.md b/README.md
index 39c2c3d..97a1b11 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,185 @@ export CPLUS_INCLUDE_PATH=${CMAKE_INSTALL_PREFIX}/include:${CPLUS_INCLUDE_PATH}
 export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 ```
 
+## Available samples
+
+### General samples
+
+| sample              | CPUs  | NVIDIA GPUs | AMD GPUs  |  Intel GPUs  |
+|:--------------------|:-----:|:-----------:|:---------:|:------------:|
+| architecture        |  str  |     str     |     ?     |      ?       |
+| byte_order          |  str  |  str (fix)  | str (fix) |  str (fix)   |
+| num_threads         |  int  |             |           |              |
+| threads_per_core    |  int  |             |           |              |
+| cores_per_socket    |  int  |             |           |              |
+| num_sockets         |  int  |             |           |              |
+| numa_nodes          |  int  |             |           |              |
+| vendor_id           |  str  |  str (fix)  |    str    | str (PCIe ID |
+| name                |  str  |     str     |    str    |     str      |
+| flags               |  str  |             |           |              |
+| compute_utilization |   %   |      %      |     %     |      ?       |
+| memory_utilization  |   -   |      %      |     %     |      ?       |
+| ipc                 | float |      -      |     -     |      -       |
+| irq                 |  int  |      -      |     -     |      -       |
+| smi                 |  int  |      -      |     -     |      -       |
+| poll                |  int  |      -      |     -     |      -       |
+| poll_percent        |   %   |      -      |     -     |      -       |
+| performance_level   |       |     int     |    int    |              |
+| standby_mode        |       |             |           |     str      |
+| num_threads_per_eu  |       |             |           |     int      |
+| eu_simd_width       |       |             |           |     int      |
+| persistence_mode    |       |    bool     |           |              |
+| num_cores           |       |     int     |           |              |
+
+### clock-related samples
+
+| sample                     | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
+|:---------------------------|:----:|:-----------:|:--------:|:----------:|
+| frequency_boost            | bool |             |          |            |
+| min_cpu_frequency          | MHz  |             |          |            |
+| max_cpu_frequency          | MHz  |             |          |            |
+| average_frequency          | MHz  |             |          |            |
+| average_non_idle_frequency | MHz  |             |          |            |
+| time_stamp_counter         | MHz  |             |          |            |
+| clock_socket_min           |      |             |    Hz    |            |
+| clock_socket_max           |      |             |    Hz    |            |
+| clock_memory_min           |      |             |    Hz    |            |
+| clock_memory_max           |      |             |    Hz    |            |
+| clock_gpu_min              |      |     MHz     |    Hz    |    MHz     |
+| clock_gpu_max              |      |     MHz     |    Hz    |    MHz     |
+| clock_socket               |      |             |    Hz    |            |
+| clock_memory               |      |             |    Hz    |            |
+| clock_gpu                  |      |     MHz     |    Hz    |    MHz     |
+| overdrive_level            |      |             |    %     |            |
+| memory_overdrive_level     |      |             |    %     |            |
+| available_clocks_gpu       |      |             |          |    MHz     |
+| clock_mem_min              |      |     MHz     |          |    MHz     |
+| clock_mem_max              |      |     MHz     |          |    MHz     |
+| available_clocks_mem       |      |             |          |    MHz     |
+| tdp_frequency_limit_gpu    |      |             |          |    MHz     |
+| throttle_reason_gpu        |      |             |          |  bitmask   |
+| tdp_frequency_limit_mem    |      |             |          |    MHz     |
+
+### power-related samples
+
+| sample                      | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
+|:----------------------------|:----:|:-----------:|:--------:|:----------:|
+| package_power               |  W   |             |          |            |
+| core_watt                   |  W   |             |          |            |
+| dram_watt                   |  W   |             |          |            |
+| package_rapl_throttling     |  %   |             |          |            |
+| dram_rapl_throttling        |  %   |             |          |            |
+| power_management_limit      |      |     mW      |   muW    |            |
+| power_enforced_limit        |      |     mW      |   muW    |            |
+| power_measurement_type      |      |             |   str    |            |
+| available_power_profiles    |      |             |   str    |            |
+| power_usage                 |      |     mW      |   muW    |            |
+| power_total_energy_consumed |      |      J      |   muJ    |     J      |
+| power_profile               |      |             |   str    |            |
+| energy_threshold_enabled    |      |             |   bool   |            |
+| energy_threshold            |      |             |    J     |            |
+| power_management_mode       |      |    bool     |          |            |
+| power_state                 |      |     int     |          |            |
+
+### memory-related samples
+
+| sample                      | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
+|:----------------------------|:----:|:-----------:|:--------:|:----------:|
+| cache_size_L1d              | str  |             |          |            |
+| cache_size_L1i              | str  |             |          |            |
+| cache_size_L2               | str  |             |          |            |
+| cache_size_L3               | str  |             |          |            |
+| memory_total                |  B   |      B      |    B     |            |
+| swap_memory_total           |  B   |             |          |            |
+| memory_free                 |  B   |      B      |    B     |            |
+| memory_used                 |  B   |      B      |    B     |            |
+| swap_memory_free            |  B   |             |          |            |
+| swap_memory_used            |  B   |             |          |            |
+| visible_memory_total        |      |             |    B     |            |
+| min_num_pcie_lanes          |      |             |   int    |            |
+| max_num_pcie_lanes          |      |             |   int    |            |
+| pcie_bandwidth              |      |    MBPS     |   T/s    |    MBPS    |
+| num_pcie_lanes              |      |             |   int    |            |
+| memory_total_{}             |      |             |          |     B      |
+| allocatable_memory_total_{} |      |             |          |     B      |
+| pcie_max_bandwidth          |      |    MBPS     |          |    BPS     |
+| max_pcie_link_width         |      |             |          |    int     |
+| max_pcie_link_generation    |      |     int     |          |    int     |
+| memory_bus_width_{}         |      |             |          |    Bit     |
+| memory_num_channels_{}      |      |             |          |    int     |
+| memory_location_{}          |      |             |          |    str     |
+| memory_free_{}              |      |             |          |     B      |
+| memory_used_{}              |      |             |          |     B      |
+| pcie_link_width             |      |     int     |          |    int     |
+| pcie_link_generation        |      |     int     |          |    int     |
+| memory_bus_width            |      |     Bit     |          |            |
+
+### temperature-related samples
+
+| sample                   | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
+|:-------------------------|:----:|:-----------:|:--------:|:----------:|
+| per_core_temperature     |  °C  |             |          |            |
+| core_throttle_percentage |  %   |             |          |            |
+| per_package_temperature  |  °C  |             |          |            |
+| num_fans                 |      |     int     |   int    |            |
+| max_fan_speed            |      |             |   int    |            |
+| temperature_gpu_min      |      |             |   m°C    |            |
+| temperature_gpu_max      |      |     °C      |   m°C    |            |
+| temperature_hotspot_min  |      |             |   m°C    |            |
+| temperature_hotspot_max  |      |             |   m°C    |            |
+| temperature_memory_min   |      |             |   m°C    |            |
+| temperature_memory_max   |      |             |   m°C    |            |
+| temperature_hbm_0_min    |      |             |   m°C    |            |
+| temperature_hbm_0_max    |      |             |   m°C    |            |
+| temperature_hbm_1_min    |      |             |   m°C    |    MBPS    |
+| temperature_hbm_1_max    |      |             |   m°C    |            |
+| temperature_hbm_2_min    |      |             |   m°C    |     B      |
+| temperature_hbm_2_max    |      |             |   m°C    |     B      |
+| temperature_hbm_3_min    |      |             |   m°C    |    BPS     |
+| temperature_hbm_3_max    |      |             |   m°C    |    int     |
+| fan_speed                |      |      %      |    %     |    int     |
+| temperature_gpu          |      |     °C      |   m°C    |    Bit     |
+| temperature_hotspot      |      |             |   m°C    |    int     |
+| temperature_memory       |      |             |   m°C    |    str     |
+| temperature_hbm_0        |      |             |   m°C    |     B      |
+| temperature_hbm_1        |      |             |   m°C    |     B      |
+| temperature_hbm_2        |      |             |   m°C    |    int     |
+| temperature_hbm_3        |      |             |   m°C    |    int     |
+| temperature_{}_max       |      |             |          |            |
+| temperature_psu          |      |             |          |            |
+| temperature_{}           |      |             |          |            |
+| min_fan_speed            |      |      %      |          |            |
+| max_fan_speed            |      |      %      |          |            |
+| temperature_mem_max      |      |     °C      |          |            |
+
+### gfx-related (iGPU) samples
+
+| sample                     | CPUs |
+|:---------------------------|:----:|
+| graphics_render_state      |  %   |
+| graphics_frequency         | MHz  |
+| average_graphics_frequency | MHz  |
+| gpu_state_c0               |  %   |
+| cpu_works_for_gpu          |  %   |
+| graphics_power             |  W   |
+
+### "idle states"-related samples
+
+| sample                           | CPUs |
+|:---------------------------------|:----:|
+| all_cpus_state_c0                |  %   |
+| any_cpu_state_c0                 |  %   |
+| lower_power_idle_state           |  %   |
+| system_lower_power_idle_state    |  %   |
+| package_lower_power_idle_state   |  %   |
+| cpu_idle_state_{}_percentage     |  %   |
+| package_idle_state_{}_percentage |  %   |
+| package_idle_state_{}_percentage |  %   |
+| idle_state_{}_percentage         |  %   |
+| idle_state_{}                    | int  |
+
+
+
 ## Example Python usage
 
 ```python

From 1221588e8e2b29a312639be25093f61308608432 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Fri, 16 Aug 2024 13:42:44 +0200
Subject: [PATCH 10/69] Add architecture function for AMD GPUs

---
 .clang-format                                    |  2 +-
 CMakeLists.txt                                   |  3 ++-
 README.md                                        | 16 ++++++++--------
 .../gpu_amd/rocm_smi_samples.hpp                 |  7 ++++---
 include/hardware_sampling/gpu_amd/utility.hpp    | 10 ++++++++++
 .../gpu_amd/hardware_sampler.cpp                 |  9 ++++++++-
 .../gpu_amd/rocm_smi_samples.cpp                 | 11 ++++++++++-
 7 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/.clang-format b/.clang-format
index 9fc54fe..5d6a911 100644
--- a/.clang-format
+++ b/.clang-format
@@ -79,7 +79,7 @@ IncludeBlocks: Regroup
 IncludeCategories:
   - Regex: '^"hardware_sampling/'
     Priority: 1
-  - Regex: '^"(pybind|nvml|rocm_smi|level_zero|subprocess)'
+  - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess)'
     Priority: 2
   - Regex: '^.*'
     Priority: 3
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 272d1c0..e32478a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -190,7 +190,8 @@ endif ()
 ## try finding ROCm SMI
 find_package(rocm_smi QUIET)
 if (rocm_smi_FOUND)
-    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE -lrocm_smi64)
+    find_package(HIP REQUIRED)
+    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE -lrocm_smi64 hip::host)
     target_include_directories(${HWS_LIBRARY_NAME} PRIVATE ${ROCM_SMI_INCLUDE_DIR})
 
     message(STATUS "Enable sampling of AMD GPU information using ROCm SMI.")
diff --git a/README.md b/README.md
index 97a1b11..8305ba4 100644
--- a/README.md
+++ b/README.md
@@ -67,29 +67,29 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 | sample              | CPUs  | NVIDIA GPUs | AMD GPUs  |  Intel GPUs  |
 |:--------------------|:-----:|:-----------:|:---------:|:------------:|
-| architecture        |  str  |     str     |     ?     |      ?       |
+| name                |  str  |     str     |    str    |     str      |
+| vendor_id           |  str  |  str (fix)  |    str    | str (PCIe ID)|
+| architecture        |  str  |     str     |    str    |      ?       |
 | byte_order          |  str  |  str (fix)  | str (fix) |  str (fix)   |
+| compute_utilization |   %   |      %      |     %     |      ?       |
+| memory_utilization  |   -   |      %      |     %     |      ?       |
+| performance_level   |       |     int     |    int    |              |
 | num_threads         |  int  |             |           |              |
 | threads_per_core    |  int  |             |           |              |
 | cores_per_socket    |  int  |             |           |              |
 | num_sockets         |  int  |             |           |              |
 | numa_nodes          |  int  |             |           |              |
-| vendor_id           |  str  |  str (fix)  |    str    | str (PCIe ID |
-| name                |  str  |     str     |    str    |     str      |
 | flags               |  str  |             |           |              |
-| compute_utilization |   %   |      %      |     %     |      ?       |
-| memory_utilization  |   -   |      %      |     %     |      ?       |
 | ipc                 | float |      -      |     -     |      -       |
 | irq                 |  int  |      -      |     -     |      -       |
 | smi                 |  int  |      -      |     -     |      -       |
 | poll                |  int  |      -      |     -     |      -       |
 | poll_percent        |   %   |      -      |     -     |      -       |
-| performance_level   |       |     int     |    int    |              |
+| persistence_mode    |       |    bool     |           |              |
+| num_cores           |       |     int     |           |              |
 | standby_mode        |       |             |           |     str      |
 | num_threads_per_eu  |       |             |           |     int      |
 | eu_simd_width       |       |             |           |     int      |
-| persistence_mode    |       |    bool     |           |              |
-| num_cores           |       |     int     |           |              |
 
 ### clock-related samples
 
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index a301aef..b89402d 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -42,9 +42,10 @@ class rocm_smi_general_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)  // the byte order (e.g., little/big endian)
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)   // the vendor ID
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)        // the name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)    // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)     // the vendor ID
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)          // the name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)  // the architecture name of the device
 
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization)  // the GPU compute utilization in percent
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization)   // the GPU memory utilization in percent
diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp
index 5d039c7..d96387a 100644
--- a/include/hardware_sampling/gpu_amd/utility.hpp
+++ b/include/hardware_sampling/gpu_amd/utility.hpp
@@ -38,8 +38,18 @@ namespace hws {
                 }                                                                                                                                  \
             }                                                                                                                                      \
         }
+
+    #define HWS_HIP_ERROR_CHECK(hip_func)                                                                                             \
+        {                                                                                                                             \
+            const hipError_t errc = hip_func;                                                                                         \
+            if (errc != hipSuccess) {                                                                                                 \
+                throw std::runtime_error{ std::format("Error in HIP function call \"{}\": {}", #hip_func, hipGetErrorString(errc)) }; \
+            }                                                                                                                         \
+        }
+
 #else
     #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) rocm_smi_func;
+    #define HWS_HIP_ERROR_CHECK(hip_func) hip_func;
 #endif
 
 }  // namespace hws
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index a30da35..083b083 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -12,7 +12,8 @@
 #include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
 #include "hardware_sampling/utility.hpp"                   // hws::detail::{time_points_to_epoch, join}
 
-#include "rocm_smi/rocm_smi.h"  // ROCm SMI runtime functions
+#include "hip/hip_runtime_api.h"  // HIP runtime functions
+#include "rocm_smi/rocm_smi.h"    // ROCm SMI runtime functions
 
 #include <chrono>     // std::chrono::{steady_clock, duration_cast, milliseconds}
 #include <cstddef>    // std::size_t
@@ -87,6 +88,12 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         // the byte order is given by AMD directly
         general_samples_.byte_order_ = "Little Endian";
 
+        hipDeviceProp_t prop{};
+        if (hipGetDeviceProperties(&prop, device_id_) == hipSuccess) {
+            std::string architecture{ prop.gcnArchName };
+            general_samples_.architecture_ = architecture.substr(0, architecture.find_first_of('\0'));
+        }
+
         std::string vendor_id(static_cast<std::string::size_type>(1024), '\0');
         if (rsmi_dev_vendor_name_get(device_id_, vendor_id.data(), vendor_id.size()) == RSMI_STATUS_SUCCESS) {
             general_samples_.vendor_id_ = vendor_id.substr(0, vendor_id.find_first_of('\0'));
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index 0c43f95..0bc1041 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -24,6 +24,13 @@ namespace hws {
 std::string rocm_smi_general_samples::generate_yaml_string() const {
     std::string str{ "general:\n" };
 
+    // device architecture
+    if (this->architecture_.has_value()) {
+        str += std::format("  architecture:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->architecture_.value());
+    }
     // device byte order
     if (this->byte_order_.has_value()) {
         str += std::format("  byte_order:\n"
@@ -75,12 +82,14 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) {
-    return out << std::format("byte_order [string]: {}\n"
+    return out << std::format("architecture [string]: {}\n"
+                              "byte_order [string]: {}\n"
                               "vendor_id [string]: {}\n"
                               "name [string]: {}\n"
                               "compute_utilization [%]: [{}]\n"
                               "memory_utilization [%]: [{}]\n"
                               "performance_level [int]: [{}]",
+                              detail::value_or_default(samples.get_architecture()),
                               detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_vendor_id()),
                               detail::value_or_default(samples.get_name()),

From ce69a52c8fde04a79d6c504537a81dd9d89cb9dc Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Fri, 16 Aug 2024 14:14:05 +0200
Subject: [PATCH 11/69] Add query for the number of cores.

---
 README.md                                      | 2 +-
 include/hardware_sampling/cpu/cpu_samples.hpp  | 1 +
 src/hardware_sampling/cpu/cpu_samples.cpp      | 9 +++++++++
 src/hardware_sampling/cpu/hardware_sampler.cpp | 5 +++++
 4 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8305ba4..df20d89 100644
--- a/README.md
+++ b/README.md
@@ -74,6 +74,7 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 | compute_utilization |   %   |      %      |     %     |      ?       |
 | memory_utilization  |   -   |      %      |     %     |      ?       |
 | performance_level   |       |     int     |    int    |              |
+| num_cores           |  int  |     int     |     -     |              |
 | num_threads         |  int  |             |           |              |
 | threads_per_core    |  int  |             |           |              |
 | cores_per_socket    |  int  |             |           |              |
@@ -86,7 +87,6 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 | poll                |  int  |      -      |     -     |      -       |
 | poll_percent        |   %   |      -      |     -     |      -       |
 | persistence_mode    |       |    bool     |           |              |
-| num_cores           |       |     int     |           |              |
 | standby_mode        |       |             |           |     str      |
 | num_threads_per_eu  |       |             |           |     int      |
 | eu_simd_width       |       |             |           |     int      |
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index b422ff3..59cb9a1 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -44,6 +44,7 @@ class cpu_general_samples {
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)        // the CPU architecture (e.g., x86_64)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)          // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)          // the total number of cores of the CPU(s)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_threads)        // the number of threads of the CPU(s) including potential hyper-threads
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, threads_per_core)   // the number of hyper-threads per core
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, cores_per_socket)   // the number of physical cores per socket
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index 2bfb12b..d73f86b 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -41,6 +41,13 @@ std::string cpu_general_samples::generate_yaml_string() const {
                            "    values: \"{}\"\n",
                            this->byte_order_.value());
     }
+    // number of cores
+    if (this->num_cores_.has_value()) {
+        str += std::format("  num_cores:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->num_cores_.value());
+    }
     // number of threads including hyper-threads
     if (this->num_threads_.has_value()) {
         str += std::format("  num_threads:\n"
@@ -156,6 +163,7 @@ std::string cpu_general_samples::generate_yaml_string() const {
 std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) {
     std::string str = std::format("architecture [string]: {}\n"
                                   "byte_order [string]: {}\n"
+                                  "num_cores [int]: {}\n"
                                   "num_threads [int]: {}\n"
                                   "threads_per_core [int]: {}\n"
                                   "cores_per_socket [int]: {}\n"
@@ -172,6 +180,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples)
                                   "poll_percent [%]: [{}]",
                                   detail::value_or_default(samples.get_architecture()),
                                   detail::value_or_default(samples.get_byte_order()),
+                                  detail::value_or_default(samples.get_num_cores()),
                                   detail::value_or_default(samples.get_num_threads()),
                                   detail::value_or_default(samples.get_threads_per_core()),
                                   detail::value_or_default(samples.get_cores_per_socket()),
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 29747f7..d63efc8 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -105,6 +105,11 @@ void cpu_hardware_sampler::sampling_loop() {
                 memory_samples_.l3_cache_ = detail::convert_to<decltype(memory_samples_.l3_cache_)::value_type>(value);
             }
         }
+
+        // check if the number of cores can be derived from the otherwise found values
+        if (general_samples_.num_threads_.has_value() && general_samples_.threads_per_core_.has_value()) {
+            general_samples_.num_cores_ = general_samples_.num_threads_.value() / general_samples_.threads_per_core_.value();
+        }
     }
 #endif
 

From 567afcdc8747baa6dacad59a78b34df11d0ded61 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Thu, 12 Sep 2024 15:30:00 +0200
Subject: [PATCH 12/69] Update power related query to be more uniform (except
 Intel Level Zero).

---
 README.md                                     | 43 ++++++-----
 include/hardware_sampling/cpu/cpu_samples.hpp | 13 ++--
 .../gpu_amd/rocm_smi_samples.hpp              | 12 +--
 .../gpu_nvidia/nvml_samples.hpp               | 16 ++--
 src/hardware_sampling/cpu/cpu_samples.cpp     | 30 ++++++--
 .../cpu/hardware_sampler.cpp                  | 16 +++-
 .../gpu_amd/hardware_sampler.cpp              | 45 +++++------
 .../gpu_amd/rocm_smi_samples.cpp              | 48 ++++++------
 .../gpu_nvidia/hardware_sampler.cpp           | 60 ++++++++++-----
 .../gpu_nvidia/nvml_samples.cpp               | 74 +++++++++++--------
 10 files changed, 212 insertions(+), 145 deletions(-)

diff --git a/README.md b/README.md
index df20d89..642e56b 100644
--- a/README.md
+++ b/README.md
@@ -75,10 +75,11 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 | memory_utilization  |   -   |      %      |     %     |      ?       |
 | performance_level   |       |     int     |    int    |              |
 | num_cores           |  int  |     int     |     -     |              |
-| num_threads         |  int  |             |           |              |
-| threads_per_core    |  int  |             |           |              |
-| cores_per_socket    |  int  |             |           |              |
-| num_sockets         |  int  |             |           |              |
+| num_compute_units   |   -   |     int     |    int    |      ?       | TODO
+| num_threads         |  int  |      -      |     -     |      -       |
+| threads_per_core    |  int  |      -      |     -     |      -       |
+| cores_per_socket    |  int  |      -      |     -     |      -       |
+| num_sockets         |  int  |      -      |     -     |      -       |
 | numa_nodes          |  int  |             |           |              |
 | flags               |  str  |             |           |              |
 | ipc                 | float |      -      |     -     |      -       |
@@ -122,24 +123,22 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 ### power-related samples
 
-| sample                      | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
-|:----------------------------|:----:|:-----------:|:--------:|:----------:|
-| package_power               |  W   |             |          |            |
-| core_watt                   |  W   |             |          |            |
-| dram_watt                   |  W   |             |          |            |
-| package_rapl_throttling     |  %   |             |          |            |
-| dram_rapl_throttling        |  %   |             |          |            |
-| power_management_limit      |      |     mW      |   muW    |            |
-| power_enforced_limit        |      |     mW      |   muW    |            |
-| power_measurement_type      |      |             |   str    |            |
-| available_power_profiles    |      |             |   str    |            |
-| power_usage                 |      |     mW      |   muW    |            |
-| power_total_energy_consumed |      |      J      |   muJ    |     J      |
-| power_profile               |      |             |   str    |            |
-| energy_threshold_enabled    |      |             |   bool   |            |
-| energy_threshold            |      |             |    J     |            |
-| power_management_mode       |      |    bool     |          |            |
-| power_state                 |      |     int     |          |            |
+| sample                      |   CPUs    | NVIDIA GPUs |  AMD GPUs   | Intel GPUs |
+|:----------------------------|:---------:|:-----------:|:-----------:|:----------:|
+| power_management_limit      |     -     |      W      |      W      |            |
+| power_enforced_limit        |     -     |      W      |      W      |            |
+| power_measurement_type      | str (fix) |     str     |     str     |            |
+| power_management_mode       |     -     |    bool     |      -      |            |
+| available_power_profiles    |     -     | list of int | list of str |            |
+| power_usage                 |     W     |      W      |      W      |            |
+| core_watt                   |     W     |      -      |      -      |     -      |
+| dram_watt                   |     W     |      -      |      -      |     -      |
+| package_rapl_throttling     |     %     |      -      |      -      |     -      |
+| dram_rapl_throttling        |     %     |      -      |      -      |     -      |
+| power_total_energy_consumed |     J     |      J      |      J      |     J      |
+| power_profile               |     -     |     int     |     str     |            |
+| energy_threshold_enabled    |           |             |             |    bool    |
+| energy_threshold            |           |             |             |     J      |
 
 ### memory-related samples
 
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index 59cb9a1..3263d77 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -127,11 +127,14 @@ class cpu_power_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_watt)                   // the currently consumed power of the package of the CPU in W
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_watt)                      // the currently consumed power of the core part of the CPU in W
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ram_watt)                       // the currently consumed power of the RAM part of the CPU in W
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_rapl_throttle_percent)  // the percent of time the package throttled due to RAPL limiters
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, dram_rapl_throttle_percent)     // the percent of time the DRAM throttled due to RAPL limiters
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type)  // the type of the power readings: always "current/instant"
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage)                     // the currently consumed power of the package of the CPU in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption)  // the total energy consumption in J (accumulated from power_usage times the sampling interval)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_watt)                       // the currently consumed power of the core part of the CPU in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ram_watt)                        // the currently consumed power of the RAM part of the CPU in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_rapl_throttle_percent)   // the percent of time the package throttled due to RAPL limiters
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, dram_rapl_throttle_percent)      // the percent of time the DRAM throttled due to RAPL limiters
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index b89402d..3f89615 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -122,14 +122,14 @@ class rocm_smi_power_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, power_default_cap)                    // the default power cap, may be different from power cap
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, power_cap)                            // if the GPU draws more power (μW) than the power cap, the GPU may throttle
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_type)                             // the type of the power management: either current power draw or average power draw
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_management_limit)                      // the default power cap (W), may be different from power cap
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit)                        // if the GPU draws more power (W) than the power cap, the GPU may throttle
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type)                 // the type of the power readings: either current power draw or average power draw
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, available_power_profiles)  // a list of the available power profiles
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_usage)                     // the current GPU socket power draw in μW
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_total_energy_consumption)  // the total power consumption since the last driver reload in μJ
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, power_profile)                     // the current active power profile; one of 'available_power_profiles'
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage)                     // the current GPU socket power draw in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption)  // the total power consumption since the last driver reload in J
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, power_profile)              // the current active power profile; one of 'available_power_profiles'
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index 3c973f4..86420c8 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -123,13 +123,15 @@ class nvml_power_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode)           // true if power management algorithms are supported and active
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, power_management_limit)  // if the GPU draws more power (mW) than the power management limit, the GPU may throttle
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, power_enforced_limit)    // the actually enforced power limit, may be different from power management limit if external limiters are set
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, power_state)                                    // the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, power_usage)                           // the current power draw of the GPU and its related circuity (e.g., memory) in mW
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, power_total_energy_consumption)  // the total power consumption since the last driver reload in mJ
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_management_limit)              // if the GPU draws more power (W) than the power management limit, the GPU may throttle
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit)                // the actually enforced power limit (W), may be different from power management limit if external limiters are set
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type)         // the type of the power readings: either current power draw or average power draw
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode)                 // true if power management algorithms are supported and active
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<int>, available_power_profiles)  // a list of the available power profiles
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage)                     // the current power draw of the GPU and its related circuitry (e.g., memory) in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption)  // the total power consumption since the last driver reload in J
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, power_profile)                      // the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power; 32 indicates unknown
 };
 
 /**
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index d73f86b..92bf8a9 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -284,14 +284,30 @@ std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) {
 std::string cpu_power_samples::generate_yaml_string() const {
     std::string str{ "power:\n" };
 
+    // power measurement type
+    if (this->power_measurement_type_.has_value()) {
+        str += std::format("  power_measurement_type:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->power_measurement_type_.value());
+    }
+
     // the package Watt
-    if (this->package_watt_.has_value()) {
-        str += std::format("  package_power:\n"
+    if (this->power_usage_.has_value()) {
+        str += std::format("  power_usage:\n"
                            "    turbostat_name: \"PkgWatt\"\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->package_watt_.value(), ", "));
+                           detail::join(this->power_usage_.value(), ", "));
     }
+    // total energy consumed
+    if (this->power_total_energy_consumption_.has_value()) {
+        str += std::format("  power_total_energy_consumed:\n"
+                           "    unit: \"J\"\n"
+                           "    values: [{}]\n",
+                           detail::join(this->power_total_energy_consumption_.value(), ", "));
+    }
+
     // the core Watt
     if (this->core_watt_.has_value()) {
         str += std::format("  core_power:\n"
@@ -332,12 +348,16 @@ std::string cpu_power_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_power_samples &samples) {
-    return out << std::format("package_watt [W]: [{}]\n"
+    return out << std::format("power_measurement_type [string]: {}\n"
+                              "power_usage [W]: [{}]\n"
+                              "power_total_energy_consumption [J]: [{}]\n"
                               "core_watt [W]: [{}]\n"
                               "ram_watt [W]: [{}]\n"
                               "package_rapl_throttle_percent [%]: [{}]\n"
                               "dram_rapl_throttle_percent [%]: [{}]",
-                              detail::join(detail::value_or_default(samples.get_package_watt()), ", "),
+                              detail::value_or_default(samples.get_power_measurement_type()),
+                              detail::join(detail::value_or_default(samples.get_power_usage()), ", "),
+                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
                               detail::join(detail::value_or_default(samples.get_core_watt()), ", "),
                               detail::join(detail::value_or_default(samples.get_ram_watt()), ", "),
                               detail::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "),
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index d63efc8..e645fde 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -229,8 +229,10 @@ void cpu_hardware_sampler::sampling_loop() {
                 using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
                 idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
             } else if (header[i] == "PkgWatt") {
-                using vector_type = decltype(power_samples_.package_watt_)::value_type;
-                power_samples_.package_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                using vector_type = decltype(power_samples_.power_usage_)::value_type;
+                power_samples_.power_usage_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                power_samples_.power_measurement_type_ = "current/instant";
+                power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 };
             } else if (header[i] == "CorWatt") {
                 using vector_type = decltype(power_samples_.core_watt_)::value_type;
                 power_samples_.core_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
@@ -374,8 +376,14 @@ void cpu_hardware_sampler::sampling_loop() {
                         using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
                         idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                     } else if (header[i] == "PkgWatt") {
-                        using vector_type = decltype(power_samples_.package_watt_)::value_type;
-                        power_samples_.package_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        using vector_type = decltype(power_samples_.power_usage_)::value_type;
+                        power_samples_.power_usage_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        // calculate total energy consumption
+                        using value_type = decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type;
+                        const std::size_t num_time_points = this->sampling_time_points().size();
+                        const value_type time_difference = std::chrono::duration<value_type>(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count();
+                        const auto current = power_samples_.power_usage_->back() * time_difference;
+                        power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current);
                     } else if (header[i] == "CorWatt") {
                         using vector_type = decltype(power_samples_.core_watt_)::value_type;
                         power_samples_.core_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 083b083..434b047 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -82,6 +82,8 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
     this->add_time_point(std::chrono::steady_clock::now());
 
+    std::uint64_t initial_power_usage{};
+
     // retrieve initial general information
     {
         // fixed information -> only retrieved once
@@ -174,33 +176,32 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
     // retrieve initial power related information
     {
-        decltype(power_samples_.power_default_cap_)::value_type power_default_cap{};
+        std::uint64_t power_default_cap{};
         if (rsmi_dev_power_cap_default_get(device_id_, &power_default_cap) == RSMI_STATUS_SUCCESS) {
-            power_samples_.power_default_cap_ = power_default_cap;
+            power_samples_.power_management_limit_ = static_cast<decltype(power_samples_.power_management_limit_)::value_type>(power_default_cap) / 1000.0 / 1000.0;
         }
 
-        decltype(power_samples_.power_cap_)::value_type power_cap{};
+        std::uint64_t power_cap{};
         if (rsmi_dev_power_cap_get(device_id_, std::uint32_t{ 0 }, &power_cap) == RSMI_STATUS_SUCCESS) {
-            power_samples_.power_cap_ = power_cap;
+            power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(power_cap) / 1000.0 / 1000.0;
         }
 
         {
-            decltype(power_samples_.power_usage_)::value_type::value_type power_usage{};
             RSMI_POWER_TYPE power_type{};
-            if (rsmi_dev_power_get(device_id_, &power_usage, &power_type) == RSMI_STATUS_SUCCESS) {
+            if (rsmi_dev_power_get(device_id_, &initial_power_usage, &power_type) == RSMI_STATUS_SUCCESS) {
                 switch (power_type) {
                     case RSMI_POWER_TYPE::RSMI_AVERAGE_POWER:
-                        power_samples_.power_type_ = "average";
+                        power_samples_.power_measurement_type_ = "average";
                         break;
                     case RSMI_POWER_TYPE::RSMI_CURRENT_POWER:
-                        power_samples_.power_type_ = "current/instant";
+                        power_samples_.power_measurement_type_ = "current/instant";
                         break;
                     case RSMI_POWER_TYPE::RSMI_INVALID_POWER:
-                        power_samples_.power_type_ = "invalid/undetected";
+                        power_samples_.power_measurement_type_ = "invalid/undetected";
                         break;
                 }
-                // queried samples -> retrieved every iteration if available
-                power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ power_usage };
+                // report power usage since the first sample
+                power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(0) };
             }
         }
 
@@ -263,10 +264,11 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         // queried samples -> retrieved every iteration if available
         [[maybe_unused]] std::uint64_t timestamp{};
         float resolution{};
-        decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type power_total_energy_consumption{};
-        if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {  // TODO: returns the same value for all invocations
-            const double scaled_value = static_cast<double>(power_total_energy_consumption) * static_cast<double>(resolution);
-            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast<decltype(power_total_energy_consumption)>(scaled_value) };
+        std::uint64_t power_total_energy_consumption{};
+        if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {
+            const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) *
+                                      static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
+            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ scaled_value / 1000.0 / 1000.0 };
         }
     }
 
@@ -514,18 +516,19 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             {
                 if (power_samples_.power_usage_.has_value()) {
                     [[maybe_unused]] RSMI_POWER_TYPE power_type{};
-                    decltype(power_samples_.power_usage_)::value_type::value_type value{};
+                    std::uint64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type));
-                    power_samples_.power_usage_->push_back(value);
+                    power_samples_.power_usage_->push_back((static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value) - static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(initial_power_usage)) / 1000.0 / 1000.0);  // convert before subtracting: the unsigned difference would wrap around if the draw drops below the initial sample; muW -> W
                 }
 
                 if (power_samples_.power_total_energy_consumption_.has_value()) {
                     [[maybe_unused]] std::uint64_t timestamp{};
                     float resolution{};
-                    decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp));  // TODO: returns the same value for all invocations
-                    const double scaled_value = static_cast<double>(value) * static_cast<double>(resolution);
-                    power_samples_.power_total_energy_consumption_->push_back(static_cast<decltype(value)>(scaled_value));
+                    std::uint64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp));
+                    const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) *
+                                              static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
+                    power_samples_.power_total_energy_consumption_->push_back(scaled_value / 1000.0 / 1000.0);  // counter * resolution is in muJ -> convert to J exactly like the initial sample
                 }
 
                 if (power_samples_.power_profile_.has_value()) {
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index 0bc1041..c7f7d88 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -224,26 +224,26 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &sample
 std::string rocm_smi_power_samples::generate_yaml_string() const {
     std::string str{ "power:\n" };
 
-    // default power cap
-    if (this->power_default_cap_.has_value()) {
+    // power management limit
+    if (this->power_management_limit_.has_value()) {
         str += std::format("  power_management_limit:\n"
-                           "    unit: \"muW\"\n"
+                           "    unit: \"W\"\n"
                            "    values: {}\n",
-                           this->power_default_cap_.value());
+                           this->power_management_limit_.value());
     }
-    // power cap
-    if (this->power_cap_.has_value()) {
+    // power enforced limit
+    if (this->power_enforced_limit_.has_value()) {
         str += std::format("  power_enforced_limit:\n"
-                           "    unit: \"muW\"\n"
+                           "    unit: \"W\"\n"
                            "    values: {}\n",
-                           this->power_cap_.value());
+                           this->power_enforced_limit_.value());
     }
     // power measurement type
-    if (this->power_type_.has_value()) {
+    if (this->power_measurement_type_.has_value()) {
         str += std::format("  power_measurement_type:\n"
                            "    unit: \"string\"\n"
-                           "    values: {}\n",
-                           this->power_type_.value());
+                           "    values: \"{}\"\n",
+                           this->power_measurement_type_.value());
     }
     // available power levels
     if (this->available_power_profiles_.has_value()) {
@@ -256,20 +256,16 @@ std::string rocm_smi_power_samples::generate_yaml_string() const {
     // current power usage
     if (this->power_usage_.has_value()) {
         str += std::format("  power_usage:\n"
-                           "    unit: \"muW\"\n"
+                           "    unit: \"W\"\n"
                            "    values: [{}]\n",
                            detail::join(this->power_usage_.value(), ", "));
     }
     // total energy consumed
     if (this->power_total_energy_consumption_.has_value()) {
-        decltype(rocm_smi_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size());
-        for (std::size_t i = 0; i < consumed_energy.size(); ++i) {
-            consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front();
-        }
         str += std::format("  power_total_energy_consumed:\n"
-                           "    unit: \"muJ\"\n"
+                           "    unit: \"J\"\n"
                            "    values: [{}]\n",
-                           detail::join(consumed_energy, ", "));
+                           detail::join(this->power_total_energy_consumption_.value(), ", "));
     }
     // current power level
     if (this->power_profile_.has_value()) {
@@ -286,16 +282,16 @@ std::string rocm_smi_power_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples) {
-    return out << std::format("power_default_cap [muW]: {}\n"
-                              "power_cap [muW]: {}\n"
-                              "power_type [string]: {}\n"
+    return out << std::format("power_management_limit [W]: {}\n"
+                              "power_enforced_limit [W]: {}\n"
+                              "power_measurement_type [string]: {}\n"
                               "available_power_profiles [string]: [{}]\n"
-                              "power_usage [muW]: [{}]\n"
-                              "power_total_energy_consumption [muJ]: [{}]\n"
+                              "power_usage [W]: [{}]\n"
+                              "power_total_energy_consumption [J]: [{}]\n"
                               "power_profile [string]: [{}]",
-                              detail::value_or_default(samples.get_power_default_cap()),
-                              detail::value_or_default(samples.get_power_cap()),
-                              detail::value_or_default(samples.get_power_type()),
+                              detail::value_or_default(samples.get_power_management_limit()),
+                              detail::value_or_default(samples.get_power_enforced_limit()),
+                              detail::value_or_default(samples.get_power_measurement_type()),
                               detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "),
                               detail::join(detail::value_or_default(samples.get_power_usage()), ", "),
                               detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index c971725..41b3c0b 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -22,6 +22,7 @@
 #include <format>     // std::format
 #include <ios>        // std::ios_base
 #include <iostream>   // std::cerr, std::endl
+#include <numeric>    // std::iota
 #include <optional>   // std::optional
 #include <ostream>    // std::ostream
 #include <stdexcept>  // std::runtime_error
@@ -86,6 +87,8 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 
     this->add_time_point(std::chrono::steady_clock::now());
 
+    unsigned int initial_power_usage{};
+
     // retrieve initial general information
     {
         // fixed information -> only retrieved once
@@ -258,30 +261,49 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             power_samples_.power_management_mode_ = mode == NVML_FEATURE_ENABLED;
         }
 
-        decltype(power_samples_.power_management_limit_)::value_type power_management_limit{};
+        unsigned int power_management_limit{};
         if (nvmlDeviceGetPowerManagementLimit(device, &power_management_limit) == NVML_SUCCESS) {
-            power_samples_.power_management_limit_ = power_management_limit;
+            power_samples_.power_management_limit_ = static_cast<decltype(power_samples_.power_management_limit_)::value_type>(power_management_limit) / 1000.0;
         }
 
-        decltype(power_samples_.power_enforced_limit_)::value_type power_enforced_limit{};
+        unsigned int power_enforced_limit{};
         if (nvmlDeviceGetEnforcedPowerLimit(device, &power_enforced_limit) == NVML_SUCCESS) {
-            power_samples_.power_enforced_limit_ = power_enforced_limit;
+            power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(power_enforced_limit) / 1000.0;
+        }
+
+        if (general_samples_.architecture_.has_value()) {
+            // based on https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1gf754f109beca3a4a8c8c1cd650d7d66c
+            if (general_samples_.architecture_ == "Kepler" || general_samples_.architecture_ == "Maxwell" || general_samples_.architecture_ == "Pascal" || general_samples_.architecture_ == "Volta" || general_samples_.architecture_ == "Turing") {
+                power_samples_.power_measurement_type_ = "current/instant";
+            } else if (general_samples_.architecture_ == "Ampere" || general_samples_.architecture_ == "Ada" || general_samples_.architecture_ == "Hopper" || general_samples_.architecture_ == "Blackwell" || general_samples_.architecture_ == "Orin") {
+                if (general_samples_.name_.has_value() && general_samples_.name_.value().find("A100") != std::string::npos) {
+                    // GA100 also has instant power draw measurements
+                    power_samples_.power_measurement_type_ = "current/instant";
+                } else {
+                    power_samples_.power_measurement_type_ = "average";
+                }
+            } else {
+                power_samples_.power_measurement_type_ = "invalid/undetected";
+            }
         }
 
+        decltype(power_samples_.available_power_profiles_)::value_type power_states(17, 32);  // 17 power states, value 32 = unknown
+        std::iota(power_states.begin(), power_states.end() - 1, decltype(power_samples_.available_power_profiles_)::value_type::value_type{ 0 });
+        power_samples_.available_power_profiles_ = power_states;
+
         // queried samples -> retrieved every iteration if available
-        nvmlPstates_t pstate{};
-        if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) {
-            power_samples_.power_state_ = decltype(power_samples_.power_state_)::value_type{ static_cast<decltype(power_samples_.power_state_)::value_type::value_type>(pstate) };
+        if (nvmlDeviceGetPowerUsage(device, &initial_power_usage) == NVML_SUCCESS) {
+            power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(0) };
         }
 
-        decltype(power_samples_.power_usage_)::value_type::value_type power_usage{};
-        if (nvmlDeviceGetPowerUsage(device, &power_usage) == NVML_SUCCESS) {
-            power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ power_usage };
+        unsigned long long power_total_energy_consumption{};
+        if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) {
+            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) / 1000.0 };
         }
 
-        decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type power_total_energy_consumption{};
-        if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) {
-            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ power_total_energy_consumption };
+        nvmlPstates_t pstate{};
+        if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) {
+            power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ static_cast<decltype(power_samples_.power_profile_)::value_type::value_type>(pstate) };
         }
     }
 
@@ -424,22 +446,22 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 
             // retrieve power related information
             {
-                if (power_samples_.power_state_.has_value()) {
+                if (power_samples_.power_profile_.has_value()) {
                     nvmlPstates_t pstate{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate));
-                    power_samples_.power_state_->push_back(static_cast<decltype(power_samples_.power_state_)::value_type::value_type>(pstate));
+                    power_samples_.power_profile_->push_back(static_cast<decltype(power_samples_.power_profile_)::value_type::value_type>(pstate));
                 }
 
                 if (power_samples_.power_usage_.has_value()) {
-                    decltype(power_samples_.power_usage_)::value_type::value_type value{};
+                    unsigned int value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value));
-                    power_samples_.power_usage_->push_back(value);
+                    power_samples_.power_usage_->push_back((static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value) - static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(initial_power_usage)) / 1000.0);  // cast before subtracting: 'value' is unsigned, so a reading below the initial one would otherwise wrap around
                 }
 
                 if (power_samples_.power_total_energy_consumption_.has_value()) {
-                    decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type value{};
+                    unsigned long long value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value));
-                    power_samples_.power_total_energy_consumption_->push_back(value);
+                    power_samples_.power_total_energy_consumption_->push_back(static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) / 1000.0);
                 }
             }
 
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 64b9a05..95cfa17 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -241,52 +241,62 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) {
 std::string nvml_power_samples::generate_yaml_string() const {
     std::string str{ "power:\n" };
 
-    // the power management mode
-    if (this->power_management_mode_.has_value()) {
-        str += std::format("  power_management_mode:\n"
-                           "    unit: \"bool\"\n"
-                           "    values: {}\n",
-                           this->power_management_mode_.value());
-    }
     // power management limit
     if (this->power_management_limit_.has_value()) {
         str += std::format("  power_management_limit:\n"
-                           "    unit: \"mW\"\n"
+                           "    unit: \"W\"\n"
                            "    values: {}\n",
                            this->power_management_limit_.value());
     }
     // power enforced limit
     if (this->power_enforced_limit_.has_value()) {
         str += std::format("  power_enforced_limit:\n"
-                           "    unit: \"mW\"\n"
+                           "    unit: \"W\"\n"
                            "    values: {}\n",
                            this->power_enforced_limit_.value());
     }
-
-    // power state
-    if (this->power_state_.has_value()) {
-        str += std::format("  power_state:\n"
-                           "    unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n"
+    // power measurement type
+    if (this->power_measurement_type_.has_value()) {
+        str += std::format("  power_measurement_type:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->power_measurement_type_.value());
+    }
+    // the power management mode
+    if (this->power_management_mode_.has_value()) {
+        str += std::format("  power_management_mode:\n"
+                           "    unit: \"bool\"\n"
+                           "    values: {}\n",
+                           this->power_management_mode_.value());
+    }
+    // available power levels
+    if (this->available_power_profiles_.has_value()) {
+        str += std::format("  available_power_profiles:\n"
+                           "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->power_state_.value(), ", "));
+                           detail::join(this->available_power_profiles_.value(), ", "));
     }
+
     // current power usage
     if (this->power_usage_.has_value()) {
         str += std::format("  power_usage:\n"
-                           "    unit: \"mW\"\n"
+                           "    unit: \"W\"\n"
                            "    values: [{}]\n",
                            detail::join(this->power_usage_.value(), ", "));
     }
     // total energy consumed
     if (this->power_total_energy_consumption_.has_value()) {
-        decltype(nvml_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size());
-        for (std::size_t i = 0; i < consumed_energy.size(); ++i) {
-            consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front();
-        }
         str += std::format("  power_total_energy_consumed:\n"
                            "    unit: \"J\"\n"
                            "    values: [{}]\n",
-                           detail::join(consumed_energy, ", "));
+                           detail::join(this->power_total_energy_consumption_.value(), ", "));
+    }
+    // power state
+    if (this->power_profile_.has_value()) {
+        str += std::format("  power_profile:\n"
+                           "    unit: \"int\"\n"
+                           "    values: [{}]\n",
+                           detail::join(this->power_profile_.value(), ", "));
     }
 
     // remove last newline
@@ -296,18 +306,22 @@ std::string nvml_power_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) {
-    return out << std::format("power_management_mode [bool]: {}\n"
-                              "power_management_limit [mW]: {}\n"
-                              "power_enforced_limit [mW]: {}\n"
-                              "power_state [int]: [{}]\n"
-                              "power_usage [mW]: [{}]\n"
-                              "power_total_energy_consumption [J]: [{}]",
-                              detail::value_or_default(samples.get_power_management_mode()),
+    return out << std::format("power_management_limit [W]: {}\n"
+                              "power_enforced_limit [W]: {}\n"
+                              "power_measurement_type [string]: {}\n"
+                              "power_management_mode [bool]: {}\n"
+                              "available_power_profiles [int]: [{}]\n"
+                              "power_usage [W]: [{}]\n"
+                              "power_total_energy_consumption [J]: [{}]\n"
+                              "power_profile [int]: [{}]",
                               detail::value_or_default(samples.get_power_management_limit()),
                               detail::value_or_default(samples.get_power_enforced_limit()),
-                              detail::join(detail::value_or_default(samples.get_power_state()), ", "),
+                              detail::value_or_default(samples.get_power_measurement_type()),
+                              detail::value_or_default(samples.get_power_management_mode()),
+                              detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "),
                               detail::join(detail::value_or_default(samples.get_power_usage()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "));
+                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
+                              detail::join(detail::value_or_default(samples.get_power_profile()), ", "));
 }
 
 //*************************************************************************************************************************************//

From 0c317841328b7c4f59aedcce908ac1848d170cf6 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Thu, 12 Sep 2024 15:40:26 +0200
Subject: [PATCH 13/69] Clarify total energy consumption is only calculated and
 not sampled via turbostat.

---
 README.md | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 642e56b..d6c5ec3 100644
--- a/README.md
+++ b/README.md
@@ -123,22 +123,22 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 ### power-related samples
 
-| sample                      |   CPUs    | NVIDIA GPUs |  AMD GPUs   | Intel GPUs |
-|:----------------------------|:---------:|:-----------:|:-----------:|:----------:|
-| power_management_limit      |     -     |      W      |      W      |            |
-| power_enforced_limit        |     -     |      W      |      W      |            |
-| power_measurement_type      | str (fix) |     str     |     str     |            |
-| power_management_mode       |     -     |    bool     |      -      |            |
-| available_power_profiles    |     -     | list of int | list of str |            |
-| power_usage                 |     W     |      W      |      W      |            |
-| core_watt                   |     W     |      -      |      -      |     -      |
-| dram_watt                   |     W     |      -      |      -      |     -      |
-| package_rapl_throttling     |     %     |      -      |      -      |     -      |
-| dram_rapl_throttling        |     %     |      -      |      -      |     -      |
-| power_total_energy_consumed |     J     |      J      |      J      |     J      |
-| power_profile               |     -     |     int     |     str     |            |
-| energy_threshold_enabled    |           |             |             |    bool    |
-| energy_threshold            |           |             |             |     J      |
+| sample                      |               CPUs                | NVIDIA GPUs |  AMD GPUs   | Intel GPUs |
+|:----------------------------|:---------------------------------:|:-----------:|:-----------:|:----------:|
+| power_management_limit      |                 -                 |      W      |      W      |            |
+| power_enforced_limit        |                 -                 |      W      |      W      |            |
+| power_measurement_type      |             str (fix)             |     str     |     str     |            |
+| power_management_mode       |                 -                 |    bool     |      -      |            |
+| available_power_profiles    |                 -                 | list of int | list of str |            |
+| power_usage                 |                 W                 |      W      |      W      |            |
+| core_watt                   |                 W                 |      -      |      -      |     -      |
+| dram_watt                   |                 W                 |      -      |      -      |     -      |
+| package_rapl_throttling     |                 %                 |      -      |      -      |     -      |
+| dram_rapl_throttling        |                 %                 |      -      |      -      |     -      |
+| power_total_energy_consumed | J<br>(calculated via power_usage) |      J      |      J      |     J      |
+| power_profile               |                 -                 |     int     |     str     |            |
+| energy_threshold_enabled    |                                   |             |             |    bool    |
+| energy_threshold            |                                   |             |             |     J      |
 
 ### memory-related samples
 

From 046753ce0982e91205baa12c632abdeb9e29bfa5 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Thu, 12 Sep 2024 15:47:24 +0200
Subject: [PATCH 14/69] Ignore install directories in .gitignore.

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 9e03206..1d90c9b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ Prerequisites
 # CMake ================================
 bin/
 build*/
+install*/
 cmake-build*/
 CMakeLists.txt.user
 CMakeCache.txt

From 453cffe6d87b3d223db0837f986320c234b7546c Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Thu, 12 Sep 2024 16:14:27 +0200
Subject: [PATCH 15/69] Split time_point output into unit and values such that
 the unit prefix is not repeated in the actual values.

---
 include/hardware_sampling/utility.hpp      | 14 +++++++-------
 src/hardware_sampling/hardware_sampler.cpp |  8 ++++++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp
index e66d6c8..2a62da3 100644
--- a/include/hardware_sampling/utility.hpp
+++ b/include/hardware_sampling/utility.hpp
@@ -14,6 +14,7 @@
 
 #include <charconv>      // std::from_chars
 #include <chrono>        // std::chrono::{milliseconds, duration_cast}
+#include <cmath>         // std::trunc
 #include <cstddef>       // std::size_t
 #include <format>        // std::format, std::formatter, std::basic_format_context, std::format_to
 #include <iterator>      // std::back_inserter, std::next, std::prev
@@ -54,19 +55,18 @@ namespace hws::detail {
     std::optional<std::vector<sample_type>> sample_name##_{};
 
 /**
- * @brief Convert all time points to their duration passed since the @p reference time point.
- * @tparam Duration the duration type to return
+ * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point.
  * @tparam TimePoint the type if the time points
  * @param[in] time_points the time points
  * @param[in] reference the reference time point
- * @return the duration passed since the @p reference time point (`[[nodiscard]]`)
+ * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`)
  */
-template <typename Duration = std::chrono::milliseconds, typename TimePoint>
-[[nodiscard]] inline std::vector<Duration> durations_from_reference_time(const std::vector<TimePoint> &time_points, const TimePoint &reference) {
-    std::vector<Duration> durations(time_points.size());
+template <typename TimePoint>
+[[nodiscard]] inline std::vector<double> durations_from_reference_time(const std::vector<TimePoint> &time_points, const TimePoint &reference) {
+    std::vector<double> durations(time_points.size());
 
     for (std::size_t i = 0; i < durations.size(); ++i) {
-        durations[i] = std::chrono::duration_cast<Duration>(time_points[i] - reference);
+        durations[i] = std::trunc(std::chrono::duration<double>(time_points[i] - reference).count() * 1000.0) / 1000.0;
     }
 
     return durations;
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index c6554cd..1c3ff49 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -139,14 +139,18 @@ void hardware_sampler::dump_yaml(const char *filename) {
         event_names.push_back(name);
     }
     file << std::format("events:\n"
-                        "  time_points: [{}]\n"
+                        "  time_points:\n"
+                        "    unit: \"s\"\n"
+                        "    values: [{}]\n"
                         "  names: [{}]\n\n",
                         detail::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "),
                         detail::join(event_names, ", "));
 
     // output the sampling information
     file << std::format("sampling_interval: {}\n"
-                        "time_points: [{}]\n"
+                        "time_points:\n"
+                        "  unit: \"s\"\n"
+                        "  values: [{}]\n"
                         "{}\n\n",
                         this->sampling_interval(),
                         detail::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),

From 5d376d49f36e6f891099dd9552e984309a0ab167 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Fri, 13 Sep 2024 16:28:25 +0200
Subject: [PATCH 16/69] Unify clock related samples and add new ones depending
 on the target hardware.

---
 README.md                                     |  51 +++---
 include/hardware_sampling/cpu/cpu_samples.hpp |   8 +-
 .../gpu_amd/rocm_smi_samples.hpp              |  22 +--
 .../gpu_nvidia/nvml_samples.hpp               |  29 ++--
 .../hardware_sampling/gpu_nvidia/utility.hpp  |  45 ++++++
 include/hardware_sampling/utility.hpp         |  32 ++++
 src/hardware_sampling/cpu/cpu_samples.cpp     |  40 ++---
 .../cpu/hardware_sampler.cpp                  |  14 +-
 .../gpu_amd/hardware_sampler.cpp              |  60 ++++---
 .../gpu_amd/rocm_smi_samples.cpp              | 140 +++++++++--------
 .../gpu_nvidia/hardware_sampler.cpp           |  99 +++++++-----
 .../gpu_nvidia/nvml_samples.cpp               | 148 ++++++++++--------
 12 files changed, 420 insertions(+), 268 deletions(-)

diff --git a/README.md b/README.md
index d6c5ec3..ce9598d 100644
--- a/README.md
+++ b/README.md
@@ -94,32 +94,31 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 ### clock-related samples
 
-| sample                     | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
-|:---------------------------|:----:|:-----------:|:--------:|:----------:|
-| frequency_boost            | bool |             |          |            |
-| min_cpu_frequency          | MHz  |             |          |            |
-| max_cpu_frequency          | MHz  |             |          |            |
-| average_frequency          | MHz  |             |          |            |
-| average_non_idle_frequency | MHz  |             |          |            |
-| time_stamp_counter         | MHz  |             |          |            |
-| clock_socket_min           |      |             |    Hz    |            |
-| clock_socket_max           |      |             |    Hz    |            |
-| clock_memory_min           |      |             |    Hz    |            |
-| clock_memory_max           |      |             |    Hz    |            |
-| clock_gpu_min              |      |     MHz     |    Hz    |    MHz     |
-| clock_gpu_max              |      |     MHz     |    Hz    |    MHz     |
-| clock_socket               |      |             |    Hz    |            |
-| clock_memory               |      |             |    Hz    |            |
-| clock_gpu                  |      |     MHz     |    Hz    |    MHz     |
-| overdrive_level            |      |             |    %     |            |
-| memory_overdrive_level     |      |             |    %     |            |
-| available_clocks_gpu       |      |             |          |    MHz     |
-| clock_mem_min              |      |     MHz     |          |    MHz     |
-| clock_mem_max              |      |     MHz     |          |    MHz     |
-| available_clocks_mem       |      |             |          |    MHz     |
-| tdp_frequency_limit_gpu    |      |             |          |    MHz     |
-| throttle_reason_gpu        |      |             |          |  bitmask   |
-| tdp_frequency_limit_mem    |      |             |          |    MHz     |
+| sample                             | CPUs |   NVIDIA GPUs    | AMD GPUs | Intel GPUs |
+|:-----------------------------------|:----:|:----------------:|:--------:|:----------:|
+| auto_boosted_clock_enabled         | bool |       bool       |    -     |            |
+| clock_frequency_min                | MHz  |       MHz        |   MHz    |            |
+| clock_frequency_max                | MHz  |       MHz        |   MHz    |            |
+| memory_clock_frequency_min         |  -   |       MHz        |   MHz    |            |
+| memory_clock_frequency_max         |  -   |       MHz        |   MHz    |            |
+| socket_clock_frequency_min         |  -   |        -         |   MHz    |     -      |
+| socket_clock_frequency_max         |  -   |        -         |   MHz    |     -      |
+| sm_clock_frequency_max             |  -   |       MHz        |    -     |     -      |
+| available_clock_frequencies        |  -   |       MHz        |   MHz    |            |
+| available_memory_clock_frequencies |  -   |       MHz        |   MHz    |            |
+| clock_frequency                    | MHz  |       MHz        |   MHz    |            |
+| average_non_idle_frequency         | MHz  |        -         |    -     |     -      |
+| time_stamp_counter                 | MHz  |        -         |    -     |     -      |
+| memory_clock_frequency             |  -   |       MHz        |   MHz    |            |
+| socket_clock_frequency             |  -   |        -         |   MHz    |     -      |
+| sm_clock_frequency                 |  -   |       MHz        |    -     |     -      |
+| overdrive_level                    |  -   |        -         |    %     |     -      |
+| memory_overdrive_level             |  -   |        -         |    %     |     -      |
+| throttle_reason                    |  -   | string (bitmask) |    -     |            |
+| memory_throttle_reason             |  -   |        -         |    -     |            |
+| auto_boosted_clock                 |  -   |       bool       |    -     |     -      |
+| tdp_frequency_limit                |  -   |        -         |    -     |            |
+| memory_tdp_frequency_limit         |  -   |        -         |    -     |            |
 
 ### power-related samples
 
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index 3263d77..196572b 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -90,11 +90,11 @@ class cpu_clock_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, frequency_boost)  // true if frequency boosting is enabled
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, min_frequency)  // the minimum possible CPU frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, max_frequency)  // the maximum possible CPU frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, auto_boosted_clock_enabled)  // true if frequency boosting is enabled
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min)       // the minimum possible CPU frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max)       // the maximum possible CPU frequency in MHz
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_frequency)           // the average CPU frequency in MHz including idle cores
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_frequency)             // the average CPU frequency in MHz including idle cores
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_frequency)  // the average CPU frequency in MHz excluding idle cores
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter)          // the time stamp counter
 };
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index 3f89615..e0cb925 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -80,16 +80,18 @@ class rocm_smi_clock_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_system_min)  // the minimum possible system clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_system_max)  // the maximum possible system clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_socket_min)  // the minimum possible socket clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_socket_max)  // the maximum possible socket clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_memory_min)  // the minimum possible memory clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_memory_max)  // the maximum possible memory clock frequency in Hz
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_system)            // the current system clock frequency in Hz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_socket)            // the current socket clock frequency in Hz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_memory)            // the current memory clock frequency in Hz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min)                              // the minimum possible system clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max)                              // the maximum possible system clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min)                       // the minimum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max)                       // the maximum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, socket_clock_frequency_min)                       // the minimum possible socket clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, socket_clock_frequency_max)                       // the maximum possible socket clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_clock_frequencies)         // the available clock frequencies in MHz (slowest to fastest)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_memory_clock_frequencies)  // the available memory clock frequencies in MHz (slowest to fastest)
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)                // the current system clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)         // the current memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, socket_clock_frequency)         // the current socket clock frequency in MHz
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, overdrive_level)         // the GPU overdrive percentage
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_overdrive_level)  // the GPU memory overdrive percentage
 };
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index 86420c8..f766c3d 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -16,6 +16,7 @@
 
 #include <format>    // std::formatter
 #include <iosfwd>    // std::ostream forward declaration
+#include <map>       // std::map
 #include <optional>  // std::optional
 #include <string>    // std::string
 #include <vector>    // std::vector
@@ -73,6 +74,8 @@ class nvml_clock_samples {
     // befriend hardware sampler class
     friend class gpu_nvidia_hardware_sampler;
 
+    using map_type = std::map<double, std::vector<double>>;
+
   public:
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
@@ -81,18 +84,20 @@ class nvml_clock_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, adaptive_clock_status)  // true if clock boosting is currently enabled
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_graph_min)        // the minimum possible graphics clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_graph_max)        // the maximum possible graphics clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_sm_max)           // the maximum possible SM clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_mem_min)          // the minimum possible memory clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_mem_max)          // the maximum possible memory clock frequency in MHz
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_graph)                  // the current graphics clock frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_sm)                     // the current SM clock frequency in Mhz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_mem)                    // the current memory clock frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, clock_throttle_reason)  // the reason the GPU clock throttled (bitmask)
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clocks)                  // true if the clocks are currently auto boosted
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, auto_boosted_clock_enabled)                         // true if clock boosting is currently enabled
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min)                              // the minimum possible graphics clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max)                              // the maximum possible graphics clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min)                       // the minimum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max)                       // the maximum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, sm_clock_frequency_max)                           // the maximum possible SM clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, available_clock_frequencies)                    // the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_memory_clock_frequencies)  // the available memory clock frequencies in MHz (slowest to fastest)
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)         // the current graphics clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)  // the current memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, sm_clock_frequency)      // the current SM clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason)    // the reason the GPU clock throttled
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clock)        // true if the clocks are currently auto boosted
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp
index f4f8577..8a1c590 100644
--- a/include/hardware_sampling/gpu_nvidia/utility.hpp
+++ b/include/hardware_sampling/gpu_nvidia/utility.hpp
@@ -12,10 +12,14 @@
 #define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
 #pragma once
 
+#include "hardware_sampling/utility.hpp"  // hws::detail::join
+
 #include "nvml.h"  // NVML runtime functions
 
 #include <format>     // std::format
 #include <stdexcept>  // std::runtime_error
+#include <string>     // std::string
+#include <vector>     // std::vector
 
 namespace hws::detail {
 
@@ -36,6 +40,47 @@ namespace hws::detail {
     #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func;
 #endif
 
+/**
+ * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are separated by "|".
+ * @param[in] clocks_event_reasons the bitmask to convert to a string
+ * @return "None" if no bit is set, otherwise the names of all recognized set reasons joined with "|" and wrapped in double quotes (`[[nodiscard]]`)
+ */
+[[nodiscard]] inline std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) {
+    if (clocks_event_reasons == 0ull) {
+        return "None";  // no reason bit set; note that this literal is returned without surrounding quotes
+    } else {
+        std::vector<std::string> reasons{};  // one entry per reason bit found in the mask, in the order tested below
+        if ((clocks_event_reasons & nvmlClocksEventReasonApplicationsClocksSetting) != 0ull) {
+            reasons.emplace_back("ApplicationsClocksSetting");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonDisplayClockSetting) != 0ull) {
+            reasons.emplace_back("DisplayClockSetting");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonGpuIdle) != 0ull) {
+            reasons.emplace_back("GpuIdle");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonSwPowerCap) != 0ull) {
+            reasons.emplace_back("SwPowerCap");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonSwThermalSlowdown) != 0ull) {
+            reasons.emplace_back("SwThermalSlowdown");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonSyncBoost) != 0ull) {
+            reasons.emplace_back("SyncBoost");
+        }
+        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwPowerBrakeSlowdown) != 0ull) {  // the three Hw* masks are spelled nvmlClocksThrottleReason*, unlike the nvmlClocksEventReason* masks above
+            reasons.emplace_back("HwPowerBrakeSlowdown");
+        }
+        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwSlowdown) != 0ull) {
+            reasons.emplace_back("HwSlowdown");
+        }
+        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) {
+            reasons.emplace_back("HwThermalSlowdown");
+        }
+        return std::format("\"{}\"", detail::join(reasons, "|"));  // bits that are not tested above do not contribute to the result
+    }
+}
+
 }  // namespace hws::detail
 
 #endif  // HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp
index 2a62da3..81e1136 100644
--- a/include/hardware_sampling/utility.hpp
+++ b/include/hardware_sampling/utility.hpp
@@ -243,6 +243,38 @@ template <typename Container>
     }
 }
 
+template <typename T>
+struct is_vector : std::false_type { };  // primary template: an arbitrary T is not a std::vector
+
+template <typename T>
+struct is_vector<std::vector<T>> : std::true_type { };  // partial specialization: matches std::vector<T> (default allocator only, no cv/ref qualifiers)
+
+template <typename T>
+constexpr bool is_vector_v = is_vector<T>::value;  // convenience variable template for is_vector<T>::value
+
+/**
+ * @brief Convert all entries in the map to a single dict-like string.
+ * @details Each entry is rendered as "{KEY, VALUE}" or, if the mapped value is a `std::vector`, as "{KEY, [VALUES]}"; the entries are joined with ", ".
+ * @tparam MapType the type of the map
+ * @param[in] map the optional map to convert to a string
+ * @return the result string, empty if @p map holds no value (`[[nodiscard]]`)
+ */
+template <typename MapType>
+[[nodiscard]] inline std::string map_entry_to_string(const std::optional<MapType> &map) {
+    if (map.has_value()) {
+        std::vector<std::string> entries{};
+        for (const auto &[key, value] : map.value()) {
+            if constexpr (is_vector_v<std::remove_cvref_t<decltype(value)>>) {  // strip const/reference qualifiers from the binding's type before testing for std::vector
+                entries.push_back(std::format("{{{}, [{}]}}", key, detail::join(value, ", ")));
+            } else {
+                entries.push_back(std::format("{{{}, {}}}", key, value));
+            }
+        }
+        return detail::join(entries, ", ");
+    }
+    return "";  // no map available -> empty string
+}
+
 }  // namespace hws::detail
 
 #endif  // HARDWARE_SAMPLING_UTILITY_HPP_
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index 92bf8a9..deb1ddc 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -210,34 +210,34 @@ std::string cpu_clock_samples::generate_yaml_string() const {
     std::string str{ "clock:\n" };
 
     // true if frequency boost is enabled
-    if (this->frequency_boost_.has_value()) {
-        str += std::format("  frequency_boost:\n"
+    if (this->auto_boosted_clock_enabled_.has_value()) {
+        str += std::format("  auto_boosted_clock_enabled:\n"
                            "    unit: \"bool\"\n"
                            "    values: {}\n",
-                           this->frequency_boost_.value());
+                           this->auto_boosted_clock_enabled_.value());
     }
     // the minimal CPU frequency
-    if (this->min_frequency_.has_value()) {
-        str += std::format("  min_cpu_frequency:\n"
+    if (this->clock_frequency_min_.has_value()) {
+        str += std::format("  clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->min_frequency_.value());
+                           this->clock_frequency_min_.value());
     }
     // the maximum CPU frequency
-    if (this->max_frequency_.has_value()) {
-        str += std::format("  max_cpu_frequency:\n"
+    if (this->clock_frequency_max_.has_value()) {
+        str += std::format("  clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->max_frequency_.value());
+                           this->clock_frequency_max_.value());
     }
 
     // the average CPU frequency
-    if (this->average_frequency_.has_value()) {
-        str += std::format("  average_frequency:\n"
+    if (this->clock_frequency_.has_value()) {
+        str += std::format("  clock_frequency:\n"
                            "    turbostat_name: \"Avg_MHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->average_frequency_.value(), ", "));
+                           detail::join(this->clock_frequency_.value(), ", "));
     }
     // the average CPU frequency excluding idle time
     if (this->average_non_idle_frequency_.has_value()) {
@@ -263,16 +263,16 @@ std::string cpu_clock_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) {
-    return out << std::format("frequency_boost [bool]: {}\n"
-                              "min_frequency [MHz]: {}\n"
-                              "max_frequency [MHz]: {}\n"
-                              "average_frequency [MHz]: [{}]\n"
+    return out << std::format("auto_boosted_clock_enabled [bool]: {}\n"
+                              "clock_frequency_min [MHz]: {}\n"
+                              "clock_frequency_max [MHz]: {}\n"
+                              "clock_frequency [MHz]: [{}]\n"
                               "average_non_idle_frequency [MHz]: [{}]\n"
                               "time_stamp_counter [MHz]: [{}]",
-                              detail::value_or_default(samples.get_frequency_boost()),
-                              detail::value_or_default(samples.get_min_frequency()),
-                              detail::value_or_default(samples.get_max_frequency()),
-                              detail::join(detail::value_or_default(samples.get_average_frequency()), ", "),
+                              detail::value_or_default(samples.get_auto_boosted_clock_enabled()),
+                              detail::value_or_default(samples.get_clock_frequency_min()),
+                              detail::value_or_default(samples.get_clock_frequency_max()),
+                              detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
                               detail::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "),
                               detail::join(detail::value_or_default(samples.get_time_stamp_counter()), ", "));
 }
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index e645fde..89683eb 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -90,11 +90,11 @@ void cpu_hardware_sampler::sampling_loop() {
             } else if (line.starts_with("Flags")) {
                 general_samples_.flags_ = detail::split_as<decltype(general_samples_.flags_)::value_type::value_type>(value, ' ');
             } else if (line.starts_with("Frequency boost")) {
-                clock_samples_.frequency_boost_ = value == "enabled";
+                clock_samples_.auto_boosted_clock_enabled_ = value == "enabled";
             } else if (line.starts_with("CPU max MHz")) {
-                clock_samples_.max_frequency_ = detail::convert_to<decltype(clock_samples_.max_frequency_)::value_type>(value);
+                clock_samples_.clock_frequency_max_ = detail::convert_to<decltype(clock_samples_.clock_frequency_max_)::value_type>(value);
             } else if (line.starts_with("CPU min MHz")) {
-                clock_samples_.min_frequency_ = detail::convert_to<decltype(clock_samples_.min_frequency_)::value_type>(value);
+                clock_samples_.clock_frequency_min_ = detail::convert_to<decltype(clock_samples_.clock_frequency_min_)::value_type>(value);
             } else if (line.starts_with("L1d cache")) {
                 memory_samples_.l1d_cache_ = detail::convert_to<decltype(memory_samples_.l1d_cache_)::value_type>(value);
             } else if (line.starts_with("L1i cache")) {
@@ -163,8 +163,8 @@ void cpu_hardware_sampler::sampling_loop() {
 
         for (std::size_t i = 0; i < header.size(); ++i) {
             if (header[i] == "Avg_MHz") {
-                using vector_type = decltype(clock_samples_.average_frequency_)::value_type;
-                clock_samples_.average_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
+                clock_samples_.clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
             } else if (header[i] == "Busy%") {
                 using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
                 general_samples_.compute_utilization_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
@@ -310,8 +310,8 @@ void cpu_hardware_sampler::sampling_loop() {
                 // add values to the respective sample entries
                 for (std::size_t i = 0; i < header.size(); ++i) {
                     if (header[i] == "Avg_MHz") {
-                        using vector_type = decltype(clock_samples_.average_frequency_)::value_type;
-                        clock_samples_.average_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
+                        clock_samples_.clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                     } else if (header[i] == "Busy%") {
                         using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
                         general_samples_.compute_utilization_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 434b047..63fbda4 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -127,38 +127,50 @@ void gpu_amd_hardware_sampler::sampling_loop() {
     {
         rsmi_frequencies_t frequency_info{};
         if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.clock_system_min_ = frequency_info.frequency[0];
-            clock_samples_.clock_system_max_ = frequency_info.frequency[frequency_info.num_supported - 1];
+            clock_samples_.clock_frequency_min_ = static_cast<decltype(clock_samples_.clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000.0 / 1000.0;
+            clock_samples_.clock_frequency_max_ = static_cast<decltype(clock_samples_.clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0;
+            decltype(clock_samples_.available_clock_frequencies_)::value_type frequencies{};
+            for (std::size_t i = 0; i < frequency_info.num_supported; ++i) {
+                frequencies.push_back(static_cast<decltype(frequencies)::value_type>(frequency_info.frequency[i]) / 1000.0 / 1000.0);
+            }
+            clock_samples_.available_clock_frequencies_ = frequencies;
+
             // queried samples -> retrieved every iteration if available
-            clock_samples_.clock_system_ = decltype(clock_samples_.clock_system_)::value_type{};
+            clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{};
             if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                clock_samples_.clock_system_->push_back(frequency_info.frequency[frequency_info.current]);
+                clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
             } else {
-                clock_samples_.clock_system_->push_back(0);
+                clock_samples_.clock_frequency_->push_back(0);
             }
         }
 
         if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.clock_socket_min_ = frequency_info.frequency[0];
-            clock_samples_.clock_socket_max_ = frequency_info.frequency[frequency_info.num_supported - 1];
+            clock_samples_.socket_clock_frequency_min_ = static_cast<decltype(clock_samples_.socket_clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000.0 / 1000.0;
+            clock_samples_.socket_clock_frequency_max_ = static_cast<decltype(clock_samples_.socket_clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0;
             // queried samples -> retrieved every iteration if available
-            clock_samples_.clock_socket_ = decltype(clock_samples_.clock_socket_)::value_type{};
+            clock_samples_.socket_clock_frequency_ = decltype(clock_samples_.socket_clock_frequency_)::value_type{};
             if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                clock_samples_.clock_socket_->push_back(frequency_info.frequency[frequency_info.current]);
+                clock_samples_.socket_clock_frequency_->push_back(static_cast<decltype(clock_samples_.socket_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
             } else {
-                clock_samples_.clock_socket_->push_back(0);
+                clock_samples_.socket_clock_frequency_->push_back(0);
             }
         }
 
         if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.clock_memory_min_ = frequency_info.frequency[0];
-            clock_samples_.clock_memory_max_ = frequency_info.frequency[frequency_info.num_supported - 1];
+            clock_samples_.memory_clock_frequency_min_ = static_cast<decltype(clock_samples_.memory_clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000.0 / 1000.0;
+            clock_samples_.memory_clock_frequency_max_ = static_cast<decltype(clock_samples_.memory_clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0;
+            decltype(clock_samples_.available_memory_clock_frequencies_)::value_type frequencies{};
+            for (std::size_t i = 0; i < frequency_info.num_supported; ++i) {
+                frequencies.push_back(static_cast<decltype(frequencies)::value_type>(frequency_info.frequency[i]) / 1000.0 / 1000.0);
+            }
+            clock_samples_.available_memory_clock_frequencies_ = frequencies;
+
             // queried samples -> retrieved every iteration if available
-            clock_samples_.clock_memory_ = decltype(clock_samples_.clock_memory_)::value_type{};
+            clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{};
             if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                clock_samples_.clock_memory_->push_back(frequency_info.frequency[frequency_info.current]);
+                clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
             } else {
-                clock_samples_.clock_memory_->push_back(0);
+                clock_samples_.memory_clock_frequency_->push_back(0);
             }
         }
 
@@ -466,36 +478,36 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
             // retrieve clock related samples
             {
-                if (clock_samples_.clock_system_.has_value()) {
+                if (clock_samples_.clock_frequency_.has_value()) {
                     rsmi_frequencies_t frequency_info{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info));
                     if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        clock_samples_.clock_system_->push_back(frequency_info.frequency[frequency_info.current]);
+                        clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
                     } else {
                         // the current index is (somehow) wrong
-                        clock_samples_.clock_system_->push_back(0);
+                        clock_samples_.clock_frequency_->push_back(0);
                     }
                 }
 
-                if (clock_samples_.clock_socket_.has_value()) {
+                if (clock_samples_.socket_clock_frequency_.has_value()) {
                     rsmi_frequencies_t frequency_info{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info));
                     if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        clock_samples_.clock_socket_->push_back(frequency_info.frequency[frequency_info.current]);
+                        clock_samples_.socket_clock_frequency_->push_back(static_cast<decltype(clock_samples_.socket_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
                     } else {
                         // the current index is (somehow) wrong
-                        clock_samples_.clock_socket_->push_back(0);
+                        clock_samples_.socket_clock_frequency_->push_back(0);
                     }
                 }
 
-                if (clock_samples_.clock_memory_.has_value()) {
+                if (clock_samples_.memory_clock_frequency_.has_value()) {
                     rsmi_frequencies_t frequency_info{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info));
                     if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        clock_samples_.clock_memory_->push_back(frequency_info.frequency[frequency_info.current]);
+                        clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
                     } else {
                         // the current index is (somehow) wrong
-                        clock_samples_.clock_memory_->push_back(0);
+                        clock_samples_.memory_clock_frequency_->push_back(0);
                     }
                 }
 
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index c7f7d88..579ea29 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -105,71 +105,83 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samp
 std::string rocm_smi_clock_samples::generate_yaml_string() const {
     std::string str{ "clock:\n" };
 
-    // socket clock min frequencies
-    if (this->clock_socket_min_.has_value()) {
-        str += std::format("  clock_socket_min:\n"
-                           "    unit: \"Hz\"\n"
+    // system clock min frequencies
+    if (this->clock_frequency_min_.has_value()) {
+        str += std::format("  clock_frequency_min:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_socket_min_.value());
+                           this->clock_frequency_min_.value());
     }
-    // socket clock max frequencies
-    if (this->clock_socket_max_.has_value()) {
-        str += std::format("  clock_socket_max:\n"
-                           "    unit: \"Hz\"\n"
+    // system clock max frequencies
+    if (this->clock_frequency_max_.has_value()) {
+        str += std::format("  clock_frequency_max:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_socket_max_.value());
+                           this->clock_frequency_max_.value());
     }
-
     // memory clock min frequencies
-    if (this->clock_memory_min_.has_value()) {
-        str += std::format("  clock_memory_min:\n"
-                           "    unit: \"Hz\"\n"
+    if (this->memory_clock_frequency_min_.has_value()) {
+        str += std::format("  memory_clock_frequency_min:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_memory_min_.value());
+                           this->memory_clock_frequency_min_.value());
     }
     // memory clock max frequencies
-    if (this->clock_memory_max_.has_value()) {
-        str += std::format("  clock_memory_max:\n"
-                           "    unit: \"Hz\"\n"
+    if (this->memory_clock_frequency_max_.has_value()) {
+        str += std::format("  memory_clock_frequency_max:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_memory_max_.value());
+                           this->memory_clock_frequency_max_.value());
     }
-
-    // system clock min frequencies
-    if (this->clock_system_min_.has_value()) {
-        str += std::format("  clock_gpu_min:\n"
-                           "    unit: \"Hz\"\n"
+    // socket clock min frequencies
+    if (this->socket_clock_frequency_min_.has_value()) {
+        str += std::format("  socket_clock_frequency_min:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_system_min_.value());
+                           this->socket_clock_frequency_min_.value());
     }
-    // system clock max frequencies
-    if (this->clock_system_max_.has_value()) {
-        str += std::format("  clock_gpu_max:\n"
-                           "    unit: \"Hz\"\n"
+    // socket clock max frequencies
+    if (this->socket_clock_frequency_max_.has_value()) {
+        str += std::format("  socket_clock_frequency_max:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_system_max_.value());
+                           this->socket_clock_frequency_max_.value());
+    }
+    // the available clock frequencies
+    if (this->available_clock_frequencies_.has_value()) {
+        str += std::format("  available_clock_frequencies:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           detail::join(this->available_clock_frequencies_.value(), ", "));
+    }
+    // the available memory clock frequencies
+    if (this->available_memory_clock_frequencies_.has_value()) {
+        str += std::format("  available_memory_clock_frequencies:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           detail::join(this->available_memory_clock_frequencies_.value(), ", "));
     }
 
-    // socket clock frequency
-    if (this->clock_socket_.has_value()) {
-        str += std::format("  clock_socket:\n"
-                           "    unit: \"Hz\"\n"
+    // system clock frequency
+    if (this->clock_frequency_.has_value()) {
+        str += std::format("  clock_frequency:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_socket_.value(), ", "));
+                           detail::join(this->clock_frequency_.value(), ", "));
     }
     // memory clock frequency
-    if (this->clock_memory_.has_value()) {
-        str += std::format("  clock_memory:\n"
-                           "    unit: \"Hz\"\n"
+    if (this->memory_clock_frequency_.has_value()) {
+        str += std::format("  memory_clock_frequency:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_memory_.value(), ", "));
+                           detail::join(this->memory_clock_frequency_.value(), ", "));
     }
-    // system clock frequency
-    if (this->clock_system_.has_value()) {
-        str += std::format("  clock_gpu:\n"
-                           "    unit: \"Hz\"\n"
+    // socket clock frequency
+    if (this->socket_clock_frequency_.has_value()) {
+        str += std::format("  socket_clock_frequency:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_system_.value(), ", "));
+                           detail::join(this->socket_clock_frequency_.value(), ", "));
     }
     // overdrive level
     if (this->overdrive_level_.has_value()) {
@@ -193,26 +205,30 @@ std::string rocm_smi_clock_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples) {
-    return out << std::format("clock_system_min [Hz]: {}\n"
-                              "clock_system_max [Hz]: {}\n"
-                              "clock_socket_min [Hz]: {}\n"
-                              "clock_socket_max [Hz]: {}\n"
-                              "clock_memory_min [Hz]: {}\n"
-                              "clock_memory_max [Hz]: {}\n"
-                              "clock_system [Hz]: [{}]\n"
-                              "clock_socket [Hz]: [{}]\n"
-                              "clock_memory [Hz]: [{}]\n"
+    return out << std::format("clock_frequency_min [MHz]: {}\n"
+                              "clock_frequency_max [MHz]: {}\n"
+                              "memory_clock_frequency_min [MHz]: {}\n"
+                              "memory_clock_frequency_max [MHz]: {}\n"
+                              "socket_clock_frequency_min [MHz]: {}\n"
+                              "socket_clock_frequency_max [MHz]: {}\n"
+                              "available_clock_frequencies [MHz]: [{}]\n"
+                              "available_memory_clock_frequencies [MHz]: [{}]\n"
+                              "clock_frequency [MHz]: [{}]\n"
+                              "memory_clock_frequency [MHz]: [{}]\n"
+                              "socket_clock_frequency [MHz]: [{}]\n"
                               "overdrive_level [%]: [{}]\n"
                               "memory_overdrive_level [%]: [{}]",
-                              detail::value_or_default(samples.get_clock_system_min()),
-                              detail::value_or_default(samples.get_clock_system_max()),
-                              detail::value_or_default(samples.get_clock_socket_min()),
-                              detail::value_or_default(samples.get_clock_socket_max()),
-                              detail::value_or_default(samples.get_clock_memory_min()),
-                              detail::value_or_default(samples.get_clock_memory_max()),
-                              detail::join(detail::value_or_default(samples.get_clock_system()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_socket()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_memory()), ", "),
+                              detail::value_or_default(samples.get_clock_frequency_min()),
+                              detail::value_or_default(samples.get_clock_frequency_max()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_min()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_max()),
+                              detail::value_or_default(samples.get_socket_clock_frequency_min()),
+                              detail::value_or_default(samples.get_socket_clock_frequency_max()),
+                              detail::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "),
+                              detail::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
+                              detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
+                              detail::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
+                              detail::join(detail::value_or_default(samples.get_socket_clock_frequency()), ", "),
                               detail::join(detail::value_or_default(samples.get_overdrive_level()), ", "),
                               detail::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", "));
 }
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 41b3c0b..6e0fe7c 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -15,7 +15,7 @@
 
 #include "nvml.h"  // NVML runtime functions
 
-#include <algorithm>  // std::min_element
+#include <algorithm>  // std::min_element, std::sort, std::transform
 #include <chrono>     // std::chrono::{steady_clock, duration_cast, milliseconds}
 #include <cstddef>    // std::size_t
 #include <exception>  // std::exception, std::terminate
@@ -187,24 +187,24 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
     // retrieve initial clock related information
     {
         // fixed information -> only retrieved once
-        decltype(clock_samples_.adaptive_clock_status_)::value_type adaptive_clock_status{};
+        unsigned int adaptive_clock_status{};
         if (nvmlDeviceGetAdaptiveClockInfoStatus(device, &adaptive_clock_status) == NVML_SUCCESS) {
-            clock_samples_.adaptive_clock_status_ = adaptive_clock_status;
+            clock_samples_.auto_boosted_clock_enabled_ = adaptive_clock_status == NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED;
         }
 
-        decltype(clock_samples_.clock_graph_max_)::value_type clock_graph_max{};
+        unsigned int clock_graph_max{};
         if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph_max) == NVML_SUCCESS) {
-            clock_samples_.clock_graph_max_ = clock_graph_max;
+            clock_samples_.clock_frequency_max_ = static_cast<decltype(clock_samples_.clock_frequency_max_)::value_type>(clock_graph_max);
         }
 
-        decltype(clock_samples_.clock_sm_max_)::value_type clock_sm_max{};
+        unsigned int clock_sm_max{};
         if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_SM, &clock_sm_max) == NVML_SUCCESS) {
-            clock_samples_.clock_sm_max_ = clock_sm_max;
+            clock_samples_.sm_clock_frequency_max_ = static_cast<decltype(clock_samples_.sm_clock_frequency_max_)::value_type>(clock_sm_max);
         }
 
-        decltype(clock_samples_.clock_mem_max_)::value_type clock_mem_max{};
+        unsigned int clock_mem_max{};
         if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_MEM, &clock_mem_max) == NVML_SUCCESS) {
-            clock_samples_.clock_mem_max_ = clock_mem_max;
+            clock_samples_.memory_clock_frequency_max_ = static_cast<decltype(clock_samples_.memory_clock_frequency_max_)::value_type>(clock_mem_max);
         }
 
         {
@@ -212,44 +212,67 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             std::vector<unsigned int> supported_clocks(clock_count);
             if (nvmlDeviceGetSupportedMemoryClocks(device, &clock_count, supported_clocks.data()) == NVML_SUCCESS) {
                 supported_clocks.resize(clock_count);
-                clock_samples_.clock_mem_min_ = *std::min_element(supported_clocks.cbegin(), supported_clocks.cend());
+                clock_samples_.memory_clock_frequency_min_ = static_cast<decltype(clock_samples_.memory_clock_frequency_min_)::value_type>(*std::min_element(supported_clocks.cbegin(), supported_clocks.cend()));
+
+                decltype(clock_samples_.available_memory_clock_frequencies_)::value_type available_memory_clock_frequencies(supported_clocks.size());
+                // convert unsigned int values to double values
+                std::transform(supported_clocks.cbegin(), supported_clocks.cend(), available_memory_clock_frequencies.begin(), [](const unsigned int c) { return static_cast<decltype(clock_samples_.available_memory_clock_frequencies_)::value_type::value_type>(c); });
+                // we want to report all supported memory clocks in ascending order
+                std::sort(available_memory_clock_frequencies.begin(), available_memory_clock_frequencies.end());
+                clock_samples_.available_memory_clock_frequencies_ = available_memory_clock_frequencies;
             }
         }
 
         {
             unsigned int clock_count{ 128 };
             std::vector<unsigned int> supported_clocks(clock_count);
-            if (clock_samples_.clock_mem_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, clock_samples_.clock_mem_min_.value(), &clock_count, supported_clocks.data()) == NVML_SUCCESS) {
-                supported_clocks.resize(clock_count);
-                clock_samples_.clock_graph_min_ = *std::min_element(supported_clocks.cbegin(), supported_clocks.cend());
+            if (clock_samples_.memory_clock_frequency_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, clock_samples_.memory_clock_frequency_min_.value(), &clock_count, supported_clocks.data()) == NVML_SUCCESS) {
+                clock_samples_.clock_frequency_min_ = static_cast<decltype(clock_samples_.clock_frequency_min_)::value_type>(*std::min_element(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count));
+            }
+
+            if (clock_samples_.available_memory_clock_frequencies_.has_value()) {
+                for (const auto value : clock_samples_.available_memory_clock_frequencies_.value()) {  // one graphics clock query per supported memory clock
+                    if (clock_count = static_cast<unsigned int>(supported_clocks.size()); nvmlDeviceGetSupportedGraphicsClocks(device, static_cast<unsigned int>(value), &clock_count, supported_clocks.data()) == NVML_SUCCESS) {  // clock_count is in/out: reset it to the buffer capacity since the previous call overwrote it with the number of returned entries
+                        decltype(clock_samples_.available_clock_frequencies_)::value_type::mapped_type available_clock_frequencies(clock_count);
+                        // convert unsigned int values to double values
+                        std::transform(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count, available_clock_frequencies.begin(), [](const unsigned int c) { return static_cast<decltype(clock_samples_.available_clock_frequencies_)::value_type::mapped_type::value_type>(c); });
+                        // we want to report all supported graphics clocks (for this memory clock) in ascending order
+                        std::sort(available_clock_frequencies.begin(), available_clock_frequencies.end());
+                        // if no map exists, default construct an empty map
+                        if (!clock_samples_.available_clock_frequencies_.has_value()) {
+                            clock_samples_.available_clock_frequencies_ = decltype(clock_samples_)::map_type{};
+                        }
+                        clock_samples_.available_clock_frequencies_->emplace(value, available_clock_frequencies);
+                    }
+                }
             }
         }
 
         // queried samples -> retrieved every iteration if available
-        decltype(clock_samples_.clock_graph_)::value_type::value_type clock_graph{};
+        unsigned int clock_graph{};
         if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph) == NVML_SUCCESS) {
-            clock_samples_.clock_graph_ = decltype(clock_samples_.clock_graph_)::value_type{ clock_graph };
+            clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(clock_graph) };
         }
 
-        decltype(clock_samples_.clock_sm_)::value_type::value_type clock_sm{};
+        unsigned int clock_sm{};
         if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &clock_sm) == NVML_SUCCESS) {
-            clock_samples_.clock_sm_ = decltype(clock_samples_.clock_sm_)::value_type{ clock_sm };
+            clock_samples_.sm_clock_frequency_ = decltype(clock_samples_.sm_clock_frequency_)::value_type{ static_cast<decltype(clock_samples_.sm_clock_frequency_)::value_type::value_type>(clock_sm) };
         }
 
-        decltype(clock_samples_.clock_mem_)::value_type::value_type clock_mem{};
+        unsigned int clock_mem{};
         if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &clock_mem) == NVML_SUCCESS) {
-            clock_samples_.clock_mem_ = decltype(clock_samples_.clock_mem_)::value_type{ clock_mem };
+            clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(clock_mem) };
         }
 
-        decltype(clock_samples_.clock_throttle_reason_)::value_type::value_type clock_throttle_reason{};
-        if (nvmlDeviceGetCurrentClocksThrottleReasons(device, &clock_throttle_reason) == NVML_SUCCESS) {
-            clock_samples_.clock_throttle_reason_ = decltype(clock_samples_.clock_throttle_reason_)::value_type{ clock_throttle_reason };
+        unsigned long long clock_throttle_reason{};
+        if (nvmlDeviceGetCurrentClocksEventReasons(device, &clock_throttle_reason) == NVML_SUCCESS) {
+            clock_samples_.throttle_reason_ = decltype(clock_samples_.throttle_reason_)::value_type{ detail::throttle_event_reason_to_string(clock_throttle_reason) };
         }
 
         nvmlEnableState_t mode{};
         nvmlEnableState_t default_mode{};
         if (nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode) == NVML_SUCCESS) {
-            clock_samples_.auto_boosted_clocks_ = decltype(clock_samples_.auto_boosted_clocks_)::value_type{ mode == NVML_FEATURE_ENABLED };
+            clock_samples_.auto_boosted_clock_ = decltype(clock_samples_.auto_boosted_clock_)::value_type{ mode == NVML_FEATURE_ENABLED };
         }
     }
 
@@ -412,35 +435,35 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 
             // retrieve clock related samples
             {
-                if (clock_samples_.clock_graph_.has_value()) {
-                    decltype(clock_samples_.clock_graph_)::value_type::value_type value{};
+                if (clock_samples_.clock_frequency_.has_value()) {
+                    unsigned int value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value));
-                    clock_samples_.clock_graph_->push_back(value);
+                    clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(value));
                 }
 
-                if (clock_samples_.clock_sm_.has_value()) {
-                    decltype(clock_samples_.clock_sm_)::value_type::value_type value{};
+                if (clock_samples_.sm_clock_frequency_.has_value()) {
+                    unsigned int value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value));
-                    clock_samples_.clock_sm_->push_back(value);
+                    clock_samples_.sm_clock_frequency_->push_back(static_cast<decltype(clock_samples_.sm_clock_frequency_)::value_type::value_type>(value));
                 }
 
-                if (clock_samples_.clock_mem_.has_value()) {
-                    decltype(clock_samples_.clock_mem_)::value_type::value_type value{};
+                if (clock_samples_.memory_clock_frequency_.has_value()) {
+                    unsigned int value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value));
-                    clock_samples_.clock_mem_->push_back(value);
+                    clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(value));
                 }
 
-                if (clock_samples_.clock_throttle_reason_.has_value()) {
-                    decltype(clock_samples_.clock_throttle_reason_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksThrottleReasons(device, &value));
-                    clock_samples_.clock_throttle_reason_->push_back(value);
+                if (clock_samples_.throttle_reason_.has_value()) {
+                    unsigned long long value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value));
+                    clock_samples_.throttle_reason_->push_back(detail::throttle_event_reason_to_string(value));
                 }
 
-                if (clock_samples_.auto_boosted_clocks_.has_value()) {
+                if (clock_samples_.auto_boosted_clock_.has_value()) {
                     nvmlEnableState_t mode{};
                     nvmlEnableState_t default_mode{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode));
-                    clock_samples_.auto_boosted_clocks_->push_back(mode == NVML_FEATURE_ENABLED);
+                    clock_samples_.auto_boosted_clock_->push_back(mode == NVML_FEATURE_ENABLED);
                 }
             }
 
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 95cfa17..64f559d 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -7,9 +7,7 @@
 
 #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join}
-
-#include "nvml.h"  // NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED
+#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join, map_entry_to_string}
 
 #include <format>   // std::format
 #include <ostream>  // std::ostream
@@ -125,82 +123,98 @@ std::string nvml_clock_samples::generate_yaml_string() const {
     std::string str{ "clock:\n" };
 
     // adaptive clock status
-    if (this->adaptive_clock_status_.has_value()) {
-        str += std::format("  adaptive_clock_status:\n"
+    if (this->auto_boosted_clock_enabled_.has_value()) {
+        str += std::format("  auto_boosted_clock_enabled:\n"
                            "    unit: \"bool\"\n"
                            "    values: {}\n",
-                           this->adaptive_clock_status_.value() == NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED);
+                           this->auto_boosted_clock_enabled_.value());
     }
-    // maximum SM clock
-    if (this->clock_sm_max_.has_value()) {
-        str += std::format("  clock_sm_max:\n"
+    // minimum graph clock
+    if (this->clock_frequency_min_.has_value()) {
+        str += std::format("  clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_sm_max_.value());
+                           this->clock_frequency_min_.value());
+    }
+    // maximum graph clock
+    if (this->clock_frequency_max_.has_value()) {
+        str += std::format("  clock_frequency_max:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->clock_frequency_max_.value());
     }
     // minimum memory clock
-    if (this->clock_mem_min_.has_value()) {
-        str += std::format("  clock_mem_min:\n"
+    if (this->memory_clock_frequency_min_.has_value()) {
+        str += std::format("  memory_clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_mem_min_.value());
+                           this->memory_clock_frequency_min_.value());
     }
     // maximum memory clock
-    if (this->clock_mem_max_.has_value()) {
-        str += std::format("  clock_mem_max:\n"
+    if (this->memory_clock_frequency_max_.has_value()) {
+        str += std::format("  memory_clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_mem_max_.value());
+                           this->memory_clock_frequency_max_.value());
     }
-    // minimum graph clock
-    if (this->clock_graph_min_.has_value()) {
-        str += std::format("  clock_gpu_min:\n"
+    // maximum SM clock
+    if (this->sm_clock_frequency_max_.has_value()) {
+        str += std::format("  sm_clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_graph_min_.value());
+                           this->sm_clock_frequency_max_.value());
     }
-    // maximum graph clock
-    if (this->clock_graph_max_.has_value()) {
-        str += std::format("  clock_gpu_max:\n"
+    // the available clock frequencies
+    if (this->available_clock_frequencies_.has_value()) {
+        str += std::format("  available_clock_frequencies:\n"
                            "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_graph_max_.value());
+                           "    values:\n");
+        for (const auto &[key, value] : this->available_clock_frequencies_.value()) {
+            str += std::format("      {}: [{}]\n", key, detail::join(value, ", "));
+        }
+    }
+    // the available memory clock frequencies
+    if (this->available_memory_clock_frequencies_.has_value()) {
+        str += std::format("  available_memory_clock_frequencies:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           detail::join(this->available_memory_clock_frequencies_.value(), ", "));
     }
 
-    // SM clock
-    if (this->clock_sm_.has_value()) {
-        str += std::format("  clock_sm:\n"
+    // graph clock
+    if (this->clock_frequency_.has_value()) {
+        str += std::format("  clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_sm_.value(), ", "));
+                           detail::join(this->clock_frequency_.value(), ", "));
     }
     // memory clock
-    if (this->clock_mem_.has_value()) {
-        str += std::format("  clock_mem:\n"
+    if (this->memory_clock_frequency_.has_value()) {
+        str += std::format("  memory_clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_mem_.value(), ", "));
+                           detail::join(this->memory_clock_frequency_.value(), ", "));
     }
-    // graph clock
-    if (this->clock_graph_.has_value()) {
-        str += std::format("  clock_gpu:\n"
+    // SM clock
+    if (this->sm_clock_frequency_.has_value()) {
+        str += std::format("  sm_clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_graph_.value(), ", "));
+                           detail::join(this->sm_clock_frequency_.value(), ", "));
     }
     // clock throttle reason
-    if (this->clock_throttle_reason_.has_value()) {
-        str += std::format("  clock_throttle_reason:\n"
-                           "    unit: \"bitmask\"\n"
+    if (this->throttle_reason_.has_value()) {
+        str += std::format("  throttle_reason:\n"
+                           "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_throttle_reason_.value(), ", "));
+                           detail::join(this->throttle_reason_.value(), ", "));
     }
     // clock is auto-boosted
-    if (this->auto_boosted_clocks_.has_value()) {
-        str += std::format("  auto_boosted_clocks:\n"
+    if (this->auto_boosted_clock_.has_value()) {
+        str += std::format("  auto_boosted_clock:\n"
                            "    unit: \"bool\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->auto_boosted_clocks_.value(), ", "));
+                           detail::join(this->auto_boosted_clock_.value(), ", "));
     }
 
     // remove last newline
@@ -210,28 +224,32 @@ std::string nvml_clock_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) {
-    return out << std::format("adaptive_clock_status [int]: {}\n"
-                              "clock_graph_min [MHz]: {}\n"
-                              "clock_graph_max [MHz]: {}\n"
-                              "clock_sm_max [MHz]: {}\n"
-                              "clock_mem_min [MHz]: {}\n"
-                              "clock_mem_max [MHz]: {}\n"
-                              "clock_graph [MHz]: [{}]\n"
-                              "clock_sm [MHz]: [{}]\n"
-                              "clock_mem [MHz]: [{}]\n"
-                              "clock_throttle_reason [bitmask]: [{}]\n"
-                              "auto_boosted_clocks [bool]: [{}]",
-                              detail::value_or_default(samples.get_adaptive_clock_status()),
-                              detail::value_or_default(samples.get_clock_graph_min()),
-                              detail::value_or_default(samples.get_clock_graph_max()),
-                              detail::value_or_default(samples.get_clock_sm_max()),
-                              detail::value_or_default(samples.get_clock_mem_min()),
-                              detail::value_or_default(samples.get_clock_mem_max()),
-                              detail::join(detail::value_or_default(samples.get_clock_graph()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_sm()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_mem()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_throttle_reason()), ", "),
-                              detail::join(detail::value_or_default(samples.get_auto_boosted_clocks()), ", "));
+    return out << std::format("auto_boosted_clock_enabled [bool]: {}\n"
+                              "clock_frequency_min [MHz]: {}\n"
+                              "clock_frequency_max [MHz]: {}\n"
+                              "memory_clock_frequency_min [MHz]: {}\n"
+                              "memory_clock_frequency_max [MHz]: {}\n"
+                              "sm_clock_frequency_max [MHz]: {}\n"
+                              "available_clock_frequencies [MHz]: [{}]\n"
+                              "available_memory_clock_frequencies [MHz]: [{}]\n"
+                              "clock_frequency [MHz]: [{}]\n"
+                              "memory_clock_frequency [MHz]: [{}]\n"
+                              "sm_clock_frequency [MHz]: [{}]\n"
+                              "throttle_reason [string]: [{}]\n"
+                              "auto_boosted_clock [bool]: [{}]",
+                              detail::value_or_default(samples.get_auto_boosted_clock_enabled()),
+                              detail::value_or_default(samples.get_clock_frequency_min()),
+                              detail::value_or_default(samples.get_clock_frequency_max()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_min()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_max()),
+                              detail::value_or_default(samples.get_sm_clock_frequency_max()),
+                              detail::map_entry_to_string(samples.get_available_clock_frequencies()),
+                              detail::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
+                              detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
+                              detail::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
+                              detail::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "),
+                              detail::join(detail::value_or_default(samples.get_throttle_reason()), ", "),
+                              detail::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", "));
 }
 
 //*************************************************************************************************************************************//

From dfe75aedf819b97b5cb0dc3ae30271099fefb5e5 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Fri, 13 Sep 2024 16:31:28 +0200
Subject: [PATCH 17/69] (temporarily) disable level zero support.

---
 CMakeLists.txt | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e32478a..3e915ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -214,25 +214,25 @@ endif ()
 ##                                        Intel GPU sampling via Level Zero                                       ##
 ####################################################################################################################
 # try finding Level Zero
-find_package(level_zero QUIET)
-if (level_zero_FOUND)
-    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE level_zero)
-
-    message(STATUS "Enable sampling of Intel GPU information using Level Zero.")
-
-    # add source file to source file list
-    target_sources(${HWS_LIBRARY_NAME} PRIVATE
-            $<BUILD_INTERFACE:
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/level_zero_samples.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/utility.cpp
-            >)
-
-    # add compile definition
-    target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_INTEL_GPUS_ENABLED)
-else ()
-    message(STATUS "Hardware sampling for Intel GPUs disabled!")
-endif ()
+#find_package(level_zero QUIET)
+#if (level_zero_FOUND)
+#    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE level_zero)
+#
+#    message(STATUS "Enable sampling of Intel GPU information using Level Zero.")
+#
+#    # add source file to source file list
+#    target_sources(${HWS_LIBRARY_NAME} PRIVATE
+#            $<BUILD_INTERFACE:
+#            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/hardware_sampler.cpp;
+#            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/level_zero_samples.cpp;
+#            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/utility.cpp
+#            >)
+#
+#    # add compile definition
+#    target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_INTEL_GPUS_ENABLED)
+#else ()
+#    message(STATUS "Hardware sampling for Intel GPUs disabled!")
+#endif ()
 
 
 ####################################################################################################################

From 2919e1c46af4d662a4db55ea72500dca3eaf9b2f Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 09:36:19 +0200
Subject: [PATCH 18/69] Backport library to support C++17 instead of only C++20
 (mainly changing std::format to fmt::format).

---
 .clang-format                                 |   2 +-
 CMakeLists.txt                                |  30 +-
 README.md                                     |   3 +-
 cmake/hardware_samplingConfig.cmake.in        |   6 +
 examples/cpp/CMakeLists.txt                   |   2 +-
 include/hardware_sampling/cpu/cpu_samples.hpp |  19 +-
 .../cpu/hardware_sampler.hpp                  |   6 +-
 include/hardware_sampling/cpu/utility.hpp     |   7 +-
 include/hardware_sampling/event.hpp           |  22 +-
 .../gpu_amd/hardware_sampler.hpp              |   6 +-
 .../gpu_amd/rocm_smi_samples.hpp              |  15 +-
 include/hardware_sampling/gpu_amd/utility.hpp |   8 +-
 .../gpu_intel/level_zero_samples.hpp          |   7 +-
 .../gpu_nvidia/hardware_sampler.hpp           |   6 +-
 .../gpu_nvidia/nvml_samples.hpp               |  15 +-
 .../hardware_sampling/gpu_nvidia/utility.hpp  |  11 +-
 include/hardware_sampling/utility.hpp         |  85 ++----
 src/hardware_sampling/cpu/cpu_samples.cpp     | 270 +++++++++---------
 .../cpu/hardware_sampler.cpp                  |  48 ++--
 src/hardware_sampling/cpu/utility.cpp         |   4 +-
 src/hardware_sampling/event.cpp               |   5 +-
 .../gpu_amd/hardware_sampler.cpp              |  19 +-
 .../gpu_amd/rocm_smi_samples.cpp              | 235 +++++++--------
 .../gpu_intel/hardware_sampler.cpp            |  36 ++-
 .../gpu_intel/level_zero_samples.cpp          |  46 +--
 .../gpu_nvidia/hardware_sampler.cpp           |  15 +-
 .../gpu_nvidia/nvml_samples.cpp               | 190 ++++++------
 src/hardware_sampling/hardware_sampler.cpp    |  23 +-
 28 files changed, 602 insertions(+), 539 deletions(-)

diff --git a/.clang-format b/.clang-format
index 5d6a911..97d4dc9 100644
--- a/.clang-format
+++ b/.clang-format
@@ -79,7 +79,7 @@ IncludeBlocks: Regroup
 IncludeCategories:
   - Regex: '^"hardware_sampling/'
     Priority: 1
-  - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess)'
+  - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess|fmt)'
     Priority: 2
   - Regex: '^.*'
     Priority: 3
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3e915ea..48ed48e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,7 +26,7 @@ add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES})
 set(HWS_TARGETS_TO_INSTALL ${HWS_LIBRARY_NAME})
 
 # use C++20
-target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_20)
+target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_17)
 
 # add target include directory
 target_include_directories(${HWS_LIBRARY_NAME} PUBLIC
@@ -58,6 +58,34 @@ endif ()
 message(STATUS "Setting the hardware sampler interval to ${HWS_SAMPLING_INTERVAL}ms.")
 target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_SAMPLING_INTERVAL=${HWS_SAMPLING_INTERVAL}ms)
 
+# install fmt as dependency
+include(FetchContent)
+set(HWS_fmt_VERSION 11.0.2)
+find_package(fmt ${HWS_fmt_VERSION} QUIET)  # same minimum version as the one fetched below
+if (fmt_FOUND)
+    message(STATUS "Found package fmt.")
+else ()
+    message(STATUS "Couldn't find package fmt. Building version ${HWS_fmt_VERSION} from source.")
+    set(FMT_PEDANTIC OFF CACHE INTERNAL "" FORCE)
+    set(FMT_WERROR OFF CACHE INTERNAL "" FORCE)
+    set(FMT_DOC OFF CACHE INTERNAL "" FORCE)
+    set(FMT_INSTALL ON CACHE INTERNAL "" FORCE) # let {fmt} handle the install target
+    set(FMT_TEST OFF CACHE INTERNAL "" FORCE)
+    set(FMT_FUZZ OFF CACHE INTERNAL "" FORCE)
+    set(FMT_CUDA_TEST OFF CACHE INTERNAL "" FORCE)
+    set(FMT_MODULE OFF CACHE INTERNAL "" FORCE)
+    set(FMT_SYSTEM_HEADERS ON CACHE INTERNAL "" FORCE)
+    # fetch string formatting library fmt
+    FetchContent_Declare(fmt
+            GIT_REPOSITORY https://github.com/fmtlib/fmt.git
+            GIT_TAG ${HWS_fmt_VERSION}
+            QUIET
+    )
+    FetchContent_MakeAvailable(fmt)
+    set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON)
+    add_dependencies(${HWS_LIBRARY_NAME} fmt)
+endif ()
+target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt)
 
 ####################################################################################################################
 ##                                                CPU measurements                                                ##
diff --git a/README.md b/README.md
index ce9598d..bf47501 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,8 @@ It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel.
 
 General dependencies:
 
-- a C++20 capable compiler supporting `std::format` (tested with GCC 14.1.0)
+- a C++17 capable compiler
+- [{fmt} >= 11.0.2](https://github.com/fmtlib/fmt) for string formatting (automatically built during the CMake configuration if it couldn't be found using the respective `find_package` call)
 - [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call)
 
 Dependencies based on the hardware to sample:
diff --git a/cmake/hardware_samplingConfig.cmake.in b/cmake/hardware_samplingConfig.cmake.in
index 56ba42a..53829a0 100644
--- a/cmake/hardware_samplingConfig.cmake.in
+++ b/cmake/hardware_samplingConfig.cmake.in
@@ -8,6 +8,12 @@
 
 include(CMakeFindDependencyMacro)
 
+# always try finding {fmt}
+# -> CMAKE_PREFIX_PATH necessary if built via FetchContent
+# -> doesn't hurt to be set every time
+list(APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/fmt")
+find_dependency(fmt REQUIRED)
+
 # sanity checks
 include("${CMAKE_CURRENT_LIST_DIR}/hardware_samplingTargets.cmake")
 check_required_components("hardware_sampling")
\ No newline at end of file
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index 6086f5b..56cff22 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -12,5 +12,5 @@ find_package(hardware_sampling REQUIRED)
 
 add_executable(prog main.cpp)
 
-target_compile_features(prog PUBLIC cxx_std_20)
+target_compile_features(prog PUBLIC cxx_std_17)
 target_link_libraries(prog PUBLIC hws::hardware_sampling)
\ No newline at end of file
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index 196572b..b537326 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -12,9 +12,10 @@
 #define HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter
+#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
-#include <format>         // std::formatter
 #include <iosfwd>         // std::ostream forward declaration
 #include <optional>       // std::optional
 #include <string>         // std::string
@@ -297,24 +298,24 @@ std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &sampl
 }  // namespace hws
 
 template <>
-struct std::formatter<hws::cpu_general_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_general_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_clock_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_clock_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_power_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_power_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_memory_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_memory_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_temperature_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_temperature_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_gfx_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_gfx_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_idle_states_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_idle_states_samples> : fmt::ostream_formatter { };
 
 #endif  // HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_
diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp
index b86771e..18b489f 100644
--- a/include/hardware_sampling/cpu/hardware_sampler.hpp
+++ b/include/hardware_sampling/cpu/hardware_sampler.hpp
@@ -14,10 +14,10 @@
 
 #include "hardware_sampling/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
 #include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"           // hws::detail::ostream_formatter
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
 #include <chrono>  // std::chrono::milliseconds, std::chrono_literals namespace
-#include <format>  // std::formatter
 #include <iosfwd>  // std::ostream forward declaration
 
 namespace hws {
@@ -148,6 +148,6 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler)
 }  // namespace hws
 
 template <>
-struct std::formatter<hws::cpu_hardware_sampler> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_hardware_sampler> : fmt::ostream_formatter { };
 
 #endif  // HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/cpu/utility.hpp b/include/hardware_sampling/cpu/utility.hpp
index d203e0b..467d4e5 100644
--- a/include/hardware_sampling/cpu/utility.hpp
+++ b/include/hardware_sampling/cpu/utility.hpp
@@ -12,7 +12,8 @@
 #define HARDWARE_SAMPLING_CPU_UTILITY_HPP_
 #pragma once
 
-#include <format>       // std::format
+#include "fmt/format.h"  // fmt::format
+
 #include <stdexcept>    // std::runtime_error
 #include <string>       // std::string
 #include <string_view>  // std::string_view
@@ -29,7 +30,7 @@ namespace hws::detail {
         {                                                                                                              \
             const int errc = subprocess_func;                                                                          \
             if (errc != 0) {                                                                                           \
-                throw std::runtime_error{ std::format("Error calling subprocess function \"{}\"", #subprocess_func) }; \
+                throw std::runtime_error{ fmt::format("Error calling subprocess function \"{}\"", #subprocess_func) }; \
             }                                                                                                          \
         }
 #else
@@ -43,6 +44,6 @@ namespace hws::detail {
  */
 [[nodiscard]] std::string run_subprocess(std::string_view cmd_line);
 
-}  // namespace hws
+}  // namespace hws::detail
 
 #endif  // HARDWARE_SAMPLING_CPU_UTILITY_HPP_
diff --git a/include/hardware_sampling/event.hpp b/include/hardware_sampling/event.hpp
index 4375813..7129141 100644
--- a/include/hardware_sampling/event.hpp
+++ b/include/hardware_sampling/event.hpp
@@ -12,12 +12,12 @@
 #define HARDWARE_SAMPLING_EVENT_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::ostream_formatter
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
-#include <chrono>  // std::chrono::steady_clock::time_point
-#include <format>  // std::formatter
-#include <iosfwd>  // std::ostream forward declaration
-#include <string>  // std::string
+#include <chrono>   // std::chrono::steady_clock::time_point
+#include <iosfwd>   // std::ostream forward declaration
+#include <string>   // std::string
+#include <utility>  // std::move
 
 namespace hws {
 
@@ -25,9 +25,17 @@ namespace hws {
  * @brief A struct encapsulating a single event.
  */
 struct event {
+    /**
+     * @brief Construct a new event from the time point it occurred at and its name.
+     * @param[in] time_point_p the time when the event occurred; copied into the `time_point` member
+     * @param[in] name_p the name of the event; taken by value and moved into the `name` member
+     */
+    event(const std::chrono::steady_clock::time_point time_point_p, std::string name_p) :
+        time_point{ time_point_p },
+        name{ std::move(name_p) } { }
+
     /// The time point this event occurred at.
     std::chrono::steady_clock::time_point time_point;
-
     /// The name of this event.
     std::string name;
 };
@@ -43,6 +51,6 @@ std::ostream &operator<<(std::ostream &out, const event &e);
 }  // namespace hws
 
 template <>
-struct std::formatter<hws::event> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::event> : fmt::ostream_formatter { };
 
 #endif  // HARDWARE_SAMPLING_EVENT_HPP_
diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
index 55ab3a9..80a7dbe 100644
--- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
@@ -14,13 +14,13 @@
 
 #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"                   // hws::detail::ostream_formatter
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
 #include <atomic>   // std::atomic
 #include <chrono>   // std::chrono::milliseconds, std::chrono_literals namespace
 #include <cstddef>  // std::size_t
 #include <cstdint>  // std::uint32_t
-#include <format>   // std::formatter
 #include <iosfwd>   // std::ostream forward declaration
 
 namespace hws {
@@ -159,6 +159,6 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp
 }  // namespace hws
 
 template <>
-struct std::formatter<hws::gpu_amd_hardware_sampler> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::gpu_amd_hardware_sampler> : fmt::ostream_formatter { };
 
 #endif  // HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index e0cb925..407a68c 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -12,10 +12,11 @@
 #define HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter
+#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
 #include <cstdint>   // std::uint64_t, std::int64_t, std::uint32_t
-#include <format>    // std::formatter
 #include <iosfwd>    // std::ostream forward declaration
 #include <optional>  // std::optional
 #include <string>    // std::string
@@ -239,18 +240,18 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &
 }  // namespace hws
 
 template <>
-struct std::formatter<hws::rocm_smi_general_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::rocm_smi_general_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::rocm_smi_clock_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::rocm_smi_clock_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::rocm_smi_power_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::rocm_smi_power_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::rocm_smi_memory_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::rocm_smi_memory_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::rocm_smi_temperature_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::rocm_smi_temperature_samples> : fmt::ostream_formatter { };
 
 #endif  // HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp
index d96387a..59e19b5 100644
--- a/include/hardware_sampling/gpu_amd/utility.hpp
+++ b/include/hardware_sampling/gpu_amd/utility.hpp
@@ -12,9 +12,9 @@
 #define HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_
 #pragma once
 
+#include "fmt/format.h"         // fmt::format
 #include "rocm_smi/rocm_smi.h"  // ROCm SMI runtime functions
 
-#include <format>     // std::format
 #include <stdexcept>  // std::runtime_error
 
 namespace hws {
@@ -32,9 +32,9 @@ namespace hws {
                 const char *error_string;                                                                                                          \
                 const rsmi_status_t ret = rsmi_status_string(errc, &error_string);                                                                 \
                 if (ret == RSMI_STATUS_SUCCESS) {                                                                                                  \
-                    throw std::runtime_error{ std::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, error_string) };           \
+                    throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, error_string) };           \
                 } else {                                                                                                                           \
-                    throw std::runtime_error{ std::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, static_cast<int>(errc)) }; \
+                    throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, static_cast<int>(errc)) }; \
                 }                                                                                                                                  \
             }                                                                                                                                      \
         }
@@ -43,7 +43,7 @@ namespace hws {
         {                                                                                                                             \
             const hiperror_t errc = hip_func;                                                                                         \
             if (errc != hipSuccess) {                                                                                                 \
-                throw std::runtime_error{ std::format("Error in HIP function call \"{}\": {}", #hip_func, hipGetErrorString(errc)) }; \
+                throw std::runtime_error{ fmt::format("Error in HIP function call \"{}\": {}", #hip_func, hipGetErrorString(errc)) }; \
             }                                                                                                                         \
         }
 
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
index 1510199..dc8b411 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
@@ -122,10 +122,11 @@ class level_zero_power_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, energy_threshold_enabled)  // true if the energy threshold is enabled
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, energy_threshold)        // the energy threshold in J
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit)         // the actually enforced power limit (W), may be different from power management limit if external limiters are set
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type)  // the type of the power readings
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode)          // true if power management limits are enabled
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_total_energy_consumption)  // the total power consumption since the last driver reload in mJ
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption)  // the total power consumption since the last driver reload in J
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
index de22f3f..60ed693 100644
--- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
@@ -15,12 +15,12 @@
 #include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp"  // hws::nvml_device_handle
 #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"        // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"               // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"                        // hws::detail::ostream_formatter
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
 #include <atomic>   // std::atomic
 #include <chrono>   // std::chrono::milliseconds, std::chrono_literals namespace
 #include <cstddef>  // std::size_t
-#include <format>   // std::formatter
 #include <iosfwd>   // std::ostream forward declaration
 #include <string>   // std::string
 
@@ -160,6 +160,6 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s
 }  // namespace hws
 
 template <>
-struct std::formatter<hws::gpu_nvidia_hardware_sampler> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::gpu_nvidia_hardware_sampler> : fmt::ostream_formatter { };
 
 #endif  // HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index f766c3d..2757a60 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -12,9 +12,10 @@
 #define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter
+#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
-#include <format>    // std::formatter
 #include <iosfwd>    // std::ostream forward declaration
 #include <map>       // std::map
 #include <optional>  // std::optional
@@ -229,18 +230,18 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp
 }  // namespace hws
 
 template <>
-struct std::formatter<hws::nvml_general_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_general_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::nvml_clock_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_clock_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::nvml_power_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_power_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::nvml_memory_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_memory_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::nvml_temperature_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_temperature_samples> : fmt::ostream_formatter { };
 
 #endif  // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp
index 8a1c590..272126b 100644
--- a/include/hardware_sampling/gpu_nvidia/utility.hpp
+++ b/include/hardware_sampling/gpu_nvidia/utility.hpp
@@ -12,11 +12,10 @@
 #define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::join
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+#include "nvml.h"        // NVML runtime functions
 
-#include "nvml.h"  // NVML runtime functions
-
-#include <format>     // std::format
 #include <stdexcept>  // std::runtime_error
 #include <string>     // std::string
 #include <vector>     // std::vector
@@ -33,7 +32,7 @@ namespace hws::detail {
         {                                                                                                                                                          \
             const nvmlReturn_t errc = nvml_func;                                                                                                                   \
             if (errc != NVML_SUCCESS) {                                                                                                                            \
-                throw std::runtime_error{ std::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast<int>(errc)) }; \
+                throw std::runtime_error{ fmt::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast<int>(errc)) }; \
             }                                                                                                                                                      \
         }
 #else
@@ -77,7 +76,7 @@ namespace hws::detail {
         if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) {
             reasons.emplace_back("HwThermalSlowdown");
         }
-        return std::format("\"{}\"", detail::join(reasons, "|"));
+        return fmt::format("\"{}\"", fmt::join(reasons, "|"));
     }
 }
 
diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp
index 81e1136..91836c4 100644
--- a/include/hardware_sampling/utility.hpp
+++ b/include/hardware_sampling/utility.hpp
@@ -12,11 +12,13 @@
 #define HARDWARE_SAMPLING_UTILITY_HPP_
 #pragma once
 
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+
 #include <charconv>      // std::from_chars
 #include <chrono>        // std::chrono::{milliseconds, duration_cast}
 #include <cmath>         // std::trunc
 #include <cstddef>       // std::size_t
-#include <format>        // std::format, std::formatter, std::basic_format_context, std::format_to
 #include <iterator>      // std::back_inserter, std::next, std::prev
 #include <optional>      // std::optional
 #include <sstream>       // std::basic_stringstream
@@ -24,7 +26,7 @@
 #include <string>        // std::string, std::stof, std::stod, std::stold
 #include <string_view>   // std::string_view, std::basic_string_view
 #include <system_error>  // std::errc
-#include <type_traits>   // std::is_same_v, std::remove_cvref_t
+#include <type_traits>   // std::is_same_v, std::remove_cv_t, std::remove_reference_t
 #include <vector>        // std::vector
 
 namespace hws::detail {
@@ -54,6 +56,21 @@ namespace hws::detail {
   private:                                                                                            \
     std::optional<std::vector<sample_type>> sample_name##_{};
 
+// TODO: clean-up
+
+/**
+ * @brief Test whether @p start is a prefix of the string @p sv.
+ * @param[in] sv the string to inspect
+ * @param[in] start the candidate prefix
+ * @return `true` if the leading characters of @p sv equal @p start, otherwise `false`
+ */
+[[nodiscard]] inline bool starts_with(const std::string_view sv, const std::string_view start) {
+    return sv.compare(0, start.size(), start) == 0;
+}
+
+template <typename T>
+using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;  // C++17 stand-in for C++20's std::remove_cvref_t
+
 /**
  * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point.
  * @tparam TimePoint the type if the time points
@@ -125,10 +142,10 @@ template <typename T>
  */
 template <typename T>
 [[nodiscard]] inline T convert_to(const std::string_view str) {
-    if constexpr (std::is_same_v<std::remove_cvref_t<T>, std::string>) {
+    if constexpr (std::is_same_v<detail::remove_cvref_t<T>, std::string>) {
         // convert string_view to string
         return std::string{ trim(str) };
-    } else if constexpr (std::is_same_v<std::remove_cvref_t<T>, bool>) {
+    } else if constexpr (std::is_same_v<detail::remove_cvref_t<T>, bool>) {
         const std::string lower_case_str = to_lower_case(trim(str));
         // the string true
         if (lower_case_str == "true") {
@@ -140,17 +157,17 @@ template <typename T>
         }
         // convert a number to its "long long" value and convert it to a bool: 0 -> false, otherwise true
         return static_cast<bool>(convert_to<long long>(str));
-    } else if constexpr (std::is_same_v<std::remove_cvref_t<T>, char>) {
+    } else if constexpr (std::is_same_v<detail::remove_cvref_t<T>, char>) {
         const std::string_view trimmed = trim(str);
         // since we expect a character, after trimming the string must only contain exactly one character
         if (trimmed.size() != 1) {
-            throw std::runtime_error{ std::format("Can't convert '{}' to a value of type char!", str) };
+            throw std::runtime_error{ fmt::format("Can't convert '{}' to a value of type char!", str) };
         }
         return trimmed.front();
-    } else if constexpr (std::is_floating_point_v<std::remove_cvref_t<T>>) {
-        if constexpr (std::is_same_v<std::remove_cvref_t<T>, float>) {
+    } else if constexpr (std::is_floating_point_v<detail::remove_cvref_t<T>>) {
+        if constexpr (std::is_same_v<detail::remove_cvref_t<T>, float>) {
             return std::stof(std::string{ str });
-        } else if constexpr (std::is_same_v<std::remove_cvref_t<T>, double>) {
+        } else if constexpr (std::is_same_v<detail::remove_cvref_t<T>, double>) {
             return std::stod(std::string{ str });
         } else {
             return std::stold(std::string{ str });
@@ -163,7 +180,7 @@ template <typename T>
         T val;
         auto res = std::from_chars(trimmed_str.data(), trimmed_str.data() + trimmed_str.size(), val);
         if (res.ec != std::errc{}) {
-            throw std::runtime_error{ std::format("Can't convert '{}' to a value of type T!", str) };
+            throw std::runtime_error{ fmt::format("Can't convert '{}' to a value of type T!", str) };
         }
         return val;
     }
@@ -203,46 +220,6 @@ template <typename T>
  */
 [[nodiscard]] std::vector<std::string_view> split(std::string_view str, char delim = ' ');
 
-/**
- * @brief A std::formatter child class allowing to format custom types using an `operator<<` overload.
- * @tparam CharT the character type
- */
-template <typename CharT>
-struct basic_ostream_formatter : std::formatter<std::basic_string_view<CharT>, CharT> {
-    template <typename T, typename OutputIt>
-    OutputIt format(const T &value, std::basic_format_context<OutputIt, CharT> &ctx) const {
-        std::basic_stringstream<CharT> ss;
-        ss << value;
-        return std::formatter<std::basic_string_view<CharT>, CharT>::format(ss.view(), ctx);
-    }
-};
-
-/// Type alias for a basic_ostream_formatter using a normal char.
-using ostream_formatter = basic_ostream_formatter<char>;
-
-/**
- * @brief Join all values in @p c to a single string using @p delim as delimiter.
- * @tparam Container the type of the container
- * @param[in] c the container for what the values should be joined
- * @param[in] delim the delimiter used in joining the values
- * @return the joined string (`[[nodiscard]]`)
- */
-template <typename Container>
-[[nodiscard]] inline std::string join(const Container &c, const std::string_view delim) {
-    if (c.empty()) {
-        return "";
-    } else if (c.size() == 1) {
-        return std::format("{}", *c.cbegin());
-    } else {
-        std::string out{};
-        for (auto it = c.cbegin(); it != std::prev(c.cend()); it = std::next(it)) {
-            std::format_to(std::back_inserter(out), "{}{}", *it, delim);
-        }
-        std::format_to(std::back_inserter(out), "{}", *std::prev(c.end()));
-        return out;
-    }
-}
-
 template <typename T>
 struct is_vector : std::false_type { };
 
@@ -264,13 +241,13 @@ template <typename MapType>
     if (map.has_value()) {
         std::vector<std::string> entries{};
         for (const auto &[key, value] : map.value()) {
-            if constexpr (is_vector_v<std::remove_cvref_t<decltype(value)>>) {
-                entries.push_back(std::format("{{{}, [{}]}}", key, detail::join(value, ", ")));
+            if constexpr (is_vector_v<detail::remove_cvref_t<decltype(value)>>) {
+                entries.push_back(fmt::format("{{{}, [{}]}}", key, fmt::join(value, ", ")));
             } else {
-                entries.push_back(std::format("{{{}, {}}}", key, value));
+                entries.push_back(fmt::format("{{{}, {}}}", key, value));
             }
         }
-        return detail::join(entries, ", ");
+        return fmt::format("{}", fmt::join(entries, ", "));
     }
     return "";
 }
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index deb1ddc..02148f0 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -7,11 +7,13 @@
 
 #include "hardware_sampling/cpu/cpu_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join}
+#include "hardware_sampling/utility.hpp"  // hws::detail::value_or_default
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
 
 #include <array>        // std::array
 #include <cstddef>      // std::size_t
-#include <format>       // std::format
 #include <ostream>      // std::ostream
 #include <regex>        // std::regex, std::regex::extended, std::regex_match, std::regex_replace
 #include <string>       // std::string
@@ -29,129 +31,129 @@ std::string cpu_general_samples::generate_yaml_string() const {
 
     // architecture
     if (this->architecture_.has_value()) {
-        str += std::format("  architecture:\n"
+        str += fmt::format("  architecture:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->architecture_.value());
     }
     // byte order
     if (this->byte_order_.has_value()) {
-        str += std::format("  byte_order:\n"
+        str += fmt::format("  byte_order:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->byte_order_.value());
     }
     // number of cores
     if (this->num_cores_.has_value()) {
-        str += std::format("  num_cores:\n"
+        str += fmt::format("  num_cores:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->num_cores_.value());
     }
     // number of threads including hyper-threads
     if (this->num_threads_.has_value()) {
-        str += std::format("  num_threads:\n"
+        str += fmt::format("  num_threads:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->num_threads_.value());
     }
     // number of threads per core
     if (this->threads_per_core_.has_value()) {
-        str += std::format("  threads_per_core:\n"
+        str += fmt::format("  threads_per_core:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->threads_per_core_.value());
     }
     // number of cores per socket
     if (this->cores_per_socket_.has_value()) {
-        str += std::format("  cores_per_socket:\n"
+        str += fmt::format("  cores_per_socket:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->cores_per_socket_.value());
     }
-    // number of cores per socket
+    // number of sockets
     if (this->num_sockets_.has_value()) {
-        str += std::format("  num_sockets:\n"
+        str += fmt::format("  num_sockets:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->num_sockets_.value());
     }
     // number of NUMA nodes
     if (this->numa_nodes_.has_value()) {
-        str += std::format("  numa_nodes:\n"
+        str += fmt::format("  numa_nodes:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->numa_nodes_.value());
     }
     // the vendor specific ID
     if (this->vendor_id_.has_value()) {
-        str += std::format("  vendor_id:\n"
+        str += fmt::format("  vendor_id:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->vendor_id_.value());
     }
     // the CPU name
     if (this->name_.has_value()) {
-        str += std::format("  name:\n"
+        str += fmt::format("  name:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->name_.value());
     }
     // CPU specific flags (like SSE, AVX, ...)
     if (this->flags_.has_value()) {
-        str += std::format("  flags:\n"
+        str += fmt::format("  flags:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->flags_.value(), ", "));
+                           fmt::join(this->flags_.value(), ", "));
     }
 
     // the percent the CPU was busy
     if (this->compute_utilization_.has_value()) {
-        str += std::format("  compute_utilization:\n"
+        str += fmt::format("  compute_utilization:\n"
                            "    turbostat_name: \"Busy%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->compute_utilization_.value(), ", "));
+                           fmt::join(this->compute_utilization_.value(), ", "));
     }
     // the instructions per cycle count
     if (this->ipc_.has_value()) {
-        str += std::format("  instructions_per_cycle:\n"
+        str += fmt::format("  instructions_per_cycle:\n"
                            "    turbostat_name: \"IPC\"\n"
                            "    unit: \"float\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->ipc_.value(), ", "));
+                           fmt::join(this->ipc_.value(), ", "));
     }
     // the number of interrupts
     if (this->irq_.has_value()) {
-        str += std::format("  interrupts:\n"
+        str += fmt::format("  interrupts:\n"
                            "    turbostat_name: \"IRQ\"\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->irq_.value(), ", "));
+                           fmt::join(this->irq_.value(), ", "));
     }
     // the number of system management interrupts
     if (this->smi_.has_value()) {
-        str += std::format("  system_management_interrupts:\n"
+        str += fmt::format("  system_management_interrupts:\n"
                            "    turbostat_name: \"SMI\"\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->smi_.value(), ", "));
+                           fmt::join(this->smi_.value(), ", "));
     }
     // the number of times the CPU was in the poll state
     if (this->poll_.has_value()) {
-        str += std::format("  polling_state:\n"
+        str += fmt::format("  polling_state:\n"
                            "    turbostat_name: \"POLL\"\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->poll_.value(), ", "));
+                           fmt::join(this->poll_.value(), ", "));
     }
     // the percent the CPU was in the polling state
     if (this->poll_percent_.has_value()) {
-        str += std::format("  polling_percentage:\n"
+        str += fmt::format("  polling_percentage:\n"
                            "    turbostat_name: \"POLL%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->poll_percent_.value(), ", "));
+                           fmt::join(this->poll_percent_.value(), ", "));
     }
 
     // remove last newline
@@ -161,7 +163,7 @@ std::string cpu_general_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) {
-    std::string str = std::format("architecture [string]: {}\n"
+    std::string str = fmt::format("architecture [string]: {}\n"
                                   "byte_order [string]: {}\n"
                                   "num_cores [int]: {}\n"
                                   "num_threads [int]: {}\n"
@@ -188,13 +190,13 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples)
                                   detail::value_or_default(samples.get_numa_nodes()),
                                   detail::value_or_default(samples.get_vendor_id()),
                                   detail::value_or_default(samples.get_name()),
-                                  detail::join(detail::value_or_default(samples.get_flags()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_ipc()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_irq()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_smi()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_poll()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_poll_percent()), ", "));
+                                  fmt::join(detail::value_or_default(samples.get_flags()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_ipc()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_irq()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_smi()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_poll()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_poll_percent()), ", "));
 
     // remove last newline
     str.pop_back();
@@ -211,21 +213,21 @@ std::string cpu_clock_samples::generate_yaml_string() const {
 
     // true if frequency boost is enabled
     if (this->auto_boosted_clock_enabled_.has_value()) {
-        str += std::format("  auto_boosted_clock_enabled:\n"
+        str += fmt::format("  auto_boosted_clock_enabled:\n"
                            "    unit: \"bool\"\n"
                            "    values: {}\n",
                            this->auto_boosted_clock_enabled_.value());
     }
     // the minimal CPU frequency
     if (this->clock_frequency_min_.has_value()) {
-        str += std::format("  clock_frequency_min:\n"
+        str += fmt::format("  clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->clock_frequency_min_.value());
     }
     // the maximum CPU frequency
     if (this->clock_frequency_max_.has_value()) {
-        str += std::format("  clock_frequency_max:\n"
+        str += fmt::format("  clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->clock_frequency_max_.value());
@@ -233,27 +235,27 @@ std::string cpu_clock_samples::generate_yaml_string() const {
 
     // the average CPU frequency
     if (this->clock_frequency_.has_value()) {
-        str += std::format("  clock_frequency:\n"
+        str += fmt::format("  clock_frequency:\n"
                            "    turbostat_name: \"Avg_MHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_frequency_.value(), ", "));
+                           fmt::join(this->clock_frequency_.value(), ", "));
     }
     // the average CPU frequency excluding idle time
     if (this->average_non_idle_frequency_.has_value()) {
-        str += std::format("  average_non_idle_frequency:\n"
+        str += fmt::format("  average_non_idle_frequency:\n"
                            "    turbostat_name: \"Bzy_MHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->average_non_idle_frequency_.value(), ", "));
+                           fmt::join(this->average_non_idle_frequency_.value(), ", "));
     }
     // the time stamp counter
     if (this->time_stamp_counter_.has_value()) {
-        str += std::format("  time_stamp_counter:\n"
+        str += fmt::format("  time_stamp_counter:\n"
                            "    turbostat_name: \"TSC_MHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->time_stamp_counter_.value(), ", "));
+                           fmt::join(this->time_stamp_counter_.value(), ", "));
     }
 
     // remove last newline
@@ -263,7 +265,7 @@ std::string cpu_clock_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) {
-    return out << std::format("auto_boosted_clock_enabled [bool]: {}\n"
+    return out << fmt::format("auto_boosted_clock_enabled [bool]: {}\n"
                               "clock_frequency_min [MHz]: {}\n"
                               "clock_frequency_max [MHz]: {}\n"
                               "clock_frequency [MHz]: [{}]\n"
@@ -272,9 +274,9 @@ std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) {
                               detail::value_or_default(samples.get_auto_boosted_clock_enabled()),
                               detail::value_or_default(samples.get_clock_frequency_min()),
                               detail::value_or_default(samples.get_clock_frequency_max()),
-                              detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_time_stamp_counter()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_time_stamp_counter()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -286,7 +288,7 @@ std::string cpu_power_samples::generate_yaml_string() const {
 
     // power measurement type
     if (this->power_measurement_type_.has_value()) {
-        str += std::format("  power_measurement_type:\n"
+        str += fmt::format("  power_measurement_type:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->power_measurement_type_.value());
@@ -294,51 +296,51 @@ std::string cpu_power_samples::generate_yaml_string() const {
 
     // the package Watt
     if (this->power_usage_.has_value()) {
-        str += std::format("  power_usage:\n"
+        str += fmt::format("  power_usage:\n"
                            "    turbostat_name: \"PkgWatt\"\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->power_usage_.value(), ", "));
+                           fmt::join(this->power_usage_.value(), ", "));
     }
     // total energy consumed
     if (this->power_total_energy_consumption_.has_value()) {
-        str += std::format("  power_total_energy_consumed:\n"
+        str += fmt::format("  power_total_energy_consumed:\n"
                            "    unit: \"J\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->power_total_energy_consumption_.value(), ", "));
+                           fmt::join(this->power_total_energy_consumption_.value(), ", "));
     }
 
     // the core Watt
     if (this->core_watt_.has_value()) {
-        str += std::format("  core_power:\n"
+        str += fmt::format("  core_power:\n"
                            "    turbostat_name: \"CorWatt\"\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->core_watt_.value(), ", "));
+                           fmt::join(this->core_watt_.value(), ", "));
     }
     // the DRAM Watt
     if (this->ram_watt_.has_value()) {
-        str += std::format("  dram_power:\n"
+        str += fmt::format("  dram_power:\n"
                            "    turbostat_name: \"RAMWatt\"\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->ram_watt_.value(), ", "));
+                           fmt::join(this->ram_watt_.value(), ", "));
     }
     // the percent of time when the RAPL package throttle was active
     if (this->package_rapl_throttle_percent_.has_value()) {
-        str += std::format("  package_rapl_throttling:\n"
+        str += fmt::format("  package_rapl_throttling:\n"
                            "    turbostat_name: \"PKG_%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->package_rapl_throttle_percent_.value(), ", "));
+                           fmt::join(this->package_rapl_throttle_percent_.value(), ", "));
     }
     // the percent of time when the RAPL DRAM throttle was active
     if (this->dram_rapl_throttle_percent_.has_value()) {
-        str += std::format("  dram_rapl_throttling:\n"
+        str += fmt::format("  dram_rapl_throttling:\n"
                            "    turbostat_name: \"RAM_%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->dram_rapl_throttle_percent_.value(), ", "));
+                           fmt::join(this->dram_rapl_throttle_percent_.value(), ", "));
     }
 
     // remove last newline
@@ -348,7 +350,7 @@ std::string cpu_power_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_power_samples &samples) {
-    return out << std::format("power_measurement_type [string]: {}\n"
+    return out << fmt::format("power_measurement_type [string]: {}\n"
                               "power_usage [W]: [{}]\n"
                               "power_total_energy_consumption [J]: [{}]\n"
                               "core_watt [W]: [{}]\n"
@@ -356,12 +358,12 @@ std::ostream &operator<<(std::ostream &out, const cpu_power_samples &samples) {
                               "package_rapl_throttle_percent [%]: [{}]\n"
                               "dram_rapl_throttle_percent [%]: [{}]",
                               detail::value_or_default(samples.get_power_measurement_type()),
-                              detail::join(detail::value_or_default(samples.get_power_usage()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
-                              detail::join(detail::value_or_default(samples.get_core_watt()), ", "),
-                              detail::join(detail::value_or_default(samples.get_ram_watt()), ", "),
-                              detail::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_dram_rapl_throttle_percent()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_power_usage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_core_watt()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_ram_watt()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_dram_rapl_throttle_percent()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -373,28 +375,28 @@ std::string cpu_memory_samples::generate_yaml_string() const {
 
     // the size of the L1 data cache
     if (this->l1d_cache_.has_value()) {
-        str += std::format("  cache_size_L1d:\n"
+        str += fmt::format("  cache_size_L1d:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->l1d_cache_.value());
     }
     // the size of the L1 instruction cache
     if (this->l1i_cache_.has_value()) {
-        str += std::format("  cache_size_L1i:\n"
+        str += fmt::format("  cache_size_L1i:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->l1i_cache_.value());
     }
     // the size of the L2 cache
     if (this->l2_cache_.has_value()) {
-        str += std::format("  cache_size_L2:\n"
+        str += fmt::format("  cache_size_L2:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->l2_cache_.value());
     }
     // the size of the L3 cache
     if (this->l3_cache_.has_value()) {
-        str += std::format("  cache_size_L3:\n"
+        str += fmt::format("  cache_size_L3:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->l3_cache_.value());
@@ -402,14 +404,14 @@ std::string cpu_memory_samples::generate_yaml_string() const {
 
     // the total size of available memory
     if (this->memory_total_.has_value()) {
-        str += std::format("  memory_total:\n"
+        str += fmt::format("  memory_total:\n"
                            "    unit: \"B\"\n"
                            "    values: {}\n",
                            this->memory_total_.value());
     }
     // the total size of the swap memory
     if (this->swap_memory_total_.has_value()) {
-        str += std::format("  swap_memory_total:\n"
+        str += fmt::format("  swap_memory_total:\n"
                            "    unit: \"B\"\n"
                            "    values: {}\n",
                            this->swap_memory_total_.value());
@@ -417,31 +419,31 @@ std::string cpu_memory_samples::generate_yaml_string() const {
 
     // the available free memory
     if (this->memory_free_.has_value()) {
-        str += std::format("  memory_free:\n"
+        str += fmt::format("  memory_free:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_free_.value(), ", "));
+                           fmt::join(this->memory_free_.value(), ", "));
     }
     // the used memory
     if (this->memory_used_.has_value()) {
-        str += std::format("  memory_used:\n"
+        str += fmt::format("  memory_used:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_used_.value(), ", "));
+                           fmt::join(this->memory_used_.value(), ", "));
     }
     // the available swap memory
     if (this->swap_memory_free_.has_value()) {
-        str += std::format("  swap_memory_free:\n"
+        str += fmt::format("  swap_memory_free:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->swap_memory_free_.value(), ", "));
+                           fmt::join(this->swap_memory_free_.value(), ", "));
     }
-    // the swap memory
+    // the used swap memory
     if (this->swap_memory_used_.has_value()) {
-        str += std::format("  swap_memory_used:\n"
+        str += fmt::format("  swap_memory_used:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->swap_memory_used_.value(), ", "));
+                           fmt::join(this->swap_memory_used_.value(), ", "));
     }
 
     // remove last newline
@@ -451,7 +453,7 @@ std::string cpu_memory_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) {
-    return out << std::format("l1d_cache [string]: {}\n"
+    return out << fmt::format("l1d_cache [string]: {}\n"
                               "l1i_cache [string]: {}\n"
                               "l2_cache [string]: {}\n"
                               "l3_cache [string]: {}\n"
@@ -467,10 +469,10 @@ std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) {
                               detail::value_or_default(samples.get_l3_cache()),
                               detail::value_or_default(samples.get_memory_total()),
                               detail::value_or_default(samples.get_swap_memory_total()),
-                              detail::join(detail::value_or_default(samples.get_memory_free()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_used()), ", "),
-                              detail::join(detail::value_or_default(samples.get_swap_memory_free()), ", "),
-                              detail::join(detail::value_or_default(samples.get_swap_memory_used()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_used()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_swap_memory_free()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_swap_memory_used()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -482,27 +484,27 @@ std::string cpu_temperature_samples::generate_yaml_string() const {
 
     // the temperature of the cores
     if (this->core_temperature_.has_value()) {
-        str += std::format("  per_core_temperature:\n"
+        str += fmt::format("  per_core_temperature:\n"
                            "    turbostat_name: \"CoreTmp\"\n"
                            "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->core_temperature_.value(), ", "));
+                           fmt::join(this->core_temperature_.value(), ", "));
     }
-    // the percentage of time the core throttled due the temperature constraints
+    // the percentage of time the core throttled due to temperature constraints
     if (this->core_throttle_percent_.has_value()) {
-        str += std::format("  core_throttle_percentage:\n"
+        str += fmt::format("  core_throttle_percentage:\n"
                            "    turbostat_name: \"CoreThr\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->core_throttle_percent_.value(), ", "));
+                           fmt::join(this->core_throttle_percent_.value(), ", "));
     }
     // the temperature of the whole package
     if (this->package_temperature_.has_value()) {
-        str += std::format("  per_package_temperature:\n"
+        str += fmt::format("  per_package_temperature:\n"
                            "    turbostat_name: \"PkgTmp\"\n"
                            "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->package_temperature_.value(), ", "));
+                           fmt::join(this->package_temperature_.value(), ", "));
     }
 
     // remove last newline
@@ -512,12 +514,12 @@ std::string cpu_temperature_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_temperature_samples &samples) {
-    return out << std::format("core_temperature [°C]: [{}]\n"
+    return out << fmt::format("core_temperature [°C]: [{}]\n"
                               "core_throttle_percent [%]: [{}]\n"
                               "package_temperature [°C]: [{}]",
-                              detail::join(detail::value_or_default(samples.get_core_temperature()), ", "),
-                              detail::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_package_temperature()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_core_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_package_temperature()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -529,51 +531,51 @@ std::string cpu_gfx_samples::generate_yaml_string() const {
 
     // the percentage of time the iGPU was in the render state
     if (this->gfx_render_state_percent_.has_value()) {
-        str += std::format("  graphics_render_state:\n"
+        str += fmt::format("  graphics_render_state:\n"
                            "    turbostat_name: \"GFX%rc6\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->gfx_render_state_percent_.value(), ", "));
+                           fmt::join(this->gfx_render_state_percent_.value(), ", "));
     }
     // the core frequency of the iGPU
     if (this->gfx_frequency_.has_value()) {
-        str += std::format("  graphics_frequency:\n"
+        str += fmt::format("  graphics_frequency:\n"
                            "    turbostat_name: \"GFXMHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->gfx_frequency_.value(), ", "));
+                           fmt::join(this->gfx_frequency_.value(), ", "));
     }
     // the average core frequency of the iGPU
     if (this->average_gfx_frequency_.has_value()) {
-        str += std::format("  average_graphics_frequency:\n"
+        str += fmt::format("  average_graphics_frequency:\n"
                            "    turbostat_name: \"GFXAMHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->average_gfx_frequency_.value(), ", "));
+                           fmt::join(this->average_gfx_frequency_.value(), ", "));
     }
     // the percentage of time the iGPU was in the c0 state
     if (this->gfx_state_c0_percent_.has_value()) {
-        str += std::format("  gpu_state_c0:\n"
+        str += fmt::format("  gpu_state_c0:\n"
                            "    turbostat_name: \"GFX%C0\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->gfx_state_c0_percent_.value(), ", "));
+                           fmt::join(this->gfx_state_c0_percent_.value(), ", "));
     }
     // the percentage of time the CPU worked for the iGPU
     if (this->cpu_works_for_gpu_percent_.has_value()) {
-        str += std::format("  cpu_works_for_gpu:\n"
+        str += fmt::format("  cpu_works_for_gpu:\n"
                            "    turbostat_name: \"CPUGFX%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->cpu_works_for_gpu_percent_.value(), ", "));
+                           fmt::join(this->cpu_works_for_gpu_percent_.value(), ", "));
     }
     // the iGPU Watt
     if (this->gfx_watt_.has_value()) {
-        str += std::format("  graphics_power:\n"
+        str += fmt::format("  graphics_power:\n"
                            "    turbostat_name: \"GFXWatt\"\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->gfx_watt_.value(), ", "));
+                           fmt::join(this->gfx_watt_.value(), ", "));
     }
 
     // remove last newline
@@ -583,18 +585,18 @@ std::string cpu_gfx_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_gfx_samples &samples) {
-    return out << std::format("gfx_render_state_percent [%]: [{}]\n"
+    return out << fmt::format("gfx_render_state_percent [%]: [{}]\n"
                               "gfx_frequency [MHz]: [{}]\n"
                               "average_gfx_frequency [MHz]: [{}]\n"
                               "gfx_state_c0_percent [%]: [{}]\n"
                               "cpu_works_for_gpu_percent [%]: [{}]\n"
                               "gfx_watt [W]: [{}]",
-                              detail::join(detail::value_or_default(samples.get_gfx_render_state_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_gfx_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_average_gfx_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_gfx_state_c0_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_cpu_works_for_gpu_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_gfx_watt()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_gfx_render_state_percent()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_gfx_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_average_gfx_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_gfx_state_c0_percent()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_cpu_works_for_gpu_percent()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_gfx_watt()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -606,43 +608,43 @@ std::string cpu_idle_states_samples::generate_yaml_string() const {
 
     // the percentage of time all CPUs were in the c0 state
     if (this->all_cpus_state_c0_percent_.has_value()) {
-        str += std::format("  all_cpus_state_c0:\n"
+        str += fmt::format("  all_cpus_state_c0:\n"
                            "    turbostat_name: \"Totl%C0\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->all_cpus_state_c0_percent_.value(), ", "));
+                           fmt::join(this->all_cpus_state_c0_percent_.value(), ", "));
     }
     // the percentage of time any CPU was in the c0 state
     if (this->any_cpu_state_c0_percent_.has_value()) {
-        str += std::format("  any_cpu_state_c0:\n"
+        str += fmt::format("  any_cpu_state_c0:\n"
                            "    turbostat_name: \"Any%C0\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->any_cpu_state_c0_percent_.value(), ", "));
+                           fmt::join(this->any_cpu_state_c0_percent_.value(), ", "));
     }
     // the percentage of time the CPUs were in the low power idle state
     if (this->low_power_idle_state_percent_.has_value()) {
-        str += std::format("  lower_power_idle_state:\n"
+        str += fmt::format("  lower_power_idle_state:\n"
                            "    turbostat_name: \"CPU%LPI\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->low_power_idle_state_percent_.value(), ", "));
+                           fmt::join(this->low_power_idle_state_percent_.value(), ", "));
     }
     // the percentage of time the CPUs were in the system low power idle state
     if (this->system_low_power_idle_state_percent_.has_value()) {
-        str += std::format("  system_lower_power_idle_state:\n"
+        str += fmt::format("  system_lower_power_idle_state:\n"
                            "    turbostat_name: \"SYS%LPI\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->system_low_power_idle_state_percent_.value(), ", "));
+                           fmt::join(this->system_low_power_idle_state_percent_.value(), ", "));
     }
     // the percentage of time the package was in the low power idle state
     if (this->package_low_power_idle_state_percent_.has_value()) {
-        str += std::format("  package_lower_power_idle_state:\n"
+        str += fmt::format("  package_lower_power_idle_state:\n"
                            "    turbostat_name: \"Pkg%LPI\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->package_low_power_idle_state_percent_.value(), ", "));
+                           fmt::join(this->package_low_power_idle_state_percent_.value(), ", "));
     }
 
     // the other core idle states
@@ -671,14 +673,14 @@ std::string cpu_idle_states_samples::generate_yaml_string() const {
                     std::string entry_name_with_state{};
                     std::regex_replace(std::back_inserter(entry_name_with_state), entry_name_placeholder.begin(), entry_name_placeholder.end(), placeholder_reg, std::string{ state });
 
-                    str += std::format("  {}:\n"
+                    str += fmt::format("  {}:\n"
                                        "    turbostat_name: \"{}\"\n"
                                        "    unit: \"{}\"\n"
                                        "    values: [{}]\n",
                                        entry_name_with_state,
                                        entry,
                                        entry_unit,
-                                       detail::join(values, ", "));
+                                       fmt::join(values, ", "));
                     break;
                 }
             }
@@ -692,21 +694,21 @@ std::string cpu_idle_states_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &samples) {
-    std::string str = std::format("all_cpus_state_c0_percent [%]: [{}]\n"
+    std::string str = fmt::format("all_cpus_state_c0_percent [%]: [{}]\n"
                                   "any_cpu_state_c0_percent [%]: [{}]\n"
                                   "low_power_idle_state_percent [%]: [{}]\n"
                                   "system_low_power_idle_state_percent [%]: [{}]\n"
                                   "package_low_power_idle_state_percent [%]: [{}]\n",
-                                  detail::join(detail::value_or_default(samples.get_all_cpus_state_c0_percent()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_any_cpu_state_c0_percent()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_low_power_idle_state_percent()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_system_low_power_idle_state_percent()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_package_low_power_idle_state_percent()), ", "));
+                                  fmt::join(detail::value_or_default(samples.get_all_cpus_state_c0_percent()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_any_cpu_state_c0_percent()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_low_power_idle_state_percent()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_system_low_power_idle_state_percent()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_package_low_power_idle_state_percent()), ", "));
 
     // add map entries
     if (samples.get_idle_states().has_value()) {
         for (const auto &[key, value] : samples.get_idle_states().value()) {
-            str += std::format("{}: [{}]\n", key, detail::join(value, ", "));
+            str += fmt::format("{}: [{}]\n", key, fmt::join(value, ", "));
         }
     }
 
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 89683eb..6c8471d 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -10,13 +10,15 @@
 #include "hardware_sampling/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
 #include "hardware_sampling/cpu/utility.hpp"       // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess
 #include "hardware_sampling/hardware_sampler.hpp"  // hws::tracking::hardware_sampler
-#include "hardware_sampling/utility.hpp"           // hws::detail::{split, split_as, trim, convert_to, ostream_formatter, join}
+#include "hardware_sampling/utility.hpp"           // hws::detail::{split, split_as, trim, convert_to, starts_with}
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
 
 #include <cassert>        // assert
 #include <chrono>         // std::chrono::{steady_clock, milliseconds}
 #include <cstddef>        // std::size_t
 #include <exception>      // std::exception, std::terminate
-#include <format>         // std::format
 #include <ios>            // std::ios_base
 #include <iostream>       // std::cerr, std::endl
 #include <optional>       // std::make_optional
@@ -69,39 +71,39 @@ void cpu_hardware_sampler::sampling_loop() {
             value = detail::trim(value);
 
             // check the lines if the start with an entry that we want to sample
-            if (line.starts_with("Architecture")) {
+            if (detail::starts_with(line, "Architecture")) {
                 general_samples_.architecture_ = detail::convert_to<decltype(general_samples_.architecture_)::value_type>(value);
-            } else if (line.starts_with("Byte Order")) {
+            } else if (detail::starts_with(line, "Byte Order")) {
                 general_samples_.byte_order_ = detail::convert_to<decltype(general_samples_.byte_order_)::value_type>(value);
-            } else if (line.starts_with("CPU(s)")) {
+            } else if (detail::starts_with(line, "CPU(s)")) {
                 general_samples_.num_threads_ = detail::convert_to<decltype(general_samples_.num_threads_)::value_type>(value);
-            } else if (line.starts_with("Thread(s) per core")) {
+            } else if (detail::starts_with(line, "Thread(s) per core")) {
                 general_samples_.threads_per_core_ = detail::convert_to<decltype(general_samples_.threads_per_core_)::value_type>(value);
-            } else if (line.starts_with("Core(s) per socket")) {
+            } else if (detail::starts_with(line, "Core(s) per socket")) {
                 general_samples_.cores_per_socket_ = detail::convert_to<decltype(general_samples_.cores_per_socket_)::value_type>(value);
-            } else if (line.starts_with("Socket(s)")) {
+            } else if (detail::starts_with(line, "Socket(s)")) {
                 general_samples_.num_sockets_ = detail::convert_to<decltype(general_samples_.num_sockets_)::value_type>(value);
-            } else if (line.starts_with("NUMA node(s)")) {
+            } else if (detail::starts_with(line, "NUMA node(s)")) {
                 general_samples_.numa_nodes_ = detail::convert_to<decltype(general_samples_.numa_nodes_)::value_type>(value);
-            } else if (line.starts_with("Vendor ID")) {
+            } else if (detail::starts_with(line, "Vendor ID")) {
                 general_samples_.vendor_id_ = detail::convert_to<decltype(general_samples_.vendor_id_)::value_type>(value);
-            } else if (line.starts_with("Model name")) {
+            } else if (detail::starts_with(line, "Model name")) {
                 general_samples_.name_ = detail::convert_to<decltype(general_samples_.name_)::value_type>(value);
-            } else if (line.starts_with("Flags")) {
+            } else if (detail::starts_with(line, "Flags")) {
                 general_samples_.flags_ = detail::split_as<decltype(general_samples_.flags_)::value_type::value_type>(value, ' ');
-            } else if (line.starts_with("Frequency boost")) {
+            } else if (detail::starts_with(line, "Frequency boost")) {
                 clock_samples_.auto_boosted_clock_enabled_ = value == "enabled";
-            } else if (line.starts_with("CPU max MHz")) {
+            } else if (detail::starts_with(line, "CPU max MHz")) {
                 clock_samples_.clock_frequency_max_ = detail::convert_to<decltype(clock_samples_.clock_frequency_max_)::value_type>(value);
-            } else if (line.starts_with("CPU min MHz")) {
+            } else if (detail::starts_with(line, "CPU min MHz")) {
                 clock_samples_.clock_frequency_min_ = detail::convert_to<decltype(clock_samples_.clock_frequency_min_)::value_type>(value);
-            } else if (line.starts_with("L1d cache")) {
+            } else if (detail::starts_with(line, "L1d cache")) {
                 memory_samples_.l1d_cache_ = detail::convert_to<decltype(memory_samples_.l1d_cache_)::value_type>(value);
-            } else if (line.starts_with("L1i cache")) {
+            } else if (detail::starts_with(line, "L1i cache")) {
                 memory_samples_.l1i_cache_ = detail::convert_to<decltype(memory_samples_.l1i_cache_)::value_type>(value);
-            } else if (line.starts_with("L2 cache")) {
+            } else if (detail::starts_with(line, "L2 cache")) {
                 memory_samples_.l2_cache_ = detail::convert_to<decltype(memory_samples_.l2_cache_)::value_type>(value);
-            } else if (line.starts_with("L3 cache")) {
+            } else if (detail::starts_with(line, "L3 cache")) {
                 memory_samples_.l3_cache_ = detail::convert_to<decltype(memory_samples_.l3_cache_)::value_type>(value);
             }
         }
@@ -401,7 +403,7 @@ void cpu_hardware_sampler::sampling_loop() {
                         power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                     } else {
                         const std::string header_str{ header[i] };
-                        if (idle_state_samples_.idle_states_.value().contains(header_str)) {
+                        if (idle_state_samples_.idle_states_.value().count(header_str) > decltype(idle_state_samples_)::map_type::size_type{ 0 }) {
                             using vector_type = cpu_idle_states_samples::map_type::mapped_type;
                             idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                         }
@@ -426,7 +428,7 @@ std::string cpu_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return std::format("{}\n"
+    return fmt::format("{}\n"
                        "{}\n"
                        "{}\n"
                        "{}\n"
@@ -447,7 +449,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler)
         out.setstate(std::ios_base::failbit);
         return out;
     } else {
-        return out << std::format("sampling interval: {}\n"
+        return out << fmt::format("sampling interval: {}\n"
                                   "time points: [{}]\n\n"
                                   "general samples:\n{}\n\n"
                                   "clock samples:\n{}\n\n"
@@ -457,7 +459,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler)
                                   "gfx samples:\n{}\n\n"
                                   "idle state samples:\n{}",
                                   sampler.sampling_interval(),
-                                  detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
+                                  fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
                                   sampler.general_samples(),
                                   sampler.clock_samples(),
                                   sampler.power_samples(),
diff --git a/src/hardware_sampling/cpu/utility.cpp b/src/hardware_sampling/cpu/utility.cpp
index 7ba16d2..2b0080f 100644
--- a/src/hardware_sampling/cpu/utility.cpp
+++ b/src/hardware_sampling/cpu/utility.cpp
@@ -9,12 +9,12 @@
 
 #include "hardware_sampling/utility.hpp"  // hws::detail::split_as
 
+#include "fmt/format.h"  // fmt::format
 #include "subprocess.h"  // subprocess_s, subprocess_create, subprocess_join, subprocess_stdout, subprocess_option_e
 
 #include <algorithm>    // std::transform
 #include <cstddef>      // std::size_t
 #include <cstdio>       // std::FILE, std::fread
-#include <format>       // std::format
 #include <stdexcept>    // std::runtime_error
 #include <string>       // std::string
 #include <string_view>  // std::string_view
@@ -41,7 +41,7 @@ std::string run_subprocess(const std::string_view cmd_line) {
     int return_code{};
     HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code));
     if (return_code != 0) {
-        throw std::runtime_error{ std::format("Error: \"{}\" returned with {}!", cmd_line, return_code) };
+        throw std::runtime_error{ fmt::format("Error: \"{}\" returned with {}!", cmd_line, return_code) };
     }
 
     // get output handle and read data -> stdout and stderr are the same handle
diff --git a/src/hardware_sampling/event.cpp b/src/hardware_sampling/event.cpp
index b88eaa3..e21c715 100644
--- a/src/hardware_sampling/event.cpp
+++ b/src/hardware_sampling/event.cpp
@@ -7,13 +7,14 @@
 
 #include "hardware_sampling/event.hpp"
 
-#include <format>   // std::format
+#include "fmt/format.h"  // fmt::format
+
 #include <ostream>  // std::ostream
 
 namespace hws {
 
 std::ostream &operator<<(std::ostream &out, const event &e) {
-    return out << std::format("time_point: {}\n"
+    return out << fmt::format("time_point: {}\n"
                               "name: {}",
                               e.time_point.time_since_epoch(),
                               e.name);
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 63fbda4..0b80c81 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -10,8 +10,10 @@
 #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
 #include "hardware_sampling/gpu_amd/utility.hpp"           // HWS_ROCM_SMI_ERROR_CHECK
 #include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"                   // hws::detail::{time_points_to_epoch, join}
+#include "hardware_sampling/utility.hpp"                   // hws::detail::time_points_to_epoch
 
+#include "fmt/format.h"           // fmt::format
+#include "fmt/ranges.h"           // fmt::join
 #include "hip/hip_runtime_api.h"  // HIP runtime functions
 #include "rocm_smi/rocm_smi.h"    // ROCm SMI runtime functions
 
@@ -19,7 +21,6 @@
 #include <cstddef>    // std::size_t
 #include <cstdint>    // std::uint32_t, std::uint64_t
 #include <exception>  // std::exception, std::terminate
-#include <format>     // std::format
 #include <ios>        // std::ios_base
 #include <iostream>   // std::cerr, std::endl
 #include <optional>   // std::optional
@@ -278,8 +279,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         float resolution{};
         std::uint64_t power_total_energy_consumption{};
         if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {
-            const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) *
-                                      static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
+            const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
             power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ scaled_value / 1000.0 / 1000.0 };
         }
     }
@@ -538,8 +538,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     float resolution{};
                     std::uint64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp));
-                    const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) *
-                                              static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
+                    const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
                     power_samples_.power_total_energy_consumption_->push_back(scaled_value / 1000.0);
                 }
 
@@ -655,7 +654,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 }
 
 std::string gpu_amd_hardware_sampler::device_identification() const {
-    return std::format("gpu_amd_device_{}", device_id_);
+    return fmt::format("gpu_amd_device_{}", device_id_);
 }
 
 std::string gpu_amd_hardware_sampler::generate_yaml_string() const {
@@ -664,7 +663,7 @@ std::string gpu_amd_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return std::format("{}\n"
+    return fmt::format("{}\n"
                        "{}\n"
                        "{}\n"
                        "{}\n"
@@ -681,7 +680,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp
         out.setstate(std::ios_base::failbit);
         return out;
     } else {
-        return out << std::format("sampling interval: {}\n"
+        return out << fmt::format("sampling interval: {}\n"
                                   "time points: [{}]\n\n"
                                   "general samples:\n{}\n\n"
                                   "clock samples:\n{}\n\n"
@@ -689,7 +688,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp
                                   "memory samples:\n{}\n\n"
                                   "temperature samples:\n{}",
                                   sampler.sampling_interval(),
-                                  detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
+                                  fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
                                   sampler.general_samples(),
                                   sampler.clock_samples(),
                                   sampler.power_samples(),
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index 579ea29..568082e 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -7,11 +7,12 @@
 
 #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join}
+#include "hardware_sampling/utility.hpp"  // hws::detail::value_or_default
 
+#include "fmt/format.h"         // fmt::format
+#include "fmt/ranges.h"         // fmt::join
 #include "rocm_smi/rocm_smi.h"  // RSMI_MAX_FAN_SPEED
 
-#include <format>   // std::format
 #include <ostream>  // std::ostream
 #include <string>   // std::string
 
@@ -26,28 +27,28 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
 
     // device architecture
     if (this->architecture_.has_value()) {
-        str += std::format("  architecture:\n"
+        str += fmt::format("  architecture:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->architecture_.value());
     }
     // device byte order
     if (this->byte_order_.has_value()) {
-        str += std::format("  byte_order:\n"
+        str += fmt::format("  byte_order:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->byte_order_.value());
     }
     // the vendor specific ID
     if (this->vendor_id_.has_value()) {
-        str += std::format("  vendor_id:\n"
+        str += fmt::format("  vendor_id:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->vendor_id_.value());
     }
     // device name
     if (this->name_.has_value()) {
-        str += std::format("  name:\n"
+        str += fmt::format("  name:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->name_.value());
@@ -55,24 +56,24 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
 
     // device compute utilization
     if (this->compute_utilization_.has_value()) {
-        str += std::format("  compute_utilization:\n"
+        str += fmt::format("  compute_utilization:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->compute_utilization_.value(), ", "));
+                           fmt::join(this->compute_utilization_.value(), ", "));
     }
     // device memory utilization
     if (this->memory_utilization_.has_value()) {
-        str += std::format("  memory_utilization:\n"
+        str += fmt::format("  memory_utilization:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_utilization_.value(), ", "));
+                           fmt::join(this->memory_utilization_.value(), ", "));
     }
     // performance state
     if (this->performance_level_.has_value()) {
-        str += std::format("  performance_state:\n"
+        str += fmt::format("  performance_state:\n"
                            "    unit: \"int - see rsmi_dev_perf_level_t\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->performance_level_.value(), ", "));
+                           fmt::join(this->performance_level_.value(), ", "));
     }
 
     // remove last newline
@@ -82,7 +83,7 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) {
-    return out << std::format("architecture [string]: {}\n"
+    return out << fmt::format("architecture [string]: {}\n"
                               "byte_order [string]: {}\n"
                               "vendor_id [string]: {}\n"
                               "name [string]: {}\n"
@@ -93,9 +94,9 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samp
                               detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_vendor_id()),
                               detail::value_or_default(samples.get_name()),
-                              detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
-                              detail::join(detail::value_or_default(samples.get_performance_level()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_performance_level()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -107,95 +108,95 @@ std::string rocm_smi_clock_samples::generate_yaml_string() const {
 
     // system clock min frequencies
     if (this->clock_frequency_min_.has_value()) {
-        str += std::format("  clock_frequency_min:\n"
+        str += fmt::format("  clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->clock_frequency_min_.value());
     }
     // system clock max frequencies
     if (this->clock_frequency_max_.has_value()) {
-        str += std::format("  clock_frequency_max:\n"
+        str += fmt::format("  clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->clock_frequency_max_.value());
     }
     // memory clock min frequencies
     if (this->memory_clock_frequency_min_.has_value()) {
-        str += std::format("  memory_clock_frequency_min:\n"
+        str += fmt::format("  memory_clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->memory_clock_frequency_min_.value());
     }
     // memory clock max frequencies
     if (this->memory_clock_frequency_max_.has_value()) {
-        str += std::format("  memory_clock_frequency_max:\n"
+        str += fmt::format("  memory_clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->memory_clock_frequency_max_.value());
     }
     // socket clock min frequencies
     if (this->socket_clock_frequency_min_.has_value()) {
-        str += std::format("  socket_clock_frequency_min:\n"
+        str += fmt::format("  socket_clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->socket_clock_frequency_min_.value());
     }
     // socket clock max frequencies
     if (this->socket_clock_frequency_max_.has_value()) {
-        str += std::format("  socket_clock_frequency_max:\n"
+        str += fmt::format("  socket_clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->socket_clock_frequency_max_.value());
     }
     // the available clock frequencies
     if (this->available_clock_frequencies_.has_value()) {
-        str += std::format("  available_clock_frequencies:\n"
+        str += fmt::format("  available_clock_frequencies:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->available_clock_frequencies_.value(), ", "));
+                           fmt::join(this->available_clock_frequencies_.value(), ", "));
     }
     // the available memory clock frequencies
     if (this->available_memory_clock_frequencies_.has_value()) {
-        str += std::format("  available_memory_clock_frequencies:\n"
+        str += fmt::format("  available_memory_clock_frequencies:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->available_memory_clock_frequencies_.value(), ", "));
+                           fmt::join(this->available_memory_clock_frequencies_.value(), ", "));
     }
 
     // system clock frequency
     if (this->clock_frequency_.has_value()) {
-        str += std::format("  clock_frequency:\n"
+        str += fmt::format("  clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_frequency_.value(), ", "));
+                           fmt::join(this->clock_frequency_.value(), ", "));
     }
     // memory clock frequency
     if (this->memory_clock_frequency_.has_value()) {
-        str += std::format("  memory_clock_frequency:\n"
+        str += fmt::format("  memory_clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_clock_frequency_.value(), ", "));
+                           fmt::join(this->memory_clock_frequency_.value(), ", "));
     }
     // socket clock frequency
     if (this->socket_clock_frequency_.has_value()) {
-        str += std::format("  socket_clock_frequency:\n"
+        str += fmt::format("  socket_clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->socket_clock_frequency_.value(), ", "));
+                           fmt::join(this->socket_clock_frequency_.value(), ", "));
     }
     // overdrive level
     if (this->overdrive_level_.has_value()) {
-        str += std::format("  overdrive_level:\n"
+        str += fmt::format("  overdrive_level:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->overdrive_level_.value(), ", "));
+                           fmt::join(this->overdrive_level_.value(), ", "));
     }
     // memory overdrive level
     if (this->memory_overdrive_level_.has_value()) {
-        str += std::format("  memory_overdrive_level:\n"
+        str += fmt::format("  memory_overdrive_level:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_overdrive_level_.value(), ", "));
+                           fmt::join(this->memory_overdrive_level_.value(), ", "));
     }
 
     // remove last newline
@@ -205,7 +206,7 @@ std::string rocm_smi_clock_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples) {
-    return out << std::format("clock_frequency_min [MHz]: {}\n"
+    return out << fmt::format("clock_frequency_min [MHz]: {}\n"
                               "clock_frequency_max [MHz]: {}\n"
                               "memory_clock_frequency_min [MHz]: {}\n"
                               "memory_clock_frequency_max [MHz]: {}\n"
@@ -224,13 +225,13 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &sample
                               detail::value_or_default(samples.get_memory_clock_frequency_max()),
                               detail::value_or_default(samples.get_socket_clock_frequency_min()),
                               detail::value_or_default(samples.get_socket_clock_frequency_max()),
-                              detail::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "),
-                              detail::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_socket_clock_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_overdrive_level()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_socket_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_overdrive_level()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -242,53 +243,53 @@ std::string rocm_smi_power_samples::generate_yaml_string() const {
 
     // power management limit
     if (this->power_management_limit_.has_value()) {
-        str += std::format("  power_management_limit:\n"
+        str += fmt::format("  power_management_limit:\n"
                            "    unit: \"W\"\n"
                            "    values: {}\n",
                            this->power_management_limit_.value());
     }
     // power enforced limit
     if (this->power_enforced_limit_.has_value()) {
-        str += std::format("  power_enforced_limit:\n"
+        str += fmt::format("  power_enforced_limit:\n"
                            "    unit: \"W\"\n"
                            "    values: {}\n",
                            this->power_enforced_limit_.value());
     }
     // power measurement type
     if (this->power_measurement_type_.has_value()) {
-        str += std::format("  power_measurement_type:\n"
+        str += fmt::format("  power_measurement_type:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->power_measurement_type_.value());
     }
     // available power levels
     if (this->available_power_profiles_.has_value()) {
-        str += std::format("  available_power_profiles:\n"
+        str += fmt::format("  available_power_profiles:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->available_power_profiles_.value(), ", "));
+                           fmt::join(this->available_power_profiles_.value(), ", "));
     }
 
     // current power usage
     if (this->power_usage_.has_value()) {
-        str += std::format("  power_usage:\n"
+        str += fmt::format("  power_usage:\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->power_usage_.value(), ", "));
+                           fmt::join(this->power_usage_.value(), ", "));
     }
     // total energy consumed
     if (this->power_total_energy_consumption_.has_value()) {
-        str += std::format("  power_total_energy_consumed:\n"
+        str += fmt::format("  power_total_energy_consumed:\n"
                            "    unit: \"J\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->power_total_energy_consumption_.value(), ", "));
+                           fmt::join(this->power_total_energy_consumption_.value(), ", "));
     }
     // current power level
     if (this->power_profile_.has_value()) {
-        str += std::format("  power_profile:\n"
+        str += fmt::format("  power_profile:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->power_profile_.value(), ", "));
+                           fmt::join(this->power_profile_.value(), ", "));
     }
 
     // remove last newline
@@ -298,7 +299,7 @@ std::string rocm_smi_power_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples) {
-    return out << std::format("power_management_limit [W]: {}\n"
+    return out << fmt::format("power_management_limit [W]: {}\n"
                               "power_enforced_limit [W]: {}\n"
                               "power_measurement_type [string]: {}\n"
                               "available_power_profiles [string]: [{}]\n"
@@ -308,10 +309,10 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &sample
                               detail::value_or_default(samples.get_power_management_limit()),
                               detail::value_or_default(samples.get_power_enforced_limit()),
                               detail::value_or_default(samples.get_power_measurement_type()),
-                              detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_usage()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_profile()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_available_power_profiles()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_usage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_profile()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -323,28 +324,28 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const {
 
     // total memory
     if (this->memory_total_.has_value()) {
-        str += std::format("  memory_total:\n"
+        str += fmt::format("  memory_total:\n"
                            "    unit: \"B\"\n"
                            "    values: {}\n",
                            this->memory_total_.value());
     }
     // total visible memory
     if (this->visible_memory_total_.has_value()) {
-        str += std::format("  visible_memory_total:\n"
+        str += fmt::format("  visible_memory_total:\n"
                            "    unit: \"B\"\n"
                            "    values: {}\n",
                            this->visible_memory_total_.value());
     }
     // min number of PCIe lanes
     if (this->min_num_pcie_lanes_.has_value()) {
-        str += std::format("  min_num_pcie_lanes:\n"
+        str += fmt::format("  min_num_pcie_lanes:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->min_num_pcie_lanes_.value());
     }
     // max number of PCIe lanes
     if (this->max_num_pcie_lanes_.has_value()) {
-        str += std::format("  max_num_pcie_lanes:\n"
+        str += fmt::format("  max_num_pcie_lanes:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->max_num_pcie_lanes_.value());
@@ -352,10 +353,10 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const {
 
     // used memory
     if (this->memory_used_.has_value()) {
-        str += std::format("  memory_used:\n"
+        str += fmt::format("  memory_used:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_used_.value(), ", "));
+                           fmt::join(this->memory_used_.value(), ", "));
     }
     // free memory
     if (this->memory_used_.has_value() && this->memory_total_.has_value()) {
@@ -363,25 +364,25 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const {
         for (std::size_t i = 0; i < memory_free.size(); ++i) {
             memory_free[i] -= this->memory_used_.value()[i];
         }
-        str += std::format("  memory_free:\n"
+        str += fmt::format("  memory_free:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(memory_free, ", "));
+                           fmt::join(memory_free, ", "));
     }
 
     // PCIe bandwidth
     if (this->pcie_transfer_rate_.has_value()) {
-        str += std::format("  pcie_bandwidth:\n"
+        str += fmt::format("  pcie_bandwidth:\n"
                            "    unit: \"T/s\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->pcie_transfer_rate_.value(), ", "));
+                           fmt::join(this->pcie_transfer_rate_.value(), ", "));
     }
     // number of PCIe lanes
     if (this->num_pcie_lanes_.has_value()) {
-        str += std::format("  pcie_num_lanes:\n"
+        str += fmt::format("  pcie_num_lanes:\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->num_pcie_lanes_.value(), ", "));
+                           fmt::join(this->num_pcie_lanes_.value(), ", "));
     }
 
     // remove last newline
@@ -391,7 +392,7 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples) {
-    return out << std::format("memory_total [B]: {}\n"
+    return out << fmt::format("memory_total [B]: {}\n"
                               "visible_memory_total [B]: {}\n"
                               "min_num_pcie_lanes [int]: {}\n"
                               "max_num_pcie_lanes [int]: {}\n"
@@ -402,9 +403,9 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &sampl
                               detail::value_or_default(samples.get_visible_memory_total()),
                               detail::value_or_default(samples.get_min_num_pcie_lanes()),
                               detail::value_or_default(samples.get_max_num_pcie_lanes()),
-                              detail::join(detail::value_or_default(samples.get_memory_used()), ", "),
-                              detail::join(detail::value_or_default(samples.get_pcie_transfer_rate()), ", "),
-                              detail::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_memory_used()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_transfer_rate()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -416,112 +417,112 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const {
 
     // number of fans (emulated)
     if (this->num_fans_.has_value()) {
-        str += std::format("  num_fans:\n"
+        str += fmt::format("  num_fans:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->num_fans_.value());
     }
     // maximum fan speed
     if (this->max_fan_speed_.has_value()) {
-        str += std::format("  max_fan_speed:\n"
+        str += fmt::format("  max_fan_speed:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->max_fan_speed_.value());
     }
     // minimum GPU edge temperature
     if (this->temperature_edge_min_.has_value()) {
-        str += std::format("  temperature_gpu_min:\n"
+        str += fmt::format("  temperature_gpu_min:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_edge_min_.value());
     }
     // maximum GPU edge temperature
     if (this->temperature_edge_max_.has_value()) {
-        str += std::format("  temperature_gpu_max:\n"
+        str += fmt::format("  temperature_gpu_max:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_edge_max_.value());
     }
     // minimum GPU hotspot temperature
     if (this->temperature_hotspot_min_.has_value()) {
-        str += std::format("  temperature_hotspot_min:\n"
+        str += fmt::format("  temperature_hotspot_min:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hotspot_min_.value());
     }
     // maximum GPU hotspot temperature
     if (this->temperature_hotspot_max_.has_value()) {
-        str += std::format("  temperature_hotspot_max:\n"
+        str += fmt::format("  temperature_hotspot_max:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hotspot_max_.value());
     }
     // minimum GPU memory temperature
     if (this->temperature_memory_min_.has_value()) {
-        str += std::format("  temperature_memory_min:\n"
+        str += fmt::format("  temperature_memory_min:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_memory_min_.value());
     }
     // maximum GPU memory temperature
     if (this->temperature_memory_max_.has_value()) {
-        str += std::format("  temperature_memory_max:\n"
+        str += fmt::format("  temperature_memory_max:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_memory_max_.value());
     }
     // minimum GPU HBM 0 temperature
     if (this->temperature_hbm_0_min_.has_value()) {
-        str += std::format("  temperature_hbm_0_min:\n"
+        str += fmt::format("  temperature_hbm_0_min:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hbm_0_min_.value());
     }
     // maximum GPU HBM 0 temperature
     if (this->temperature_hbm_0_max_.has_value()) {
-        str += std::format("  temperature_hbm_0_max:\n"
+        str += fmt::format("  temperature_hbm_0_max:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hbm_0_max_.value());
     }
     // minimum GPU HBM 1 temperature
     if (this->temperature_hbm_1_min_.has_value()) {
-        str += std::format("  temperature_hbm_1_min:\n"
+        str += fmt::format("  temperature_hbm_1_min:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hbm_1_min_.value());
     }
     // maximum GPU HBM 1 temperature
     if (this->temperature_hbm_1_max_.has_value()) {
-        str += std::format("  temperature_hbm_1_max:\n"
+        str += fmt::format("  temperature_hbm_1_max:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hbm_1_max_.value());
     }
     // minimum GPU HBM 2 temperature
     if (this->temperature_hbm_2_min_.has_value()) {
-        str += std::format("  temperature_hbm_2_min:\n"
+        str += fmt::format("  temperature_hbm_2_min:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hbm_2_min_.value());
     }
     // maximum GPU HBM 2 temperature
     if (this->temperature_hbm_2_max_.has_value()) {
-        str += std::format("  temperature_hbm_2_max:\n"
+        str += fmt::format("  temperature_hbm_2_max:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hbm_2_max_.value());
     }
     // minimum GPU HBM 3 temperature
     if (this->temperature_hbm_3_min_.has_value()) {
-        str += std::format("  temperature_hbm_3_min:\n"
+        str += fmt::format("  temperature_hbm_3_min:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hbm_3_min_.value());
     }
     // maximum GPU HBM 3 temperature
     if (this->temperature_hbm_3_max_.has_value()) {
-        str += std::format("  temperature_hbm_3_max:\n"
+        str += fmt::format("  temperature_hbm_3_max:\n"
                            "    unit: \"m°C\"\n"
                            "    values: {}\n",
                            this->temperature_hbm_3_max_.value());
@@ -533,59 +534,59 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const {
         for (std::size_t i = 0; i < fan_speed_percent.size(); ++i) {
             fan_speed_percent[i] = static_cast<double>(this->fan_speed_.value()[i]) / static_cast<double>(RSMI_MAX_FAN_SPEED);
         }
-        str += std::format("  fan_speed:\n"
+        str += fmt::format("  fan_speed:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(fan_speed_percent, ", "));
+                           fmt::join(fan_speed_percent, ", "));
     }
     // GPU edge temperature
     if (this->temperature_edge_.has_value()) {
-        str += std::format("  temperature_gpu:\n"
+        str += fmt::format("  temperature_gpu:\n"
                            "    unit: \"m°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->temperature_edge_.value(), ", "));
+                           fmt::join(this->temperature_edge_.value(), ", "));
     }
     // GPU hotspot temperature
     if (this->temperature_hotspot_.has_value()) {
-        str += std::format("  temperature_hotspot:\n"
+        str += fmt::format("  temperature_hotspot:\n"
                            "    unit: \"m°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->temperature_hotspot_.value(), ", "));
+                           fmt::join(this->temperature_hotspot_.value(), ", "));
     }
     // GPU memory temperature
     if (this->temperature_memory_.has_value()) {
-        str += std::format("  temperature_memory:\n"
+        str += fmt::format("  temperature_memory:\n"
                            "    unit: \"m°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->temperature_memory_.value(), ", "));
+                           fmt::join(this->temperature_memory_.value(), ", "));
     }
     // GPU HBM 0 temperature
     if (this->temperature_hbm_0_.has_value()) {
-        str += std::format("  temperature_hbm_0:\n"
+        str += fmt::format("  temperature_hbm_0:\n"
                            "    unit: \"m°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->temperature_hbm_0_.value(), ", "));
+                           fmt::join(this->temperature_hbm_0_.value(), ", "));
     }
     // GPU HBM 1 temperature
     if (this->temperature_hbm_1_.has_value()) {
-        str += std::format("  temperature_hbm_1:\n"
+        str += fmt::format("  temperature_hbm_1:\n"
                            "    unit: \"m°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->temperature_hbm_1_.value(), ", "));
+                           fmt::join(this->temperature_hbm_1_.value(), ", "));
     }
     // GPU HBM 2 temperature
     if (this->temperature_hbm_2_.has_value()) {
-        str += std::format("  temperature_hbm_2:\n"
+        str += fmt::format("  temperature_hbm_2:\n"
                            "    unit: \"m°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->temperature_hbm_2_.value(), ", "));
+                           fmt::join(this->temperature_hbm_2_.value(), ", "));
     }
     // GPU HBM 3 temperature
     if (this->temperature_hbm_3_.has_value()) {
-        str += std::format("  temperature_hbm_3:\n"
+        str += fmt::format("  temperature_hbm_3:\n"
                            "    unit: \"m°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->temperature_hbm_3_.value(), ", "));
+                           fmt::join(this->temperature_hbm_3_.value(), ", "));
     }
 
     // remove last newline
@@ -595,7 +596,7 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples) {
-    return out << std::format("num_fans [int]: {}\n"
+    return out << fmt::format("num_fans [int]: {}\n"
                               "max_fan_speed [int]: {}\n"
                               "temperature_edge_min [m°C]: {}\n"
                               "temperature_edge_max [m°C]: {}\n"
@@ -635,14 +636,14 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &
                               detail::value_or_default(samples.get_temperature_hbm_2_max()),
                               detail::value_or_default(samples.get_temperature_hbm_3_min()),
                               detail::value_or_default(samples.get_temperature_hbm_3_max()),
-                              detail::join(detail::value_or_default(samples.get_fan_speed()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_edge()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hotspot()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_memory()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hbm_0()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hbm_1()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hbm_2()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hbm_3()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_fan_speed()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature_edge()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature_hotspot()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature_memory()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature_hbm_0()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature_hbm_1()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature_hbm_2()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature_hbm_3()), ", "));
 }
 
 }  // namespace hws
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
index 3fd9a1e..2027135 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
@@ -227,17 +227,45 @@ void gpu_intel_hardware_sampler::sampling_loop() {
             if (zesDeviceEnumPowerDomains(device, &num_power_domains, power_handles.data()) == ZE_RESULT_SUCCESS) {
                 if (!power_handles.empty()) {
                     // NOTE: only the first power domain is used here
+                    // get the power measurement type
+                    // NOTE: only the first value is used here!
+                    std::uint32_t num_power_limit_descriptors{ 1 };
+                    zes_power_limit_ext_desc_t desc{};
+                    if (zesPowerGetLimitsExt(power_handles.front(), &num_power_limit_descriptors, &desc) == ZE_RESULT_SUCCESS) {
+                        switch (desc.level) {
+                            case ZES_POWER_LEVEL_UNKNOWN:
+                                power_samples_.power_measurement_type_ = "unknown";
+                                break;
+                            case ZES_POWER_LEVEL_SUSTAINED:
+                                power_samples_.power_measurement_type_ = "sustained";
+                                break;
+                            case ZES_POWER_LEVEL_BURST:
+                                power_samples_.power_measurement_type_ = "burst";
+                                break;
+                            case ZES_POWER_LEVEL_PEAK:
+                                power_samples_.power_measurement_type_ = "peak";
+                                break;
+                            case ZES_POWER_LEVEL_INSTANTANEOUS:
+                                power_samples_.power_measurement_type_ = "current/instant";
+                                break;
+                            case ZES_POWER_LEVEL_FORCE_UINT32:
+                                power_samples_.power_measurement_type_ = "force uint32";
+                                break;
+                        }
+
+                        power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(desc.limit);
+                    }
+
                     // get total power consumption
                     zes_power_energy_counter_t energy_counter{};
                     if (zesPowerGetEnergyCounter(power_handles.front(), &energy_counter) == ZE_RESULT_SUCCESS) {
-                        power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ energy_counter.energy };
+                        power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(energy_counter.energy) / 1000.0 / 1000.0 };
                     }
 
                     // get energy thresholds
                     zes_energy_threshold_t energy_threshold{};
                     if (zesPowerGetEnergyThreshold(power_handles.front(), &energy_threshold) == ZE_RESULT_SUCCESS) {
-                        power_samples_.energy_threshold_enabled_ = static_cast<decltype(power_samples_.energy_threshold_enabled_)::value_type>(energy_threshold.enable);
-                        power_samples_.energy_threshold_ = energy_threshold.threshold;
+                        power_samples_.power_management_mode_ = static_cast<decltype(power_samples_.power_management_mode_)::value_type>(energy_threshold.enable);
                     }
                 }
             }
@@ -453,7 +481,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                         zes_power_energy_counter_t energy_counter{};
                         HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter));
 
-                        power_samples_.power_total_energy_consumption_->push_back(energy_counter.energy);
+                        power_samples_.power_total_energy_consumption_->push_back(static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(energy_counter.energy) / 1000.0 / 1000.0);
                     }
                 }
             }
diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
index 70f1016..971bfb9 100644
--- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
+++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
@@ -20,15 +20,6 @@ namespace hws {
 
 namespace detail {
 
-template <typename T>
-struct is_vector : std::false_type { };
-
-template <typename T>
-struct is_vector<std::vector<T>> : std::true_type { };
-
-template <typename T>
-constexpr bool is_vector_v = is_vector<T>::value;
-
 template <typename MapType>
 void append_map_values(std::string &str, const std::string_view entry_name, const MapType &map) {
     if (map.has_value()) {
@@ -248,19 +239,26 @@ std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samp
 std::string level_zero_power_samples::generate_yaml_string() const {
     std::string str{ "power:\n" };
 
-    // flag whether the energy threshold is enabled
-    if (this->energy_threshold_enabled_.has_value()) {
-        str += std::format("  energy_threshold_enabled:\n"
-                           "    unit: \"bool\"\n"
+    // power enforced limit
+    if (this->power_enforced_limit_.has_value()) {
+        str += std::format("  power_enforced_limit:\n"
+                           "    unit: \"W\"\n"
                            "    values: {}\n",
-                           this->energy_threshold_enabled_.value());
+                           this->power_enforced_limit_.value());
     }
-    // the energy threshold
-    if (this->energy_threshold_.has_value()) {
-        str += std::format("  energy_threshold:\n"
-                           "    unit: \"J\"\n"
+    // power measurement type
+    if (this->power_measurement_type_.has_value()) {
+        str += std::format("  power_measurement_type:\n"
+                           "    unit: \"string\"\n"
+                           "    values: {}\n",
+                           this->power_measurement_type_.value());
+    }
+    // the power management mode
+    if (this->power_management_mode_.has_value()) {
+        str += std::format("  power_management_mode:\n"
+                           "    unit: \"bool\"\n"
                            "    values: {}\n",
-                           this->energy_threshold_.value());
+                           this->power_management_mode_.value());
     }
 
     // the total consumed energy
@@ -282,11 +280,13 @@ std::string level_zero_power_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const level_zero_power_samples &samples) {
-    return out << std::format("energy_threshold_enabled [bool]: {}\n"
-                              "energy_threshold [J]: {}\n"
+    return out << std::format("power_enforced_limit [W]: {}\n"
+                              "power_measurement_type [string]: {}\n"
+                              "power_management_mode [bool]: {}\n"
                               "power_total_energy_consumption [J]: [{}]",
-                              detail::value_or_default(samples.get_energy_threshold_enabled()),
-                              detail::value_or_default(samples.get_energy_threshold()),
+                              detail::value_or_default(samples.get_power_enforced_limit()),
+                              detail::value_or_default(samples.get_power_measurement_type()),
+                              detail::value_or_default(samples.get_power_management_mode()),
                               detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "));
 }
 
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 6e0fe7c..17e7049 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -11,15 +11,16 @@
 #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"             // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
 #include "hardware_sampling/gpu_nvidia/utility.hpp"                  // HWS_NVML_ERROR_CHECK
 #include "hardware_sampling/hardware_sampler.hpp"                    // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"                             // hws::detail::{time_points_to_epoch, join}
+#include "hardware_sampling/utility.hpp"                             // hws::detail::time_points_to_epoch
 
-#include "nvml.h"  // NVML runtime functions
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+#include "nvml.h"        // NVML runtime functions
 
 #include <algorithm>  // std::min_element, std::sort, std::transform
 #include <chrono>     // std::chrono::{steady_clock, duration_cast, milliseconds}
 #include <cstddef>    // std::size_t
 #include <exception>  // std::exception, std::terminate
-#include <format>     // std::format
 #include <ios>        // std::ios_base
 #include <iostream>   // std::cerr, std::endl
 #include <numeric>    // std::iota
@@ -534,7 +535,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 std::string gpu_nvidia_hardware_sampler::device_identification() const {
     nvmlPciInfo_st pcie_info{};
     HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info));
-    return std::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device);
+    return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device);
 }
 
 std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const {
@@ -543,7 +544,7 @@ std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return std::format("{}\n"
+    return fmt::format("{}\n"
                        "{}\n"
                        "{}\n"
                        "{}\n"
@@ -560,7 +561,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s
         out.setstate(std::ios_base::failbit);
         return out;
     } else {
-        return out << std::format("sampling interval: {}\n"
+        return out << fmt::format("sampling interval: {}\n"
                                   "time points: [{}]\n\n"
                                   "general samples:\n{}\n\n"
                                   "clock samples:\n{}\n\n"
@@ -568,7 +569,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s
                                   "memory samples:\n{}\n\n"
                                   "temperature samples:\n{}",
                                   sampler.sampling_interval(),
-                                  detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
+                                  fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
                                   sampler.general_samples(),
                                   sampler.clock_samples(),
                                   sampler.power_samples(),
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 64f559d..71fb7a6 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -7,9 +7,11 @@
 
 #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join, map_entry_to_string}
+#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, map_entry_to_string}
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
 
-#include <format>   // std::format
 #include <ostream>  // std::ostream
 #include <string>   // std::string
 
@@ -24,42 +26,42 @@ std::string nvml_general_samples::generate_yaml_string() const {
 
     // device architecture
     if (this->architecture_.has_value()) {
-        str += std::format("  architecture:\n"
+        str += fmt::format("  architecture:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->architecture_.value());
     }
     // device byte order
     if (this->byte_order_.has_value()) {
-        str += std::format("  byte_order:\n"
+        str += fmt::format("  byte_order:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->byte_order_.value());
     }
     // the vendor specific ID
     if (this->vendor_id_.has_value()) {
-        str += std::format("  vendor_id:\n"
+        str += fmt::format("  vendor_id:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->vendor_id_.value());
     }
     // device name
     if (this->name_.has_value()) {
-        str += std::format("  name:\n"
+        str += fmt::format("  name:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->name_.value());
     }
     // persistence mode enabled
     if (this->persistence_mode_.has_value()) {
-        str += std::format("  persistence_mode:\n"
+        str += fmt::format("  persistence_mode:\n"
                            "    unit: \"bool\"\n"
                            "    values: {}\n",
                            this->persistence_mode_.value());
     }
     // number of cores
     if (this->num_cores_.has_value()) {
-        str += std::format("  num_cores:\n"
+        str += fmt::format("  num_cores:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->num_cores_.value());
@@ -67,25 +69,25 @@ std::string nvml_general_samples::generate_yaml_string() const {
 
     // device compute utilization
     if (this->compute_utilization_.has_value()) {
-        str += std::format("  compute_utilization:\n"
+        str += fmt::format("  compute_utilization:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->compute_utilization_.value(), ", "));
+                           fmt::join(this->compute_utilization_.value(), ", "));
     }
 
     // device memory utilization
     if (this->memory_utilization_.has_value()) {
-        str += std::format("  memory_utilization:\n"
+        str += fmt::format("  memory_utilization:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_utilization_.value(), ", "));
+                           fmt::join(this->memory_utilization_.value(), ", "));
     }
     // performance state
     if (this->performance_level_.has_value()) {
-        str += std::format("  performance_level:\n"
+        str += fmt::format("  performance_level:\n"
                            "    unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->performance_level_.value(), ", "));
+                           fmt::join(this->performance_level_.value(), ", "));
     }
 
     // remove last newline
@@ -95,7 +97,7 @@ std::string nvml_general_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) {
-    return out << std::format("architecture [string]: {}\n"
+    return out << fmt::format("architecture [string]: {}\n"
                               "byte_order [string]: {}\n"
                               "vendor_id [string]: {}\n"
                               "name [string]: {}\n"
@@ -110,9 +112,9 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples)
                               detail::value_or_default(samples.get_name()),
                               detail::value_or_default(samples.get_persistence_mode()),
                               detail::value_or_default(samples.get_num_cores()),
-                              detail::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
-                              detail::join(detail::value_or_default(samples.get_performance_level()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_performance_level()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -124,97 +126,97 @@ std::string nvml_clock_samples::generate_yaml_string() const {
 
     // adaptive clock status
     if (this->auto_boosted_clock_enabled_.has_value()) {
-        str += std::format("  auto_boosted_clock_enabled:\n"
+        str += fmt::format("  auto_boosted_clock_enabled:\n"
                            "    unit: \"bool\"\n"
                            "    values: {}\n",
                            this->auto_boosted_clock_enabled_.value());
     }
     // minimum graph clock
     if (this->clock_frequency_min_.has_value()) {
-        str += std::format("  clock_frequency_min:\n"
+        str += fmt::format("  clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->clock_frequency_min_.value());
     }
     // maximum graph clock
     if (this->clock_frequency_max_.has_value()) {
-        str += std::format("  clock_frequency_max:\n"
+        str += fmt::format("  clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->clock_frequency_max_.value());
     }
     // minimum memory clock
     if (this->memory_clock_frequency_min_.has_value()) {
-        str += std::format("  memory_clock_frequency_min:\n"
+        str += fmt::format("  memory_clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->memory_clock_frequency_min_.value());
     }
     // maximum memory clock
     if (this->memory_clock_frequency_max_.has_value()) {
-        str += std::format("  memory_clock_frequency_max:\n"
+        str += fmt::format("  memory_clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->memory_clock_frequency_max_.value());
     }
     // maximum SM clock
     if (this->sm_clock_frequency_max_.has_value()) {
-        str += std::format("  sm_clock_frequency_max:\n"
+        str += fmt::format("  sm_clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
                            this->sm_clock_frequency_max_.value());
     }
     // the available clock frequencies
     if (this->available_clock_frequencies_.has_value()) {
-        str += std::format("  available_clock_frequencies:\n"
+        str += fmt::format("  available_clock_frequencies:\n"
                            "    unit: \"MHz\"\n"
                            "    values:\n");
         for (const auto &[key, value] : this->available_clock_frequencies_.value()) {
-            str += std::format("      {}: [{}]\n", key, detail::join(value, ", "));
+            str += fmt::format("      {}: [{}]\n", key, fmt::join(value, ", "));
         }
     }
     // the available memory clock frequencies
     if (this->available_memory_clock_frequencies_.has_value()) {
-        str += std::format("  available_memory_clock_frequencies:\n"
+        str += fmt::format("  available_memory_clock_frequencies:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->available_memory_clock_frequencies_.value(), ", "));
+                           fmt::join(this->available_memory_clock_frequencies_.value(), ", "));
     }
 
     // graph clock
     if (this->clock_frequency_.has_value()) {
-        str += std::format("  clock_frequency:\n"
+        str += fmt::format("  clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_frequency_.value(), ", "));
+                           fmt::join(this->clock_frequency_.value(), ", "));
     }
     // memory clock
     if (this->memory_clock_frequency_.has_value()) {
-        str += std::format("  memory_clock_frequency:\n"
+        str += fmt::format("  memory_clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_clock_frequency_.value(), ", "));
+                           fmt::join(this->memory_clock_frequency_.value(), ", "));
     }
     // SM clock
     if (this->sm_clock_frequency_.has_value()) {
-        str += std::format("  sm_clock_frequency:\n"
+        str += fmt::format("  sm_clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->sm_clock_frequency_.value(), ", "));
+                           fmt::join(this->sm_clock_frequency_.value(), ", "));
     }
     // clock throttle reason
     if (this->throttle_reason_.has_value()) {
-        str += std::format("  throttle_reason:\n"
+        str += fmt::format("  throttle_reason:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->throttle_reason_.value(), ", "));
+                           fmt::join(this->throttle_reason_.value(), ", "));
     }
     // clock is auto-boosted
     if (this->auto_boosted_clock_.has_value()) {
-        str += std::format("  auto_boosted_clock:\n"
+        str += fmt::format("  auto_boosted_clock:\n"
                            "    unit: \"bool\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->auto_boosted_clock_.value(), ", "));
+                           fmt::join(this->auto_boosted_clock_.value(), ", "));
     }
 
     // remove last newline
@@ -224,7 +226,7 @@ std::string nvml_clock_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) {
-    return out << std::format("auto_boosted_clock_enabled [bool]: {}\n"
+    return out << fmt::format("auto_boosted_clock_enabled [bool]: {}\n"
                               "clock_frequency_min [MHz]: {}\n"
                               "clock_frequency_max [MHz]: {}\n"
                               "memory_clock_frequency_min [MHz]: {}\n"
@@ -244,12 +246,12 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) {
                               detail::value_or_default(samples.get_memory_clock_frequency_max()),
                               detail::value_or_default(samples.get_sm_clock_frequency_max()),
                               detail::map_entry_to_string(samples.get_available_clock_frequencies()),
-                              detail::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_throttle_reason()), ", "),
-                              detail::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -261,60 +263,60 @@ std::string nvml_power_samples::generate_yaml_string() const {
 
     // power management limit
     if (this->power_management_limit_.has_value()) {
-        str += std::format("  power_management_limit:\n"
+        str += fmt::format("  power_management_limit:\n"
                            "    unit: \"W\"\n"
                            "    values: {}\n",
                            this->power_management_limit_.value());
     }
     // power enforced limit
     if (this->power_enforced_limit_.has_value()) {
-        str += std::format("  power_enforced_limit:\n"
+        str += fmt::format("  power_enforced_limit:\n"
                            "    unit: \"W\"\n"
                            "    values: {}\n",
                            this->power_enforced_limit_.value());
     }
     // power measurement type
     if (this->power_measurement_type_.has_value()) {
-        str += std::format("  power_measurement_type:\n"
+        str += fmt::format("  power_measurement_type:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->power_measurement_type_.value());
     }
     // the power management mode
     if (this->power_management_mode_.has_value()) {
-        str += std::format("  power_management_mode:\n"
+        str += fmt::format("  power_management_mode:\n"
                            "    unit: \"bool\"\n"
                            "    values: {}\n",
                            this->power_management_mode_.value());
     }
     // available power levels
     if (this->available_power_profiles_.has_value()) {
-        str += std::format("  available_power_profiles:\n"
+        str += fmt::format("  available_power_profiles:\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->available_power_profiles_.value(), ", "));
+                           fmt::join(this->available_power_profiles_.value(), ", "));
     }
 
     // current power usage
     if (this->power_usage_.has_value()) {
-        str += std::format("  power_usage:\n"
+        str += fmt::format("  power_usage:\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->power_usage_.value(), ", "));
+                           fmt::join(this->power_usage_.value(), ", "));
     }
     // total energy consumed
     if (this->power_total_energy_consumption_.has_value()) {
-        str += std::format("  power_total_energy_consumed:\n"
+        str += fmt::format("  power_total_energy_consumed:\n"
                            "    unit: \"J\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->power_total_energy_consumption_.value(), ", "));
+                           fmt::join(this->power_total_energy_consumption_.value(), ", "));
     }
     // power state
     if (this->power_profile_.has_value()) {
-        str += std::format("  power_profile:\n"
+        str += fmt::format("  power_profile:\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->power_profile_.value(), ", "));
+                           fmt::join(this->power_profile_.value(), ", "));
     }
 
     // remove last newline
@@ -324,7 +326,7 @@ std::string nvml_power_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) {
-    return out << std::format("power_management_limit [W]: {}\n"
+    return out << fmt::format("power_management_limit [W]: {}\n"
                               "power_enforced_limit [W]: {}\n"
                               "power_measurement_type [string]: {}\n"
                               "power_management_mode [bool]: {}\n"
@@ -336,10 +338,10 @@ std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) {
                               detail::value_or_default(samples.get_power_enforced_limit()),
                               detail::value_or_default(samples.get_power_measurement_type()),
                               detail::value_or_default(samples.get_power_management_mode()),
-                              detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_usage()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_profile()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_available_power_profiles()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_usage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_profile()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -351,28 +353,28 @@ std::string nvml_memory_samples::generate_yaml_string() const {
 
     // total memory size
     if (this->memory_total_.has_value()) {
-        str += std::format("  memory_total:\n"
+        str += fmt::format("  memory_total:\n"
                            "    unit: \"B\"\n"
                            "    values: {}\n",
                            this->memory_total_.value());
     }
     // maximum PCIe link speed
     if (this->pcie_link_max_speed_.has_value()) {
-        str += std::format("  pcie_max_bandwidth:\n"
+        str += fmt::format("  pcie_max_bandwidth:\n"
                            "    unit: \"MBPS\"\n"
                            "    values: {}\n",
                            this->pcie_link_max_speed_.value());
     }
     // memory bus width
     if (this->memory_bus_width_.has_value()) {
-        str += std::format("  memory_bus_width:\n"
+        str += fmt::format("  memory_bus_width:\n"
                            "    unit: \"Bit\"\n"
                            "    values: {}\n",
                            this->memory_bus_width_.value());
     }
     // maximum PCIe link generation
     if (this->max_pcie_link_generation_.has_value()) {
-        str += std::format("  max_pcie_link_generation:\n"
+        str += fmt::format("  max_pcie_link_generation:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->max_pcie_link_generation_.value());
@@ -380,38 +382,38 @@ std::string nvml_memory_samples::generate_yaml_string() const {
 
     // free memory size
     if (this->memory_free_.has_value()) {
-        str += std::format("  memory_free:\n"
+        str += fmt::format("  memory_free:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_free_.value(), ", "));
+                           fmt::join(this->memory_free_.value(), ", "));
     }
     // used memory size
     if (this->memory_used_.has_value()) {
-        str += std::format("  memory_used:\n"
+        str += fmt::format("  memory_used:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_used_.value(), ", "));
+                           fmt::join(this->memory_used_.value(), ", "));
     }
     // PCIe link speed
     if (this->pcie_link_speed_.has_value()) {
-        str += std::format("  pcie_bandwidth:\n"
+        str += fmt::format("  pcie_bandwidth:\n"
                            "    unit: \"MBPS\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->pcie_link_speed_.value(), ", "));
+                           fmt::join(this->pcie_link_speed_.value(), ", "));
     }
     // PCIe link width
     if (this->pcie_link_width_.has_value()) {
-        str += std::format("  pcie_link_width:\n"
+        str += fmt::format("  pcie_link_width:\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->pcie_link_width_.value(), ", "));
+                           fmt::join(this->pcie_link_width_.value(), ", "));
     }
     // PCIe link generation
     if (this->pcie_link_generation_.has_value()) {
-        str += std::format("  pcie_link_generation:\n"
+        str += fmt::format("  pcie_link_generation:\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->pcie_link_generation_.value(), ", "));
+                           fmt::join(this->pcie_link_generation_.value(), ", "));
     }
 
     // remove last newline
@@ -421,7 +423,7 @@ std::string nvml_memory_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) {
-    return out << std::format("memory_total [B]: {}\n"
+    return out << fmt::format("memory_total [B]: {}\n"
                               "pcie_link_max_speed [MBPS]: {}\n"
                               "memory_bus_width [Bit]: {}\n"
                               "max_pcie_link_generation [int]: {}\n"
@@ -434,11 +436,11 @@ std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples)
                               detail::value_or_default(samples.get_pcie_link_max_speed()),
                               detail::value_or_default(samples.get_memory_bus_width()),
                               detail::value_or_default(samples.get_max_pcie_link_generation()),
-                              detail::join(detail::value_or_default(samples.get_memory_free()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_used()), ", "),
-                              detail::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "),
-                              detail::join(detail::value_or_default(samples.get_pcie_link_width()), ", "),
-                              detail::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_used()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_link_width()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -450,35 +452,35 @@ std::string nvml_temperature_samples::generate_yaml_string() const {
 
     // number of fans
     if (this->num_fans_.has_value()) {
-        str += std::format("  num_fans:\n"
+        str += fmt::format("  num_fans:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->num_fans_.value());
     }
     // min fan speed
     if (this->min_fan_speed_.has_value()) {
-        str += std::format("  min_fan_speed:\n"
+        str += fmt::format("  min_fan_speed:\n"
                            "    unit: \"percentage\"\n"
                            "    values: {}\n",
                            this->min_fan_speed_.value());
     }
     // max fan speed
     if (this->max_fan_speed_.has_value()) {
-        str += std::format("  max_fan_speed:\n"
+        str += fmt::format("  max_fan_speed:\n"
                            "    unit: \"percentage\"\n"
                            "    values: {}\n",
                            this->max_fan_speed_.value());
     }
     // temperature threshold GPU max
     if (this->temperature_threshold_gpu_max_.has_value()) {
-        str += std::format("  temperature_gpu_max:\n"
+        str += fmt::format("  temperature_gpu_max:\n"
                            "    unit: \"°C\"\n"
                            "    values: {}\n",
                            this->temperature_threshold_gpu_max_.value());
     }
     // temperature threshold memory max
     if (this->temperature_threshold_mem_max_.has_value()) {
-        str += std::format("  temperature_mem_max:\n"
+        str += fmt::format("  temperature_mem_max:\n"
                            "    unit: \"°C\"\n"
                            "    values: {}\n",
                            this->temperature_threshold_mem_max_.value());
@@ -486,17 +488,17 @@ std::string nvml_temperature_samples::generate_yaml_string() const {
 
     // fan speed
     if (this->fan_speed_.has_value()) {
-        str += std::format("  fan_speed:\n"
+        str += fmt::format("  fan_speed:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->fan_speed_.value(), ", "));
+                           fmt::join(this->fan_speed_.value(), ", "));
     }
     // temperature GPU
     if (this->temperature_gpu_.has_value()) {
-        str += std::format("  temperature_gpu:\n"
+        str += fmt::format("  temperature_gpu:\n"
                            "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->temperature_gpu_.value(), ", "));
+                           fmt::join(this->temperature_gpu_.value(), ", "));
     }
 
     // remove last newline
@@ -506,7 +508,7 @@ std::string nvml_temperature_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samples) {
-    return out << std::format("num_fans [int]: {}\n"
+    return out << fmt::format("num_fans [int]: {}\n"
                               "min_fan_speed [%]: {}\n"
                               "max_fan_speed [%]: {}\n"
                               "temperature_threshold_gpu_max [°C]: {}\n"
@@ -518,8 +520,8 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp
                               detail::value_or_default(samples.get_max_fan_speed()),
                               detail::value_or_default(samples.get_temperature_threshold_gpu_max()),
                               detail::value_or_default(samples.get_temperature_threshold_mem_max()),
-                              detail::join(detail::value_or_default(samples.get_fan_speed()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_gpu()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_fan_speed()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature_gpu()), ", "));
 }
 
 }  // namespace hws
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index 1c3ff49..1ac8f81 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -8,12 +8,15 @@
 #include "hardware_sampling/hardware_sampler.hpp"
 
 #include "hardware_sampling/event.hpp"    // hws::event
-#include "hardware_sampling/utility.hpp"  // hws::detail::{durations_from_reference_time, join}
+#include "hardware_sampling/utility.hpp"  // hws::detail::durations_from_reference_time
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+#include "fmt/chrono.h"  // fmt::localtime, direct formatting of std::chrono types
 
 #include <chrono>     // std::chrono::{system_clock, steady_clock, duration_cast, milliseconds}
 #include <cstddef>    // std::size_t
 #include <exception>  // std::exception
-#include <format>     // std::format
 #include <fstream>    // std::ofstream
 #include <iostream>   // std::cerr, std::endl
 #include <stdexcept>  // std::runtime_error, std::out_of_range
@@ -109,7 +112,7 @@ void hardware_sampler::add_event(decltype(event::name) name) {
 
 event hardware_sampler::get_event(const std::size_t idx) const {
     if (idx >= this->num_events()) {
-        throw std::out_of_range{ std::format("The index {} is out-of-range for the number of events {}!", idx, this->num_events()) };
+        throw std::out_of_range{ fmt::format("The index {} is out-of-range for the number of events {}!", idx, this->num_events()) };
     }
 
     return events_[idx];
@@ -126,10 +129,10 @@ void hardware_sampler::dump_yaml(const char *filename) {
     file << "---\n\n";
 
     // set the device identification
-    file << std::format("device_identification: {}\n\n", this->device_identification());
+    file << fmt::format("device_identification: {}\n\n", this->device_identification());
 
     // output the start date time of this hardware sampling
-    file << std::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", std::chrono::current_zone()->to_local(start_date_time_));
+    file << fmt::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", start_date_time_);
 
     // output the event information
     std::vector<decltype(event::time_point)> event_time_points{};
@@ -138,22 +141,22 @@ void hardware_sampler::dump_yaml(const char *filename) {
         event_time_points.push_back(time_point);
         event_names.push_back(name);
     }
-    file << std::format("events:\n"
+    file << fmt::format("events:\n"
                         "  time_points:\n"
                         "    unit: \"s\"\n"
                         "    values: [{}]\n"
                         "  names: [{}]\n\n",
-                        detail::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "),
-                        detail::join(event_names, ", "));
+                        fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "),
+                        fmt::join(event_names, ", "));
 
     // output the sampling information
-    file << std::format("sampling_interval: {}\n"
+    file << fmt::format("sampling_interval: {}\n"
                         "time_points:\n"
                         "  unit: \"s\"\n"
                         "  values: [{}]\n"
                         "{}\n\n",
                         this->sampling_interval(),
-                        detail::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),
+                        fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),
                         this->generate_yaml_string());
 }
 

From 0f7a253a1bbf4d81b97914099b894cf0e436f72a Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 11:12:11 +0200
Subject: [PATCH 19/69] Unify temperature related samples.

---
 README.md                                     |  57 ++--
 include/hardware_sampling/cpu/cpu_samples.hpp |   2 +-
 .../gpu_amd/rocm_smi_samples.hpp              |  50 +--
 .../gpu_nvidia/nvml_samples.hpp               |  16 +-
 src/hardware_sampling/cpu/cpu_samples.cpp     |  28 +-
 .../cpu/hardware_sampler.cpp                  |   8 +-
 .../gpu_amd/hardware_sampler.cpp              | 189 ++++++------
 .../gpu_amd/rocm_smi_samples.cpp              | 290 +++++++++---------
 .../gpu_nvidia/hardware_sampler.cpp           |  44 +--
 .../gpu_nvidia/nvml_samples.cpp               |  56 ++--
 10 files changed, 368 insertions(+), 372 deletions(-)

diff --git a/README.md b/README.md
index bf47501..36b116d 100644
--- a/README.md
+++ b/README.md
@@ -177,39 +177,36 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 | sample                   | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
 |:-------------------------|:----:|:-----------:|:--------:|:----------:|
-| per_core_temperature     |  °C  |             |          |            |
-| core_throttle_percentage |  %   |             |          |            |
-| per_package_temperature  |  °C  |             |          |            |
-| num_fans                 |      |     int     |   int    |            |
-| max_fan_speed            |      |             |   int    |            |
-| temperature_gpu_min      |      |             |   m°C    |            |
-| temperature_gpu_max      |      |     °C      |   m°C    |            |
-| temperature_hotspot_min  |      |             |   m°C    |            |
-| temperature_hotspot_max  |      |             |   m°C    |            |
-| temperature_memory_min   |      |             |   m°C    |            |
-| temperature_memory_max   |      |             |   m°C    |            |
-| temperature_hbm_0_min    |      |             |   m°C    |            |
-| temperature_hbm_0_max    |      |             |   m°C    |            |
-| temperature_hbm_1_min    |      |             |   m°C    |    MBPS    |
-| temperature_hbm_1_max    |      |             |   m°C    |            |
-| temperature_hbm_2_min    |      |             |   m°C    |     B      |
-| temperature_hbm_2_max    |      |             |   m°C    |     B      |
-| temperature_hbm_3_min    |      |             |   m°C    |    BPS     |
-| temperature_hbm_3_max    |      |             |   m°C    |    int     |
-| fan_speed                |      |      %      |    %     |    int     |
-| temperature_gpu          |      |     °C      |   m°C    |    Bit     |
-| temperature_hotspot      |      |             |   m°C    |    int     |
-| temperature_memory       |      |             |   m°C    |    str     |
-| temperature_hbm_0        |      |             |   m°C    |     B      |
-| temperature_hbm_1        |      |             |   m°C    |     B      |
-| temperature_hbm_2        |      |             |   m°C    |    int     |
-| temperature_hbm_3        |      |             |   m°C    |    int     |
+| num_fans                 |  -   |     int     |   int    |            |
+| fan_speed_min            |  -   |      %      |    -     |            | 
+| fan_speed_max            |  -   |      %      |   RPM    |            |
+| temperature_min          |  -   |      -      |    °C    |            |
+| temperature_max          |  -   |     °C      |    °C    |            |
+| memory_temperature_min   |  -   |      -      |    °C    |            |
+| memory_temperature_max   |  -   |     °C      |    °C    |            |
+| hotspot_temperature_min  |  -   |      -      |    °C    |            |
+| hotspot_temperature_max  |  -   |      -      |    °C    |            |
+| hbm_0_temperature_min    |  -   |      -      |    °C    |            |
+| hbm_0_temperature_max    |  -   |      -      |    °C    |            |
+| hbm_1_temperature_min    |  -   |      -      |    °C    |            |
+| hbm_1_temperature_max    |  -   |      -      |    °C    |            |
+| hbm_2_temperature_min    |  -   |      -      |    °C    |            |
+| hbm_2_temperature_max    |  -   |      -      |    °C    |            |
+| hbm_3_temperature_min    |  -   |      -      |    °C    |            |
+| hbm_3_temperature_max    |  -   |      -      |    °C    |            |
+| fan_speed_percentage     |  -   |      %      |    %     |            |
+| temperature              |  °C  |     °C      |    °C    |            |
+| memory_temperature       |  -   |      -      |    °C    |            |
+| hotspot_temperature      |  -   |      -      |    °C    |            |
+| hbm_0_temperature        |  -   |      -      |    °C    |            |
+| hbm_1_temperature        |  -   |      -      |    °C    |            |
+| hbm_2_temperature        |  -   |      -      |    °C    |            |
+| hbm_3_temperature        |  -   |      -      |    °C    |            |
 | temperature_{}_max       |      |             |          |            |
 | temperature_psu          |      |             |          |            |
 | temperature_{}           |      |             |          |            |
-| min_fan_speed            |      |      %      |          |            |
-| max_fan_speed            |      |      %      |          |            |
-| temperature_mem_max      |      |     °C      |          |            |
+| core_temperature         |  °C  |      -      |    -     |     -      |
+| core_throttle_percentage |  %   |      -      |    -     |     -      |
 
 ### gfx-related (iGPU) samples
 
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index b537326..8a90c30 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -207,9 +207,9 @@ class cpu_temperature_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature)             // the current temperature of the whole package in °C
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, core_temperature)  // the current temperature of the core part of the CPU in °C
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_throttle_percent)   // the percent of time the CPU has throttled
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_temperature)     // the current temperature of the whole package in °C
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index 407a68c..05deb6b 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -201,31 +201,31 @@ class rocm_smi_temperature_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans)                // the number of fans (if any)
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, max_fan_speed)           // the maximum fan speed
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_edge_min)     // the minimum temperature on the GPU's edge temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_edge_max)     // the maximum temperature on the GPU's edge temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hotspot_min)  // the minimum temperature on the GPU's hotspot temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hotspot_max)  // the maximum temperature on the GPU's hotspot temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_memory_min)   // the minimum temperature on the GPU's memory temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_memory_max)   // the maximum temperature on the GPU's memory temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_0_min)    // the minimum temperature on the GPU's HBM0 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_0_max)    // the maximum temperature on the GPU's HBM0 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_1_min)    // the minimum temperature on the GPU's HBM1 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_1_max)    // the maximum temperature on the GPU's HBM1 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_2_min)    // the minimum temperature on the GPU's HBM2 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_2_max)    // the maximum temperature on the GPU's HBM2 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_3_min)    // the minimum temperature on the GPU's HBM3 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_3_max)    // the maximum temperature on the GPU's HBM3 temperature sensor in m°C
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, fan_speed)            // the current fan speed in %
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_edge)     // the current temperature on the GPU's edge temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hotspot)  // the current temperature on the GPU's hotspot temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_memory)   // the current temperature on the GPU's memory temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_0)    // the current temperature on the GPU's HBM0 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_1)    // the current temperature on the GPU's HBM1 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_2)    // the current temperature on the GPU's HBM2 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_3)    // the current temperature on the GPU's HBM3 temperature sensor in m°C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans)          // the number of fans (if any)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, fan_speed_max)     // the maximum fan speed in RPM
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_min)          // the minimum temperature on the GPU's edge temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max)          // the maximum temperature on the GPU's edge temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_min)   // the minimum temperature on the GPU's memory temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max)   // the maximum temperature on the GPU's memory temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hotspot_temperature_min)  // the minimum temperature on the GPU's hotspot temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hotspot_temperature_max)  // the maximum temperature on the GPU's hotspot temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_0_temperature_min)    // the minimum temperature on the GPU's HBM0 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_0_temperature_max)    // the maximum temperature on the GPU's HBM0 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_1_temperature_min)    // the minimum temperature on the GPU's HBM1 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_1_temperature_max)    // the maximum temperature on the GPU's HBM1 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_2_temperature_min)    // the minimum temperature on the GPU's HBM2 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_2_temperature_max)    // the maximum temperature on the GPU's HBM2 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_3_temperature_min)    // the minimum temperature on the GPU's HBM3 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_3_temperature_max)    // the maximum temperature on the GPU's HBM3 temperature sensor in °C
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage)  // the current fan speed in %
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature)           // the current temperature on the GPU's edge temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hotspot_temperature)   // the current temperature on the GPU's hotspot temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_temperature)    // the current temperature on the GPU's memory temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_0_temperature)     // the current temperature on the GPU's HBM0 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_1_temperature)     // the current temperature on the GPU's HBM1 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_2_temperature)     // the current temperature on the GPU's HBM2 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_3_temperature)     // the current temperature on the GPU's HBM3 temperature sensor in °C
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index 2757a60..b1af6bc 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -208,14 +208,14 @@ class nvml_temperature_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_fans)                       // the number of fans (if any)
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, min_fan_speed)                  // the minimum fan speed the user can set in %
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, max_fan_speed)                  // the maximum fan speed the user can set in %
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, temperature_threshold_gpu_max)  // the maximum graphics temperature threshold in °C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, temperature_threshold_mem_max)  // the maximum memory temperature threshold in °C
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, fan_speed)        // the current intended fan speed in %
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, temperature_gpu)  // the current GPU temperature in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_fans)          // the number of fans (if any)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, fan_speed_min)     // the minimum fan speed the user can set in %
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, fan_speed_max)     // the maximum fan speed the user can set in %
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max)         // the maximum graphics temperature threshold in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max)  // the maximum memory temperature threshold in °C
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage)  // the current intended fan speed in %
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature)           // the current GPU temperature in °C
 };
 
 /**
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index 02148f0..80ed9b4 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -482,9 +482,17 @@ std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) {
 std::string cpu_temperature_samples::generate_yaml_string() const {
     std::string str{ "temperature:\n" };
 
+    // the temperature of the whole package
+    if (this->temperature_.has_value()) {
+        str += fmt::format("  temperature:\n"
+                           "    turbostat_name: \"PkgTmp\"\n"
+                           "    unit: \"°C\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->temperature_.value(), ", "));
+    }
     // the temperature of the cores
     if (this->core_temperature_.has_value()) {
-        str += fmt::format("  per_core_temperature:\n"
+        str += fmt::format("  core_temperature:\n"
                            "    turbostat_name: \"CoreTmp\"\n"
                            "    unit: \"°C\"\n"
                            "    values: [{}]\n",
@@ -498,14 +506,6 @@ std::string cpu_temperature_samples::generate_yaml_string() const {
                            "    values: [{}]\n",
                            fmt::join(this->core_throttle_percent_.value(), ", "));
     }
-    // the temperature of the whole package
-    if (this->package_temperature_.has_value()) {
-        str += fmt::format("  per_package_temperature:\n"
-                           "    turbostat_name: \"PkgTmp\"\n"
-                           "    unit: \"°C\"\n"
-                           "    values: [{}]\n",
-                           fmt::join(this->package_temperature_.value(), ", "));
-    }
 
     // remove last newline
     str.pop_back();
@@ -514,12 +514,12 @@ std::string cpu_temperature_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_temperature_samples &samples) {
-    return out << fmt::format("core_temperature [°C]: [{}]\n"
-                              "core_throttle_percent [%]: [{}]\n"
-                              "package_temperature [°C]: [{}]",
+    return out << fmt::format("temperature [°C]: [{}]\n"
+                              "core_temperature [°C]: [{}]\n"
+                              "core_throttle_percent [%]: [{}]",
+                              fmt::join(detail::value_or_default(samples.get_temperature()), ", "),
                               fmt::join(detail::value_or_default(samples.get_core_temperature()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_package_temperature()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "));
 }
 
 //*************************************************************************************************************************************//
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 6c8471d..cf2eeda 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -198,8 +198,8 @@ void cpu_hardware_sampler::sampling_loop() {
                 using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
                 temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
             } else if (header[i] == "PkgTmp") {
-                using vector_type = decltype(temperature_samples_.package_temperature_)::value_type;
-                temperature_samples_.package_temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                using vector_type = decltype(temperature_samples_.temperature_)::value_type;
+                temperature_samples_.temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
             } else if (header[i] == "GFX%rc6") {
                 using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
                 gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
@@ -345,8 +345,8 @@ void cpu_hardware_sampler::sampling_loop() {
                         using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
                         temperature_samples_.core_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                     } else if (header[i] == "PkgTmp") {
-                        using vector_type = decltype(temperature_samples_.package_temperature_)::value_type;
-                        temperature_samples_.package_temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        using vector_type = decltype(temperature_samples_.temperature_)::value_type;
+                        temperature_samples_.temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                     } else if (header[i] == "GFX%rc6") {
                         using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
                         gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 0b80c81..76cde4b 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -323,125 +323,127 @@ void gpu_amd_hardware_sampler::sampling_loop() {
     // retrieve fixed temperature related information
     {
         std::uint32_t fan_id{ 0 };
-        decltype(temperature_samples_.fan_speed_)::value_type::value_type fan_speed{};
+        std::int64_t fan_speed{};
         while (rsmi_dev_fan_speed_get(device_id_, fan_id, &fan_speed) == RSMI_STATUS_SUCCESS) {
             if (fan_id == 0) {
                 // queried samples -> retrieved every iteration if available
-                temperature_samples_.fan_speed_ = decltype(temperature_samples_.fan_speed_)::value_type{ fan_speed };
+                const auto percentage = static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(fan_speed) /
+                                        static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(RSMI_MAX_FAN_SPEED);
+                temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ percentage };
             }
             ++fan_id;
         }
         temperature_samples_.num_fans_ = fan_id;
 
-        decltype(temperature_samples_.max_fan_speed_)::value_type max_fan_speed{};
+        decltype(temperature_samples_.fan_speed_max_)::value_type max_fan_speed{};
         if (rsmi_dev_fan_speed_max_get(device_id_, std::uint32_t{ 0 }, &max_fan_speed) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.max_fan_speed_ = max_fan_speed;
+            temperature_samples_.fan_speed_max_ = max_fan_speed;
         }
 
-        decltype(temperature_samples_.temperature_edge_min_)::value_type temperature_edge_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MIN, &temperature_edge_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_edge_min_ = temperature_edge_min;
+        std::int64_t temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MIN, &temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.temperature_min_ = static_cast<decltype(temperature_samples_.temperature_min_)::value_type>(temperature_min) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_edge_max_)::value_type temperature_edge_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temperature_edge_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_edge_max_ = temperature_edge_min;
+        std::int64_t temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.temperature_max_ = static_cast<decltype(temperature_samples_.temperature_max_)::value_type>(temperature_max) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hotspot_min_)::value_type temperature_hotspot_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MIN, &temperature_hotspot_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hotspot_min_ = temperature_hotspot_min;
+        std::int64_t memory_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MIN, &memory_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.memory_temperature_min_ = static_cast<decltype(temperature_samples_.memory_temperature_min_)::value_type>(memory_temperature_min) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hotspot_max_)::value_type temperature_hotspot_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MAX, &temperature_hotspot_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hotspot_max_ = temperature_hotspot_max;
+        std::int64_t memory_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MAX, &memory_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.memory_temperature_max_ = static_cast<decltype(temperature_samples_.memory_temperature_max_)::value_type>(memory_temperature_max) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_memory_min_)::value_type temperature_memory_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MIN, &temperature_memory_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_memory_min_ = temperature_memory_min;
+        std::int64_t hotspot_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MIN, &hotspot_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hotspot_temperature_min_ = static_cast<decltype(temperature_samples_.hotspot_temperature_min_)::value_type>(hotspot_temperature_min) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_memory_max_)::value_type temperature_memory_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MAX, &temperature_memory_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_memory_max_ = temperature_memory_max;
+        std::int64_t hotspot_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MAX, &hotspot_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hotspot_temperature_max_ = static_cast<decltype(temperature_samples_.hotspot_temperature_max_)::value_type>(hotspot_temperature_max) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hbm_0_min_)::value_type temperature_hbm_0_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MIN, &temperature_hbm_0_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_0_min_ = temperature_hbm_0_min;
+        std::int64_t hbm_0_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MIN, &hbm_0_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_0_temperature_min_ = static_cast<decltype(temperature_samples_.hbm_0_temperature_min_)::value_type>(hbm_0_temperature_min) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hbm_0_max_)::value_type temperature_hbm_0_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MAX, &temperature_hbm_0_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_0_max_ = temperature_hbm_0_max;
+        std::int64_t hbm_0_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MAX, &hbm_0_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_0_temperature_max_ = static_cast<decltype(temperature_samples_.hbm_0_temperature_max_)::value_type>(hbm_0_temperature_max) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hbm_1_min_)::value_type temperature_hbm_1_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MIN, &temperature_hbm_1_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_1_min_ = temperature_hbm_1_min;
+        std::int64_t hbm_1_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MIN, &hbm_1_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_1_temperature_min_ = static_cast<decltype(temperature_samples_.hbm_1_temperature_min_)::value_type>(hbm_1_temperature_min) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hbm_1_max_)::value_type temperature_hbm_1_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MAX, &temperature_hbm_1_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_1_max_ = temperature_hbm_1_max;
+        std::int64_t hbm_1_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MAX, &hbm_1_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_1_temperature_max_ = static_cast<decltype(temperature_samples_.hbm_1_temperature_max_)::value_type>(hbm_1_temperature_max) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hbm_2_min_)::value_type temperature_hbm_2_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MIN, &temperature_hbm_2_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_2_min_ = temperature_hbm_2_min;
+        std::int64_t hbm_2_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MIN, &hbm_2_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_2_temperature_min_ = static_cast<decltype(temperature_samples_.hbm_2_temperature_min_)::value_type>(hbm_2_temperature_min) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hbm_2_max_)::value_type temperature_hbm_2_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MAX, &temperature_hbm_2_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_2_max_ = temperature_hbm_2_max;
+        std::int64_t hbm_2_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MAX, &hbm_2_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_2_temperature_max_ = static_cast<decltype(temperature_samples_.hbm_2_temperature_max_)::value_type>(hbm_2_temperature_max) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hbm_3_min_)::value_type temperature_hbm_3_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MIN, &temperature_hbm_3_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_3_min_ = temperature_hbm_3_min;
+        std::int64_t hbm_3_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MIN, &hbm_3_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_3_temperature_min_ = static_cast<decltype(temperature_samples_.hbm_3_temperature_min_)::value_type>(hbm_3_temperature_min) / 1000.0;
         }
 
-        decltype(temperature_samples_.temperature_hbm_3_max_)::value_type temperature_hbm_3_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MAX, &temperature_hbm_3_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_3_max_ = temperature_hbm_3_max;
+        std::int64_t hbm_3_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MAX, &hbm_3_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_3_temperature_max_ = static_cast<decltype(temperature_samples_.hbm_3_temperature_max_)::value_type>(hbm_3_temperature_max) / 1000.0;
         }
 
         // queried samples -> retrieved every iteration if available
-        decltype(temperature_samples_.temperature_edge_)::value_type::value_type temperature_edge{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temperature_edge) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_edge_ = decltype(temperature_samples_.temperature_edge_)::value_type{ temperature_edge };
+        std::int64_t temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{ static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(temperature) / 1000.0 };
         }
 
-        decltype(temperature_samples_.temperature_hotspot_)::value_type::value_type temperature_hotspot{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &temperature_hotspot) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hotspot_ = decltype(temperature_samples_.temperature_hotspot_)::value_type{ temperature_hotspot };
+        std::int64_t hotspot_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &hotspot_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hotspot_temperature_ = decltype(temperature_samples_.hotspot_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hotspot_temperature_)::value_type::value_type>(hotspot_temperature) / 1000.0 };
         }
 
-        decltype(temperature_samples_.temperature_memory_)::value_type::value_type temperature_memory{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &temperature_memory) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_memory_ = decltype(temperature_samples_.temperature_memory_)::value_type{ temperature_memory };
+        std::int64_t memory_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &memory_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.memory_temperature_ = decltype(temperature_samples_.memory_temperature_)::value_type{ static_cast<decltype(temperature_samples_.memory_temperature_)::value_type::value_type>(memory_temperature) / 1000.0 };
         }
 
-        decltype(temperature_samples_.temperature_hbm_0_)::value_type::value_type temperature_hbm_0{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &temperature_hbm_0) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_0_ = decltype(temperature_samples_.temperature_hbm_0_)::value_type{ temperature_hbm_0 };
+        std::int64_t hbm_0_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &hbm_0_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_0_temperature_ = decltype(temperature_samples_.hbm_0_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hbm_0_temperature_)::value_type::value_type>(hbm_0_temperature) / 1000.0 };
         }
 
-        decltype(temperature_samples_.temperature_hbm_1_)::value_type::value_type temperature_hbm_1{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &temperature_hbm_1) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_1_ = decltype(temperature_samples_.temperature_hbm_1_)::value_type{ temperature_hbm_1 };
+        std::int64_t hbm_1_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &hbm_1_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_1_temperature_ = decltype(temperature_samples_.hbm_1_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hbm_1_temperature_)::value_type::value_type>(hbm_1_temperature) / 1000.0 };
         }
 
-        decltype(temperature_samples_.temperature_hbm_2_)::value_type::value_type temperature_hbm_2{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &temperature_hbm_2) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_2_ = decltype(temperature_samples_.temperature_hbm_2_)::value_type{ temperature_hbm_2 };
+        std::int64_t hbm_2_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &hbm_2_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_2_temperature_ = decltype(temperature_samples_.hbm_2_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hbm_2_temperature_)::value_type::value_type>(hbm_2_temperature) / 1000.0 };
         }
 
-        decltype(temperature_samples_.temperature_hbm_3_)::value_type::value_type temperature_hbm_3{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &temperature_hbm_3) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_3_ = decltype(temperature_samples_.temperature_hbm_3_)::value_type{ temperature_hbm_3 };
+        std::int64_t hbm_3_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &hbm_3_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_3_temperature_ = decltype(temperature_samples_.hbm_3_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hbm_3_temperature_)::value_type::value_type>(hbm_3_temperature) / 1000.0 };
         }
     }
 
@@ -598,52 +600,53 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
             // retrieve temperature related samples
             {
-                if (temperature_samples_.fan_speed_.has_value()) {
-                    decltype(temperature_samples_.fan_speed_)::value_type::value_type value{};
+                if (temperature_samples_.fan_speed_percentage_.has_value()) {
+                    std::int64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value));
-                    temperature_samples_.fan_speed_->push_back(value);
+                    temperature_samples_.fan_speed_percentage_->push_back(static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(value) /
+                                                                          static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(RSMI_MAX_FAN_SPEED));
                 }
 
-                if (temperature_samples_.temperature_edge_.has_value()) {
-                    decltype(temperature_samples_.temperature_edge_)::value_type::value_type value{};
+                if (temperature_samples_.temperature_.has_value()) {
+                    std::int64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_edge_->push_back(value);
+                    temperature_samples_.temperature_->push_back(static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
-                if (temperature_samples_.temperature_hotspot_.has_value()) {
-                    decltype(temperature_samples_.temperature_hotspot_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hotspot_->push_back(value);
+                if (temperature_samples_.memory_temperature_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value));
+                    temperature_samples_.memory_temperature_->push_back(static_cast<decltype(temperature_samples_.memory_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
-                if (temperature_samples_.temperature_memory_.has_value()) {
-                    decltype(temperature_samples_.temperature_memory_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_memory_->push_back(value);
+                if (temperature_samples_.hotspot_temperature_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value));
+                    temperature_samples_.hotspot_temperature_->push_back(static_cast<decltype(temperature_samples_.hotspot_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
-                if (temperature_samples_.temperature_hbm_0_.has_value()) {
-                    decltype(temperature_samples_.temperature_hbm_0_)::value_type::value_type value{};
+                if (temperature_samples_.hbm_0_temperature_.has_value()) {
+                    std::int64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hbm_0_->push_back(value);
+                    temperature_samples_.hbm_0_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_0_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
-                if (temperature_samples_.temperature_hbm_1_.has_value()) {
-                    decltype(temperature_samples_.temperature_hbm_1_)::value_type::value_type value{};
+                if (temperature_samples_.hbm_1_temperature_.has_value()) {
+                    std::int64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hbm_1_->push_back(value);
+                    temperature_samples_.hbm_1_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_1_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
-                if (temperature_samples_.temperature_hbm_2_.has_value()) {
-                    decltype(temperature_samples_.temperature_hbm_2_)::value_type::value_type value{};
+                if (temperature_samples_.hbm_2_temperature_.has_value()) {
+                    std::int64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hbm_2_->push_back(value);
+                    temperature_samples_.hbm_2_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_2_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
-                if (temperature_samples_.temperature_hbm_3_.has_value()) {
-                    decltype(temperature_samples_.temperature_hbm_3_)::value_type::value_type value{};
+                if (temperature_samples_.hbm_3_temperature_.has_value()) {
+                    std::int64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hbm_3_->push_back(value);
+                    temperature_samples_.hbm_3_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_3_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
             }
         }
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index 568082e..e8dcffa 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -423,170 +423,166 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const {
                            this->num_fans_.value());
     }
     // maximum fan speed
-    if (this->max_fan_speed_.has_value()) {
-        str += fmt::format("  max_fan_speed:\n"
-                           "    unit: \"int\"\n"
+    if (this->fan_speed_max_.has_value()) {
+        str += fmt::format("  fan_speed_max:\n"
+                           "    unit: \"RPM\"\n"
                            "    values: {}\n",
-                           this->max_fan_speed_.value());
+                           this->fan_speed_max_.value());
     }
     // minimum GPU edge temperature
-    if (this->temperature_edge_min_.has_value()) {
-        str += fmt::format("  temperature_gpu_min:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->temperature_min_.has_value()) {
+        str += fmt::format("  temperature_min:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_edge_min_.value());
+                           this->temperature_min_.value());
     }
     // maximum GPU edge temperature
-    if (this->temperature_edge_max_.has_value()) {
-        str += fmt::format("  temperature_gpu_max:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->temperature_max_.has_value()) {
+        str += fmt::format("  temperature_max:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_edge_max_.value());
+                           this->temperature_max_.value());
     }
-    // minimum GPU hotspot temperature
-    if (this->temperature_hotspot_min_.has_value()) {
-        str += fmt::format("  temperature_hotspot_min:\n"
-                           "    unit: \"m°C\"\n"
+    // minimum GPU memory temperature
+    if (this->memory_temperature_min_.has_value()) {
+        str += fmt::format("  memory_temperature_min:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hotspot_min_.value());
+                           this->memory_temperature_min_.value());
     }
-    // maximum GPU hotspot temperature
-    if (this->temperature_hotspot_max_.has_value()) {
-        str += fmt::format("  temperature_hotspot_max:\n"
-                           "    unit: \"m°C\"\n"
+    // maximum GPU memory temperature
+    if (this->memory_temperature_max_.has_value()) {
+        str += fmt::format("  memory_temperature_max:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hotspot_max_.value());
+                           this->memory_temperature_max_.value());
     }
-    // minimum GPU memory temperature
-    if (this->temperature_memory_min_.has_value()) {
-        str += fmt::format("  temperature_memory_min:\n"
-                           "    unit: \"m°C\"\n"
+    // minimum GPU hotspot temperature
+    if (this->hotspot_temperature_min_.has_value()) {
+        str += fmt::format("  hotspot_temperature_min:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_memory_min_.value());
+                           this->hotspot_temperature_min_.value());
     }
-    // maximum GPU memory temperature
-    if (this->temperature_memory_max_.has_value()) {
-        str += fmt::format("  temperature_memory_max:\n"
-                           "    unit: \"m°C\"\n"
+    // maximum GPU hotspot temperature
+    if (this->hotspot_temperature_max_.has_value()) {
+        str += fmt::format("  hotspot_temperature_max:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_memory_max_.value());
+                           this->hotspot_temperature_max_.value());
     }
     // minimum GPU HBM 0 temperature
-    if (this->temperature_hbm_0_min_.has_value()) {
-        str += fmt::format("  temperature_hbm_0_min:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_0_temperature_min_.has_value()) {
+        str += fmt::format("  hbm_0_temperature_min:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hbm_0_min_.value());
+                           this->hbm_0_temperature_min_.value());
     }
     // maximum GPU HBM 0 temperature
-    if (this->temperature_hbm_0_max_.has_value()) {
-        str += fmt::format("  temperature_hbm_0_max:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_0_temperature_max_.has_value()) {
+        str += fmt::format("  hbm_0_temperature_max:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hbm_0_max_.value());
+                           this->hbm_0_temperature_max_.value());
     }
     // minimum GPU HBM 1 temperature
-    if (this->temperature_hbm_1_min_.has_value()) {
-        str += fmt::format("  temperature_hbm_1_min:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_1_temperature_min_.has_value()) {
+        str += fmt::format("  hbm_1_temperature_min:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hbm_1_min_.value());
+                           this->hbm_1_temperature_min_.value());
     }
     // maximum GPU HBM 1 temperature
-    if (this->temperature_hbm_1_max_.has_value()) {
-        str += fmt::format("  temperature_hbm_1_max:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_1_temperature_max_.has_value()) {
+        str += fmt::format("  hbm_1_temperature_max:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hbm_1_max_.value());
+                           this->hbm_1_temperature_max_.value());
     }
     // minimum GPU HBM 2 temperature
-    if (this->temperature_hbm_2_min_.has_value()) {
-        str += fmt::format("  temperature_hbm_2_min:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_2_temperature_min_.has_value()) {
+        str += fmt::format("  hbm_2_temperature_min:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hbm_2_min_.value());
+                           this->hbm_2_temperature_min_.value());
     }
     // maximum GPU HBM 2 temperature
-    if (this->temperature_hbm_2_max_.has_value()) {
-        str += fmt::format("  temperature_hbm_2_max:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_2_temperature_max_.has_value()) {
+        str += fmt::format("  hbm_2_temperature_max:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hbm_2_max_.value());
+                           this->hbm_2_temperature_max_.value());
     }
     // minimum GPU HBM 3 temperature
-    if (this->temperature_hbm_3_min_.has_value()) {
-        str += fmt::format("  temperature_hbm_3_min:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_3_temperature_min_.has_value()) {
+        str += fmt::format("  hbm_3_temperature_min:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hbm_3_min_.value());
+                           this->hbm_3_temperature_min_.value());
     }
     // maximum GPU HBM 3 temperature
-    if (this->temperature_hbm_3_max_.has_value()) {
-        str += fmt::format("  temperature_hbm_3_max:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_3_temperature_max_.has_value()) {
+        str += fmt::format("  hbm_3_temperature_max:\n"
+                           "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_hbm_3_max_.value());
+                           this->hbm_3_temperature_max_.value());
     }
 
     // fan speed
-    if (this->fan_speed_.has_value()) {
-        std::vector<double> fan_speed_percent(this->fan_speed_->size());
-        for (std::size_t i = 0; i < fan_speed_percent.size(); ++i) {
-            fan_speed_percent[i] = static_cast<double>(this->fan_speed_.value()[i]) / static_cast<double>(RSMI_MAX_FAN_SPEED);
-        }
-        str += fmt::format("  fan_speed:\n"
+    if (this->fan_speed_percentage_.has_value()) {
+        str += fmt::format("  fan_speed_percentage:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           fmt::join(fan_speed_percent, ", "));
+                           fmt::join(this->fan_speed_percentage_.value(), ", "));
     }
     // GPU edge temperature
-    if (this->temperature_edge_.has_value()) {
-        str += fmt::format("  temperature_gpu:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->temperature_.has_value()) {
+        str += fmt::format("  temperature:\n"
+                           "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->temperature_edge_.value(), ", "));
+                           fmt::join(this->temperature_.value(), ", "));
     }
-    // GPU hotspot temperature
-    if (this->temperature_hotspot_.has_value()) {
-        str += fmt::format("  temperature_hotspot:\n"
-                           "    unit: \"m°C\"\n"
+    // GPU memory temperature
+    if (this->memory_temperature_.has_value()) {
+        str += fmt::format("  memory_temperature:\n"
+                           "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->temperature_hotspot_.value(), ", "));
+                           fmt::join(this->memory_temperature_.value(), ", "));
     }
-    // GPU memory temperature
-    if (this->temperature_memory_.has_value()) {
-        str += fmt::format("  temperature_memory:\n"
-                           "    unit: \"m°C\"\n"
+    // GPU hotspot temperature
+    if (this->hotspot_temperature_.has_value()) {
+        str += fmt::format("  hotspot_temperature:\n"
+                           "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->temperature_memory_.value(), ", "));
+                           fmt::join(this->hotspot_temperature_.value(), ", "));
     }
     // GPU HBM 0 temperature
-    if (this->temperature_hbm_0_.has_value()) {
-        str += fmt::format("  temperature_hbm_0:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_0_temperature_.has_value()) {
+        str += fmt::format("  hbm_0_temperature:\n"
+                           "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->temperature_hbm_0_.value(), ", "));
+                           fmt::join(this->hbm_0_temperature_.value(), ", "));
     }
     // GPU HBM 1 temperature
-    if (this->temperature_hbm_1_.has_value()) {
-        str += fmt::format("  temperature_hbm_1:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_1_temperature_.has_value()) {
+        str += fmt::format("  hbm_1_temperature:\n"
+                           "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->temperature_hbm_1_.value(), ", "));
+                           fmt::join(this->hbm_1_temperature_.value(), ", "));
     }
     // GPU HBM 2 temperature
-    if (this->temperature_hbm_2_.has_value()) {
-        str += fmt::format("  temperature_hbm_2:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_2_temperature_.has_value()) {
+        str += fmt::format("  hbm_2_temperature:\n"
+                           "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->temperature_hbm_2_.value(), ", "));
+                           fmt::join(this->hbm_2_temperature_.value(), ", "));
     }
     // GPU HBM 3 temperature
-    if (this->temperature_hbm_3_.has_value()) {
-        str += fmt::format("  temperature_hbm_3:\n"
-                           "    unit: \"m°C\"\n"
+    if (this->hbm_3_temperature_.has_value()) {
+        str += fmt::format("  hbm_3_temperature:\n"
+                           "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->temperature_hbm_3_.value(), ", "));
+                           fmt::join(this->hbm_3_temperature_.value(), ", "));
     }
 
     // remove last newline
@@ -597,53 +593,53 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const {
 
 std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples) {
     return out << fmt::format("num_fans [int]: {}\n"
-                              "max_fan_speed [int]: {}\n"
-                              "temperature_edge_min [m°C]: {}\n"
-                              "temperature_edge_max [m°C]: {}\n"
-                              "temperature_hotspot_min [m°C]: {}\n"
-                              "temperature_hotspot_max [m°C]: {}\n"
-                              "temperature_memory_min [m°C]: {}\n"
-                              "temperature_memory_max [m°C]: {}\n"
-                              "temperature_hbm_0_min [m°C]: {}\n"
-                              "temperature_hbm_0_max [m°C]: {}\n"
-                              "temperature_hbm_1_min [m°C]: {}\n"
-                              "temperature_hbm_1_max [m°C]: {}\n"
-                              "temperature_hbm_2_min [m°C]: {}\n"
-                              "temperature_hbm_2_max [m°C]: {}\n"
-                              "temperature_hbm_3_min [m°C]: {}\n"
-                              "temperature_hbm_3_max [m°C]: {}\n"
-                              "fan_speed [%]: [{}]\n"
-                              "temperature_edge [m°C]: [{}]\n"
-                              "temperature_hotspot [m°C]: [{}]\n"
-                              "temperature_memory [m°C]: [{}]\n"
-                              "temperature_hbm_0 [m°C]: [{}]\n"
-                              "temperature_hbm_1 [m°C]: [{}]\n"
-                              "temperature_hbm_2 [m°C]: [{}]\n"
-                              "temperature_hbm_3 [m°C]: [{}]",
+                              "fan_speed_max [RPM]: {}\n"
+                              "temperature_min [°C]: {}\n"
+                              "temperature_max [°C]: {}\n"
+                              "memory_temperature_min [°C]: {}\n"
+                              "memory_temperature_max [°C]: {}\n"
+                              "hotspot_temperature_min [°C]: {}\n"
+                              "hotspot_temperature_max [°C]: {}\n"
+                              "hbm_0_temperature_min [°C]: {}\n"
+                              "hbm_0_temperature_max [°C]: {}\n"
+                              "hbm_1_temperature_min [°C]: {}\n"
+                              "hbm_1_temperature_max [°C]: {}\n"
+                              "hbm_2_temperature_min [°C]: {}\n"
+                              "hbm_2_temperature_max [°C]: {}\n"
+                              "hbm_3_temperature_min [°C]: {}\n"
+                              "hbm_3_temperature_max [°C]: {}\n"
+                              "fan_speed_percentage [%]: [{}]\n"
+                              "temperature [°C]: [{}]\n"
+                              "memory_temperature [°C]: [{}]\n"
+                              "hotspot_temperature [°C]: [{}]\n"
+                              "hbm_0_temperature [°C]: [{}]\n"
+                              "hbm_1_temperature [°C]: [{}]\n"
+                              "hbm_2_temperature [°C]: [{}]\n"
+                              "hbm_3_temperature [°C]: [{}]",
                               detail::value_or_default(samples.get_num_fans()),
-                              detail::value_or_default(samples.get_max_fan_speed()),
-                              detail::value_or_default(samples.get_temperature_edge_min()),
-                              detail::value_or_default(samples.get_temperature_edge_max()),
-                              detail::value_or_default(samples.get_temperature_hotspot_min()),
-                              detail::value_or_default(samples.get_temperature_hotspot_max()),
-                              detail::value_or_default(samples.get_temperature_memory_min()),
-                              detail::value_or_default(samples.get_temperature_memory_max()),
-                              detail::value_or_default(samples.get_temperature_hbm_0_min()),
-                              detail::value_or_default(samples.get_temperature_hbm_0_max()),
-                              detail::value_or_default(samples.get_temperature_hbm_1_min()),
-                              detail::value_or_default(samples.get_temperature_hbm_1_max()),
-                              detail::value_or_default(samples.get_temperature_hbm_2_min()),
-                              detail::value_or_default(samples.get_temperature_hbm_2_max()),
-                              detail::value_or_default(samples.get_temperature_hbm_3_min()),
-                              detail::value_or_default(samples.get_temperature_hbm_3_max()),
-                              fmt::join(detail::value_or_default(samples.get_fan_speed()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_temperature_edge()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_temperature_hotspot()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_temperature_memory()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_temperature_hbm_0()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_temperature_hbm_1()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_temperature_hbm_2()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_temperature_hbm_3()), ", "));
+                              detail::value_or_default(samples.get_fan_speed_max()),
+                              detail::value_or_default(samples.get_temperature_min()),
+                              detail::value_or_default(samples.get_temperature_max()),
+                              detail::value_or_default(samples.get_memory_temperature_min()),
+                              detail::value_or_default(samples.get_memory_temperature_max()),
+                              detail::value_or_default(samples.get_hotspot_temperature_min()),
+                              detail::value_or_default(samples.get_hotspot_temperature_max()),
+                              detail::value_or_default(samples.get_hbm_0_temperature_min()),
+                              detail::value_or_default(samples.get_hbm_0_temperature_max()),
+                              detail::value_or_default(samples.get_hbm_1_temperature_min()),
+                              detail::value_or_default(samples.get_hbm_1_temperature_max()),
+                              detail::value_or_default(samples.get_hbm_2_temperature_min()),
+                              detail::value_or_default(samples.get_hbm_2_temperature_max()),
+                              detail::value_or_default(samples.get_hbm_3_temperature_min()),
+                              detail::value_or_default(samples.get_hbm_3_temperature_max()),
+                              fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_hotspot_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_hbm_0_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_hbm_1_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_hbm_2_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_hbm_3_temperature()), ", "));
 }
 
 }  // namespace hws
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 17e7049..ebb65d3 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -378,33 +378,33 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
         }
 
         if (temperature_samples_.num_fans_.has_value() && temperature_samples_.num_fans_.value() > 0) {
-            decltype(temperature_samples_.min_fan_speed_)::value_type min_fan_speed{};
-            decltype(temperature_samples_.max_fan_speed_)::value_type max_fan_speed{};
+            decltype(temperature_samples_.fan_speed_min_)::value_type min_fan_speed{};
+            decltype(temperature_samples_.fan_speed_max_)::value_type max_fan_speed{};
             if (nvmlDeviceGetMinMaxFanSpeed(device, &min_fan_speed, &max_fan_speed) == NVML_SUCCESS) {
-                temperature_samples_.min_fan_speed_ = min_fan_speed;
-                temperature_samples_.max_fan_speed_ = max_fan_speed;
+                temperature_samples_.fan_speed_min_ = min_fan_speed;
+                temperature_samples_.fan_speed_max_ = max_fan_speed;
             }
         }
 
-        decltype(temperature_samples_.temperature_threshold_gpu_max_)::value_type temperature_threshold_gpu_max{};
-        if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_GPU_MAX, &temperature_threshold_gpu_max) == NVML_SUCCESS) {
-            temperature_samples_.temperature_threshold_gpu_max_ = temperature_threshold_gpu_max;
+        unsigned int temperature_max{};
+        if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_GPU_MAX, &temperature_max) == NVML_SUCCESS) {
+            temperature_samples_.temperature_max_ = static_cast<decltype(temperature_samples_.temperature_max_)::value_type>(temperature_max);
         }
 
-        decltype(temperature_samples_.temperature_threshold_mem_max_)::value_type temperature_threshold_mem_max{};
-        if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_MEM_MAX, &temperature_threshold_mem_max) == NVML_SUCCESS) {
-            temperature_samples_.temperature_threshold_mem_max_ = temperature_threshold_mem_max;
+        unsigned int memory_temperature_max{};
+        if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_MEM_MAX, &memory_temperature_max) == NVML_SUCCESS) {
+            temperature_samples_.memory_temperature_max_ = static_cast<decltype(temperature_samples_.memory_temperature_max_)::value_type>(memory_temperature_max);
         }
 
         // queried samples -> retrieved every iteration if available
-        decltype(temperature_samples_.fan_speed_)::value_type::value_type fan_speed{};
-        if (nvmlDeviceGetFanSpeed(device, &fan_speed) == NVML_SUCCESS) {
-            temperature_samples_.fan_speed_ = decltype(temperature_samples_.fan_speed_)::value_type{ fan_speed };
+        unsigned int fan_speed_percentage{};
+        if (nvmlDeviceGetFanSpeed(device, &fan_speed_percentage) == NVML_SUCCESS) {
+            temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(fan_speed_percentage) };
         }
 
-        decltype(temperature_samples_.temperature_gpu_)::value_type::value_type temperature_gpu{};
-        if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature_gpu) == NVML_SUCCESS) {
-            temperature_samples_.temperature_gpu_ = decltype(temperature_samples_.temperature_gpu_)::value_type{ temperature_gpu };
+        unsigned int temperature{};
+        if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature) == NVML_SUCCESS) {
+            temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{ static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(temperature) };
         }
     }
 
@@ -513,16 +513,16 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 
             // retrieve temperature related information
             {
-                if (temperature_samples_.fan_speed_.has_value()) {
-                    decltype(temperature_samples_.fan_speed_)::value_type::value_type value{};
+                if (temperature_samples_.fan_speed_percentage_.has_value()) {
+                    unsigned int value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value));
-                    temperature_samples_.fan_speed_->push_back(value);
+                    temperature_samples_.fan_speed_percentage_->push_back(static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(value));
                 }
 
-                if (temperature_samples_.temperature_gpu_.has_value()) {
-                    decltype(temperature_samples_.temperature_gpu_)::value_type::value_type value{};
+                if (temperature_samples_.temperature_.has_value()) {
+                    unsigned int value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value));
-                    temperature_samples_.temperature_gpu_->push_back(value);
+                    temperature_samples_.temperature_->push_back(static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(value));
                 }
             }
         }
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 71fb7a6..9258f24 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -458,47 +458,47 @@ std::string nvml_temperature_samples::generate_yaml_string() const {
                            this->num_fans_.value());
     }
     // min fan speed
-    if (this->min_fan_speed_.has_value()) {
-        str += fmt::format("  min_fan_speed:\n"
+    if (this->fan_speed_min_.has_value()) {
+        str += fmt::format("  fan_speed_min:\n"
                            "    unit: \"percentage\"\n"
                            "    values: {}\n",
-                           this->min_fan_speed_.value());
+                           this->fan_speed_min_.value());
     }
     // max fan speed
-    if (this->max_fan_speed_.has_value()) {
-        str += fmt::format("  max_fan_speed:\n"
+    if (this->fan_speed_max_.has_value()) {
+        str += fmt::format("  fan_speed_max:\n"
                            "    unit: \"percentage\"\n"
                            "    values: {}\n",
-                           this->max_fan_speed_.value());
+                           this->fan_speed_max_.value());
     }
     // temperature threshold GPU max
-    if (this->temperature_threshold_gpu_max_.has_value()) {
-        str += fmt::format("  temperature_gpu_max:\n"
+    if (this->temperature_max_.has_value()) {
+        str += fmt::format("  temperature_max:\n"
                            "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_threshold_gpu_max_.value());
+                           this->temperature_max_.value());
     }
     // temperature threshold memory max
-    if (this->temperature_threshold_mem_max_.has_value()) {
-        str += fmt::format("  temperature_mem_max:\n"
+    if (this->memory_temperature_max_.has_value()) {
+        str += fmt::format("  memory_temperature_max:\n"
                            "    unit: \"°C\"\n"
                            "    values: {}\n",
-                           this->temperature_threshold_mem_max_.value());
+                           this->memory_temperature_max_.value());
     }
 
     // fan speed
-    if (this->fan_speed_.has_value()) {
-        str += fmt::format("  fan_speed:\n"
+    if (this->fan_speed_percentage_.has_value()) {
+        str += fmt::format("  fan_speed_percentage:\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->fan_speed_.value(), ", "));
+                           fmt::join(this->fan_speed_percentage_.value(), ", "));
     }
     // temperature GPU
-    if (this->temperature_gpu_.has_value()) {
-        str += fmt::format("  temperature_gpu:\n"
+    if (this->temperature_.has_value()) {
+        str += fmt::format("  temperature:\n"
                            "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->temperature_gpu_.value(), ", "));
+                           fmt::join(this->temperature_.value(), ", "));
     }
 
     // remove last newline
@@ -511,17 +511,17 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp
     return out << fmt::format("num_fans [int]: {}\n"
                               "min_fan_speed [%]: {}\n"
                               "max_fan_speed [%]: {}\n"
-                              "temperature_threshold_gpu_max [°C]: {}\n"
-                              "temperature_threshold_mem_max [°C]: {}\n"
-                              "fan_speed [%]: [{}]\n"
-                              "temperature_gpu [°C]: [{}]",
+                              "temperature_max [°C]: {}\n"
+                              "memory_temperature_max [°C]: {}\n"
+                              "fan_speed_percentage [%]: [{}]\n"
+                              "temperature [°C]: [{}]",
                               detail::value_or_default(samples.get_num_fans()),
-                              detail::value_or_default(samples.get_min_fan_speed()),
-                              detail::value_or_default(samples.get_max_fan_speed()),
-                              detail::value_or_default(samples.get_temperature_threshold_gpu_max()),
-                              detail::value_or_default(samples.get_temperature_threshold_mem_max()),
-                              fmt::join(detail::value_or_default(samples.get_fan_speed()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_temperature_gpu()), ", "));
+                              detail::value_or_default(samples.get_fan_speed_min()),
+                              detail::value_or_default(samples.get_fan_speed_max()),
+                              detail::value_or_default(samples.get_temperature_max()),
+                              detail::value_or_default(samples.get_memory_temperature_max()),
+                              fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature()), ", "));
 }
 
 }  // namespace hws

From 357bf249a5399fce84ff854eabfc6e01213acdfe Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 14:32:03 +0200
Subject: [PATCH 20/69] Unify memory related samples.

---
 README.md                                     | 47 ++++++-----
 include/hardware_sampling/cpu/cpu_samples.hpp |  8 +-
 .../gpu_amd/rocm_smi_samples.hpp              | 19 +++--
 .../gpu_nvidia/nvml_samples.hpp               | 11 +--
 src/hardware_sampling/cpu/cpu_samples.cpp     | 66 +++++++--------
 .../cpu/hardware_sampler.cpp                  |  8 +-
 .../gpu_amd/hardware_sampler.cpp              | 25 ++++--
 .../gpu_amd/rocm_smi_samples.cpp              | 72 +++++++++-------
 .../gpu_nvidia/hardware_sampler.cpp           | 29 ++++---
 .../gpu_nvidia/nvml_samples.cpp               | 83 ++++++++++---------
 10 files changed, 206 insertions(+), 162 deletions(-)

diff --git a/README.md b/README.md
index 36b116d..a0c61bb 100644
--- a/README.md
+++ b/README.md
@@ -144,34 +144,35 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 | sample                      | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
 |:----------------------------|:----:|:-----------:|:--------:|:----------:|
-| cache_size_L1d              | str  |             |          |            |
-| cache_size_L1i              | str  |             |          |            |
-| cache_size_L2               | str  |             |          |            |
-| cache_size_L3               | str  |             |          |            |
+| cache_size_L1d              | str  |      -      |    -     |     -      |
+| cache_size_L1i              | str  |      -      |    -     |     -      |
+| cache_size_L2               | str  |      -      |    -     |     -      |
+| cache_size_L3               | str  |      -      |    -     |     -      |
 | memory_total                |  B   |      B      |    B     |            |
-| swap_memory_total           |  B   |             |          |            |
-| memory_free                 |  B   |      B      |    B     |            |
+| visible_memory_total        |  -   |      -      |    B     |     -      |
+| swap_memory_total           |  B   |      -      |    -     |     -      |
+| memory_total_{}             |  -   |             |          |     B      |
+| allocatable_memory_total_{} |  -   |             |          |     B      |
+| num_pcie_lanes_min          |  -   |      -      |   int    |            |
+| num_pcie_lanes_max          |  -   |     int     |   int    |            |
+| pcie_link_generation_max    |  -   |     int     |    -     |    int     |
+| pcie_link_speed_max         |  -   |    MBPS     |    -     |    BPS     |
+| pcie_link_transfer_rate_min |  -   |      -      |   MT/s   |            |
+| pcie_link_transfer_rate_max |  -   |      -      |   MT/s   |            |
+| memory_bus_width            |  -   |     Bit     |    -     |            |
 | memory_used                 |  B   |      B      |    B     |            |
-| swap_memory_free            |  B   |             |          |            |
-| swap_memory_used            |  B   |             |          |            |
-| visible_memory_total        |      |             |    B     |            |
-| min_num_pcie_lanes          |      |             |   int    |            |
-| max_num_pcie_lanes          |      |             |   int    |            |
-| pcie_bandwidth              |      |    MBPS     |   T/s    |    MBPS    |
-| num_pcie_lanes              |      |             |   int    |            |
-| memory_total_{}             |      |             |          |     B      |
-| allocatable_memory_total_{} |      |             |          |     B      |
-| pcie_max_bandwidth          |      |    MBPS     |          |    BPS     |
-| max_pcie_link_width         |      |             |          |    int     |
-| max_pcie_link_generation    |      |     int     |          |    int     |
+| memory_free                 |  B   |      B      |    B     |            |
+| swap_memory_used            |  B   |      -      |    -     |     -      |
+| swap_memory_free            |  B   |      -      |    -     |     -      |
+| num_pcie_lanes              |  -   |     int     |   int    |            |
+| pcie_link_generation        |  -   |     int     |    -     |    int     |
+| pcie_link_speed             |  -   |    MBPS     |    -     |    MBPS    |
+| pcie_link_transfer_rate     |  -   |      -      |   T/s    |     -      |
+| memory_used_{}              |      |             |          |     B      |
+| memory_free_{}              |      |             |          |     B      |
 | memory_bus_width_{}         |      |             |          |    Bit     |
 | memory_num_channels_{}      |      |             |          |    int     |
 | memory_location_{}          |      |             |          |    str     |
-| memory_free_{}              |      |             |          |     B      |
-| memory_used_{}              |      |             |          |     B      |
-| pcie_link_width             |      |     int     |          |    int     |
-| pcie_link_generation        |      |     int     |          |    int     |
-| memory_bus_width            |      |     Bit     |          |            |
 
 ### temperature-related samples
 
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index 8a90c30..98a88f2 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -166,10 +166,10 @@ class cpu_memory_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l1d_cache)                 // the size of the L1 data cache
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l1i_cache)                 // the size of the L1 instruction cache
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l2_cache)                  // the size of the L2 cache
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l3_cache)                  // the size of the L2 cache
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L1d)            // the size of the L1 data cache
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L1i)            // the size of the L1 instruction cache
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L2)             // the size of the L2 cache
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L3)             // the size of the L3 cache
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long long, memory_total)       // the total available memory in Byte
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long long, swap_memory_total)  // the total available swap memory in Byte
 
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index 05deb6b..03de73a 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -163,14 +163,17 @@ class rocm_smi_memory_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, memory_total)          // the total available memory in Byte
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, visible_memory_total)  // the total visible available memory in Byte, may be smaller than the total memory
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, min_num_pcie_lanes)    // the minimum number of used PCIe lanes
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, max_num_pcie_lanes)    // the maximum number of used PCIe lanes
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_used)         // the currently used memory in Byte
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, pcie_transfer_rate)  // the current PCIe transfer rate in T/s
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, num_pcie_lanes)      // the number of currently used PCIe lanes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, memory_total)                 // the total available memory in Byte
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, visible_memory_total)         // the total visible available memory in Byte, may be smaller than the total memory
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_pcie_lanes_min)           // the minimum number of used PCIe lanes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_pcie_lanes_max)           // the maximum number of used PCIe lanes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, pcie_link_transfer_rate_min)  // the minimum PCIe link transfer rate in MT/s
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, pcie_link_transfer_rate_max)  // the maximum PCIe link transfer rate in MT/s
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_used)              // the currently used memory in Byte
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_free)              // the currently free memory in Byte
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, num_pcie_lanes)           // the number of currently used PCIe lanes
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, pcie_link_transfer_rate)  // the current PCIe transfer rate in MT/s
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index b1af6bc..b85c7a8 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -169,15 +169,16 @@ class nvml_memory_samples {
     [[nodiscard]] std::string generate_yaml_string() const;
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long, memory_total)             // the total available memory in Byte
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_max_speed)       // the maximum PCIe link speed in MBPS
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_pcie_lanes_max)        // the maximum number of PCIe lanes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_generation_max)  // the maximum PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_speed_max)       // the maximum PCIe link speed in MBPS
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, memory_bus_width)          // the memory bus with in Bit
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, max_pcie_link_generation)  // the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_free)     // the currently free memory in Byte
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_used)     // the currently used memory in Byte
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_speed)       // the current PCIe link speed in MBPS
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_width)       // the current PCIe link width (e.g., x16, x8, x4, etc)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_free)     // the currently free memory in Byte
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, num_pcie_lanes)        // the current PCIe link width (e.g., x16, x8, x4, etc)
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_generation)  // the current PCIe link generation (may change during runtime to save energy)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_speed)       // the current PCIe link speed in MBPS
 };
 
 /**
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index 80ed9b4..7b5054c 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -374,32 +374,32 @@ std::string cpu_memory_samples::generate_yaml_string() const {
     std::string str{ "memory:\n" };
 
     // the size of the L1 data cache
-    if (this->l1d_cache_.has_value()) {
+    if (this->cache_size_L1d_.has_value()) {
         str += fmt::format("  cache_size_L1d:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
-                           this->l1d_cache_.value());
+                           this->cache_size_L1d_.value());
     }
     // the size of the L1 instruction cache
-    if (this->l1i_cache_.has_value()) {
+    if (this->cache_size_L1i_.has_value()) {
         str += fmt::format("  cache_size_L1i:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
-                           this->l1i_cache_.value());
+                           this->cache_size_L1i_.value());
     }
     // the size of the L2 cache
-    if (this->l2_cache_.has_value()) {
+    if (this->cache_size_L2_.has_value()) {
         str += fmt::format("  cache_size_L2:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
-                           this->l2_cache_.value());
+                           this->cache_size_L2_.value());
     }
     // the size of the L3 cache
-    if (this->l3_cache_.has_value()) {
+    if (this->cache_size_L3_.has_value()) {
         str += fmt::format("  cache_size_L3:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
-                           this->l3_cache_.value());
+                           this->cache_size_L3_.value());
     }
 
     // the total size of available memory
@@ -417,13 +417,6 @@ std::string cpu_memory_samples::generate_yaml_string() const {
                            this->swap_memory_total_.value());
     }
 
-    // the available free memory
-    if (this->memory_free_.has_value()) {
-        str += fmt::format("  memory_free:\n"
-                           "    unit: \"B\"\n"
-                           "    values: [{}]\n",
-                           fmt::join(this->memory_free_.value(), ", "));
-    }
     // the used memory
     if (this->memory_used_.has_value()) {
         str += fmt::format("  memory_used:\n"
@@ -431,12 +424,12 @@ std::string cpu_memory_samples::generate_yaml_string() const {
                            "    values: [{}]\n",
                            fmt::join(this->memory_used_.value(), ", "));
     }
-    // the available swap memory
-    if (this->swap_memory_free_.has_value()) {
-        str += fmt::format("  swap_memory_free:\n"
+    // the available free memory
+    if (this->memory_free_.has_value()) {
+        str += fmt::format("  memory_free:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->swap_memory_free_.value(), ", "));
+                           fmt::join(this->memory_free_.value(), ", "));
     }
     // the swap memory
     if (this->swap_memory_used_.has_value()) {
@@ -445,6 +438,13 @@ std::string cpu_memory_samples::generate_yaml_string() const {
                            "    values: [{}]\n",
                            fmt::join(this->swap_memory_used_.value(), ", "));
     }
+    // the available swap memory
+    if (this->swap_memory_free_.has_value()) {
+        str += fmt::format("  swap_memory_free:\n"
+                           "    unit: \"B\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->swap_memory_free_.value(), ", "));
+    }
 
     // remove last newline
     str.pop_back();
@@ -453,26 +453,26 @@ std::string cpu_memory_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) {
-    return out << fmt::format("l1d_cache [string]: {}\n"
-                              "l1i_cache [string]: {}\n"
-                              "l2_cache [string]: {}\n"
-                              "l3_cache [string]: {}\n"
+    return out << fmt::format("cache_size_L1d [string]: {}\n"
+                              "cache_size_L1i [string]: {}\n"
+                              "cache_size_L2 [string]: {}\n"
+                              "cache_size_L3 [string]: {}\n"
                               "memory_total [B]: {}\n"
                               "swap_memory_total [B]: {}\n"
-                              "memory_free [B]: [{}]\n"
                               "memory_used [B]: [{}]\n"
-                              "swap_memory_free [B]: [{}]\n"
-                              "swap_memory_used [B]: [{}]",
-                              detail::value_or_default(samples.get_l1d_cache()),
-                              detail::value_or_default(samples.get_l1i_cache()),
-                              detail::value_or_default(samples.get_l2_cache()),
-                              detail::value_or_default(samples.get_l3_cache()),
+                              "memory_free [B]: [{}]\n"
+                              "swap_memory_used [B]: [{}]\n"
+                              "swap_memory_free [B]: [{}]",
+                              detail::value_or_default(samples.get_cache_size_L1d()),
+                              detail::value_or_default(samples.get_cache_size_L1i()),
+                              detail::value_or_default(samples.get_cache_size_L2()),
+                              detail::value_or_default(samples.get_cache_size_L3()),
                               detail::value_or_default(samples.get_memory_total()),
                               detail::value_or_default(samples.get_swap_memory_total()),
-                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
                               fmt::join(detail::value_or_default(samples.get_memory_used()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_swap_memory_free()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_swap_memory_used()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_swap_memory_used()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_swap_memory_free()), ", "));
 }
 
 //*************************************************************************************************************************************//
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index cf2eeda..6ad38ff 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -98,13 +98,13 @@ void cpu_hardware_sampler::sampling_loop() {
             } else if (detail::starts_with(line, "CPU min MHz")) {
                 clock_samples_.clock_frequency_min_ = detail::convert_to<decltype(clock_samples_.clock_frequency_min_)::value_type>(value);
             } else if (detail::starts_with(line, "L1d cache")) {
-                memory_samples_.l1d_cache_ = detail::convert_to<decltype(memory_samples_.l1d_cache_)::value_type>(value);
+                memory_samples_.cache_size_L1d_ = detail::convert_to<decltype(memory_samples_.cache_size_L1d_)::value_type>(value);
             } else if (detail::starts_with(line, "L1i cache")) {
-                memory_samples_.l1i_cache_ = detail::convert_to<decltype(memory_samples_.l1i_cache_)::value_type>(value);
+                memory_samples_.cache_size_L1i_ = detail::convert_to<decltype(memory_samples_.cache_size_L1i_)::value_type>(value);
             } else if (detail::starts_with(line, "L2 cache")) {
-                memory_samples_.l2_cache_ = detail::convert_to<decltype(memory_samples_.l2_cache_)::value_type>(value);
+                memory_samples_.cache_size_L2_ = detail::convert_to<decltype(memory_samples_.cache_size_L2_)::value_type>(value);
             } else if (detail::starts_with(line, "L3 cache")) {
-                memory_samples_.l3_cache_ = detail::convert_to<decltype(memory_samples_.l3_cache_)::value_type>(value);
+                memory_samples_.cache_size_L3_ = detail::convert_to<decltype(memory_samples_.cache_size_L3_)::value_type>(value);
             }
         }
 
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 76cde4b..f8a8253 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -298,17 +298,20 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
         rsmi_pcie_bandwidth_t bandwidth_info{};
         if (rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info) == RSMI_STATUS_SUCCESS) {
-            memory_samples_.min_num_pcie_lanes_ = bandwidth_info.lanes[0];
-            memory_samples_.max_num_pcie_lanes_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1];
+            memory_samples_.num_pcie_lanes_min_ = bandwidth_info.lanes[0];
+            memory_samples_.num_pcie_lanes_max_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1];
+            memory_samples_.pcie_link_transfer_rate_min_ = bandwidth_info.transfer_rate.frequency[0] / 1000000;
+            memory_samples_.pcie_link_transfer_rate_max_ = bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.num_supported - 1] / 1000000;
+
             // queried samples -> retrieved every iteration if available
-            memory_samples_.pcie_transfer_rate_ = decltype(memory_samples_.pcie_transfer_rate_)::value_type{};
+            memory_samples_.pcie_link_transfer_rate_ = decltype(memory_samples_.pcie_link_transfer_rate_)::value_type{};
             memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{};
             if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) {
-                memory_samples_.pcie_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current]);
+                memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000);
                 memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]);
             } else {
                 // the current index is (somehow) wrong
-                memory_samples_.pcie_transfer_rate_->push_back(0);
+                memory_samples_.pcie_link_transfer_rate_->push_back(0);
                 memory_samples_.num_pcie_lanes_->push_back(0);
             }
         }
@@ -317,6 +320,9 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         decltype(memory_samples_.memory_used_)::value_type::value_type memory_used{};
         if (rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_used) == RSMI_STATUS_SUCCESS) {
             memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_used };
+            if (memory_samples_.memory_total_.has_value()) {
+                memory_samples_.memory_free_ = decltype(memory_samples_.memory_used_)::value_type{ memory_samples_.memory_total_.value() - memory_samples_.memory_used_->front() };
+            }
         }
     }
 
@@ -582,17 +588,20 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     decltype(memory_samples_.memory_used_)::value_type::value_type value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value));
                     memory_samples_.memory_used_->push_back(value);
+                    if (memory_samples_.memory_free_.has_value()) {
+                        memory_samples_.memory_free_->push_back(memory_samples_.memory_total_.value() - value);
+                    }
                 }
 
-                if (memory_samples_.pcie_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) {
+                if (memory_samples_.pcie_link_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) {
                     rsmi_pcie_bandwidth_t bandwidth_info{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info));
                     if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        memory_samples_.pcie_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current]);
+                        memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000);
                         memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]);
                     } else {
                         // the current index is (somehow) wrong
-                        memory_samples_.pcie_transfer_rate_->push_back(0);
+                        memory_samples_.pcie_link_transfer_rate_->push_back(0);
                         memory_samples_.num_pcie_lanes_->push_back(0);
                     }
                 }
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index e8dcffa..0bb7eeb 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -337,18 +337,32 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const {
                            this->visible_memory_total_.value());
     }
     // min number of PCIe lanes
-    if (this->min_num_pcie_lanes_.has_value()) {
-        str += fmt::format("  min_num_pcie_lanes:\n"
+    if (this->num_pcie_lanes_min_.has_value()) {
+        str += fmt::format("  num_pcie_lanes_min:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
-                           this->min_num_pcie_lanes_.value());
+                           this->num_pcie_lanes_min_.value());
     }
     // max number of PCIe lanes
-    if (this->max_num_pcie_lanes_.has_value()) {
-        str += fmt::format("  max_num_pcie_lanes:\n"
+    if (this->num_pcie_lanes_max_.has_value()) {
+        str += fmt::format("  num_pcie_lanes_max:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
-                           this->max_num_pcie_lanes_.value());
+                           this->num_pcie_lanes_max_.value());
+    }
+    // the minimum PCIe link transfer rate
+    if (this->pcie_link_transfer_rate_min_.has_value()) {
+        str += fmt::format("  pcie_link_transfer_rate_min:\n"
+                           "    unit: \"MT/s\"\n"
+                           "    values: {}\n",
+                           this->pcie_link_transfer_rate_min_.value());
+    }
+    // the maximum PCIe link transfer rate
+    if (this->pcie_link_transfer_rate_max_.has_value()) {
+        str += fmt::format("  pcie_link_transfer_rate_max:\n"
+                           "    unit: \"MT/s\"\n"
+                           "    values: {}\n",
+                           this->pcie_link_transfer_rate_max_.value());
     }
 
     // used memory
@@ -359,31 +373,27 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const {
                            fmt::join(this->memory_used_.value(), ", "));
     }
     // free memory
-    if (this->memory_used_.has_value() && this->memory_total_.has_value()) {
-        decltype(rocm_smi_memory_samples::memory_used_)::value_type memory_free(this->memory_used_->size(), this->memory_total_.value());
-        for (std::size_t i = 0; i < memory_free.size(); ++i) {
-            memory_free[i] -= this->memory_used_.value()[i];
-        }
+    if (this->memory_free_.has_value()) {
         str += fmt::format("  memory_free:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           fmt::join(memory_free, ", "));
+                           fmt::join(this->memory_free_.value(), ", "));
     }
 
-    // PCIe bandwidth
-    if (this->pcie_transfer_rate_.has_value()) {
-        str += fmt::format("  pcie_bandwidth:\n"
-                           "    unit: \"T/s\"\n"
-                           "    values: [{}]\n",
-                           fmt::join(this->pcie_transfer_rate_.value(), ", "));
-    }
     // number of PCIe lanes
     if (this->num_pcie_lanes_.has_value()) {
-        str += fmt::format("  pcie_num_lanes:\n"
+        str += fmt::format("  num_pcie_lanes:\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
                            fmt::join(this->num_pcie_lanes_.value(), ", "));
     }
+    // PCIe transfer rate
+    if (this->pcie_link_transfer_rate_.has_value()) {
+        str += fmt::format("  pcie_link_transfer_rate:\n"
+                           "    unit: \"MT/s\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->pcie_link_transfer_rate_.value(), ", "));
+    }
 
     // remove last newline
     str.pop_back();
@@ -394,18 +404,24 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const {
 std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples) {
     return out << fmt::format("memory_total [B]: {}\n"
                               "visible_memory_total [B]: {}\n"
-                              "min_num_pcie_lanes [int]: {}\n"
-                              "max_num_pcie_lanes [int]: {}\n"
+                              "num_pcie_lanes_min [int]: {}\n"
+                              "num_pcie_lanes_max [int]: {}\n"
+                              "pcie_link_transfer_rate_min [MT/s]: {}\n"  // MT/s: same unit generate_yaml_string() reports for the transfer rates
+                              "pcie_link_transfer_rate_max [MT/s]: {}\n"
                               "memory_used [B]: [{}]\n"
-                              "pcie_transfer_rate [T/s]: [{}]\n"
-                              "num_pcie_lanes [int]: [{}]",
+                              "memory_free [B]: [{}]\n"
+                              "num_pcie_lanes [int]: [{}]\n"
+                              "pcie_link_transfer_rate [MT/s]: [{}]",
                               detail::value_or_default(samples.get_memory_total()),
                               detail::value_or_default(samples.get_visible_memory_total()),
-                              detail::value_or_default(samples.get_min_num_pcie_lanes()),
-                              detail::value_or_default(samples.get_max_num_pcie_lanes()),
+                              detail::value_or_default(samples.get_num_pcie_lanes_min()),
+                              detail::value_or_default(samples.get_num_pcie_lanes_max()),
+                              detail::value_or_default(samples.get_pcie_link_transfer_rate_min()),
+                              detail::value_or_default(samples.get_pcie_link_transfer_rate_max()),
                               fmt::join(detail::value_or_default(samples.get_memory_used()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_pcie_transfer_rate()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_link_transfer_rate()), ", "));
 }
 
 //*************************************************************************************************************************************//
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index ebb65d3..7af2a2a 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -347,20 +347,25 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             memory_samples_.memory_bus_width_ = memory_bus_width;
         }
 
-        decltype(memory_samples_.max_pcie_link_generation_)::value_type max_pcie_link_generation{};
-        if (nvmlDeviceGetMaxPcieLinkGeneration(device, &max_pcie_link_generation) == NVML_SUCCESS) {
-            memory_samples_.max_pcie_link_generation_ = max_pcie_link_generation;
+        decltype(memory_samples_.num_pcie_lanes_max_)::value_type num_pcie_lanes_max{};
+        if (nvmlDeviceGetMaxPcieLinkWidth(device, &num_pcie_lanes_max) == NVML_SUCCESS) {
+            memory_samples_.num_pcie_lanes_max_ = num_pcie_lanes_max;
         }
 
-        decltype(memory_samples_.pcie_link_max_speed_)::value_type pcie_link_max_speed{};
-        if (nvmlDeviceGetPcieLinkMaxSpeed(device, &pcie_link_max_speed) == NVML_SUCCESS) {
-            memory_samples_.pcie_link_max_speed_ = pcie_link_max_speed;
+        decltype(memory_samples_.pcie_link_generation_max_)::value_type pcie_link_generation_max{};
+        if (nvmlDeviceGetMaxPcieLinkGeneration(device, &pcie_link_generation_max) == NVML_SUCCESS) {
+            memory_samples_.pcie_link_generation_max_ = pcie_link_generation_max;
+        }
+
+        decltype(memory_samples_.pcie_link_speed_max_)::value_type pcie_link_speed_max{};
+        if (nvmlDeviceGetPcieLinkMaxSpeed(device, &pcie_link_speed_max) == NVML_SUCCESS) {
+            memory_samples_.pcie_link_speed_max_ = pcie_link_speed_max;
         }
 
         // queried samples -> retrieved every iteration if available
-        decltype(memory_samples_.pcie_link_width_)::value_type::value_type pcie_link_width{};
-        if (nvmlDeviceGetCurrPcieLinkWidth(device, &pcie_link_width) == NVML_SUCCESS) {
-            memory_samples_.pcie_link_width_ = decltype(memory_samples_.pcie_link_width_)::value_type{ pcie_link_width };
+        decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type num_pcie_lanes{};
+        if (nvmlDeviceGetCurrPcieLinkWidth(device, &num_pcie_lanes) == NVML_SUCCESS) {
+            memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{ num_pcie_lanes };
         }
 
         decltype(memory_samples_.pcie_link_generation_)::value_type::value_type pcie_link_generation{};
@@ -498,10 +503,10 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
                     memory_samples_.memory_used_->push_back(memory_info.used);
                 }
 
-                if (memory_samples_.pcie_link_width_.has_value()) {
-                    decltype(memory_samples_.pcie_link_width_)::value_type::value_type value{};
+                if (memory_samples_.num_pcie_lanes_.has_value()) {
+                    decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value));
-                    memory_samples_.pcie_link_width_->push_back(value);
+                    memory_samples_.num_pcie_lanes_->push_back(value);
                 }
 
                 if (memory_samples_.pcie_link_generation_.has_value()) {
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 9258f24..0ee319a 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -359,11 +359,25 @@ std::string nvml_memory_samples::generate_yaml_string() const {
                            this->memory_total_.value());
     }
     // maximum PCIe link speed
-    if (this->pcie_link_max_speed_.has_value()) {
-        str += fmt::format("  pcie_max_bandwidth:\n"
+    if (this->pcie_link_speed_max_.has_value()) {
+        str += fmt::format("  pcie_link_speed_max:\n"
                            "    unit: \"MBPS\"\n"
                            "    values: {}\n",
-                           this->pcie_link_max_speed_.value());
+                           this->pcie_link_speed_max_.value());
+    }
+    // maximum PCIe link generation
+    if (this->pcie_link_generation_max_.has_value()) {
+        str += fmt::format("  pcie_link_generation_max:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->pcie_link_generation_max_.value());
+    }
+    // maximum number of available PCIe lanes
+    if (this->num_pcie_lanes_max_.has_value()) {
+        str += fmt::format("  num_pcie_lanes_max:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->num_pcie_lanes_max_.value());
     }
     // memory bus width
     if (this->memory_bus_width_.has_value()) {
@@ -372,21 +386,7 @@ std::string nvml_memory_samples::generate_yaml_string() const {
                            "    values: {}\n",
                            this->memory_bus_width_.value());
     }
-    // maximum PCIe link generation
-    if (this->max_pcie_link_generation_.has_value()) {
-        str += fmt::format("  max_pcie_link_generation:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->max_pcie_link_generation_.value());
-    }
 
-    // free memory size
-    if (this->memory_free_.has_value()) {
-        str += fmt::format("  memory_free:\n"
-                           "    unit: \"B\"\n"
-                           "    values: [{}]\n",
-                           fmt::join(this->memory_free_.value(), ", "));
-    }
     // used memory size
     if (this->memory_used_.has_value()) {
         str += fmt::format("  memory_used:\n"
@@ -394,19 +394,19 @@ std::string nvml_memory_samples::generate_yaml_string() const {
                            "    values: [{}]\n",
                            fmt::join(this->memory_used_.value(), ", "));
     }
-    // PCIe link speed
-    if (this->pcie_link_speed_.has_value()) {
-        str += fmt::format("  pcie_bandwidth:\n"
-                           "    unit: \"MBPS\"\n"
+    // free memory size
+    if (this->memory_free_.has_value()) {
+        str += fmt::format("  memory_free:\n"
+                           "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->pcie_link_speed_.value(), ", "));
+                           fmt::join(this->memory_free_.value(), ", "));
     }
     // PCIe link width
-    if (this->pcie_link_width_.has_value()) {
-        str += fmt::format("  pcie_link_width:\n"
+    if (this->num_pcie_lanes_.has_value()) {
+        str += fmt::format("  num_pcie_lanes:\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->pcie_link_width_.value(), ", "));
+                           fmt::join(this->num_pcie_lanes_.value(), ", "));
     }
     // PCIe link generation
     if (this->pcie_link_generation_.has_value()) {
@@ -415,6 +415,13 @@ std::string nvml_memory_samples::generate_yaml_string() const {
                            "    values: [{}]\n",
                            fmt::join(this->pcie_link_generation_.value(), ", "));
     }
+    // PCIe link speed
+    if (this->pcie_link_speed_.has_value()) {
+        str += fmt::format("  pcie_link_speed:\n"
+                           "    unit: \"MBPS\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->pcie_link_speed_.value(), ", "));
+    }
 
     // remove last newline
     str.pop_back();
@@ -424,23 +431,25 @@ std::string nvml_memory_samples::generate_yaml_string() const {
 
 std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) {
     return out << fmt::format("memory_total [B]: {}\n"
-                              "pcie_link_max_speed [MBPS]: {}\n"
+                              "pcie_link_speed_max [MBPS]: {}\n"
+                              "pcie_link_generation_max [int]: {}\n"
+                              "num_pcie_lanes_max [int]: {}\n"
                               "memory_bus_width [Bit]: {}\n"
-                              "max_pcie_link_generation [int]: {}\n"
-                              "memory_free [B]: [{}]\n"
                               "memory_used [B]: [{}]\n"
-                              "pcie_link_speed [MBPS]: [{}]\n"
-                              "pcie_link_width [int]: [{}]\n"
-                              "pcie_link_generation [int]: [{}]",
+                              "memory_free [B]: [{}]\n"
+                              "num_pcie_lanes [int]: [{}]\n"
+                              "pcie_link_generation [int]: [{}]\n"
+                              "pcie_link_speed [MBPS]: [{}]",
                               detail::value_or_default(samples.get_memory_total()),
-                              detail::value_or_default(samples.get_pcie_link_max_speed()),
+                              detail::value_or_default(samples.get_pcie_link_speed_max()),
+                              detail::value_or_default(samples.get_pcie_link_generation_max()),
+                              detail::value_or_default(samples.get_num_pcie_lanes_max()),
                               detail::value_or_default(samples.get_memory_bus_width()),
-                              detail::value_or_default(samples.get_max_pcie_link_generation()),
-                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
                               fmt::join(detail::value_or_default(samples.get_memory_used()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_pcie_link_width()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "));
 }
 
 //*************************************************************************************************************************************//

From e1a808c83b5eef70044d9ebd56b96773b18496e1 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 14:55:33 +0200
Subject: [PATCH 21/69] Unify general samples.

---
 README.md                                     | 51 +++++++++----------
 .../gpu_amd/rocm_smi_samples.hpp              |  4 +-
 include/hardware_sampling/gpu_amd/utility.hpp | 32 ++++++++++++
 .../gpu_nvidia/nvml_samples.hpp               |  2 +-
 .../hardware_sampling/gpu_nvidia/utility.hpp  |  2 +
 .../gpu_amd/hardware_sampler.cpp              |  4 +-
 .../gpu_amd/rocm_smi_samples.cpp              |  4 +-
 .../gpu_nvidia/nvml_samples.cpp               |  4 +-
 8 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/README.md b/README.md
index a0c61bb..f74d6b9 100644
--- a/README.md
+++ b/README.md
@@ -66,32 +66,31 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 ### General samples
 
-| sample              | CPUs  | NVIDIA GPUs | AMD GPUs  |  Intel GPUs  |
-|:--------------------|:-----:|:-----------:|:---------:|:------------:|
-| name                |  str  |     str     |    str    |     str      |
-| vendor_id           |  str  |  str (fix)  |    str    | str (PCIe ID |
-| architecture        |  str  |     str     |    str    |      ?       |
-| byte_order          |  str  |  str (fix)  | str (fix) |  str (fix)   |
-| compute_utilization |   %   |      %      |     %     |      ?       |
-| memory_utilization  |   -   |      %      |     %     |      ?       |
-| performance_level   |       |     int     |    int    |              |
-| num_cores           |  int  |     int     |     -     |              |
-| num_compute_units   |   -   |     int     |    int    |      ?       | TODO
-| num_threads         |  int  |      -      |     -     |      -       |
-| threads_per_core    |  int  |      -      |     -     |      -       |
-| cores_per_socket    |  int  |      -      |     -     |      -       |
-| num_sockets         |  int  |      -      |     -     |      -       |
-| numa_nodes          |  int  |             |           |              |
-| flags               |  str  |             |           |              |
-| ipc                 | float |      -      |     -     |      -       |
-| irq                 |  int  |      -      |     -     |      -       |
-| smi                 |  int  |      -      |     -     |      -       |
-| poll                |  int  |      -      |     -     |      -       |
-| poll_percent        |   %   |      -      |     -     |      -       |
-| persistence_mode    |       |    bool     |           |              |
-| standby_mode        |       |             |           |     str      |
-| num_threads_per_eu  |       |             |           |     int      |
-| eu_simd_width       |       |             |           |     int      |
+| sample              | CPUs  | NVIDIA GPUs | AMD GPUs  |  Intel GPUs   |
+|:--------------------|:-----:|:-----------:|:---------:|:-------------:|
+| architecture        |  str  |     str     |    str    |       ?       |
+| byte_order          |  str  |  str (fix)  | str (fix) |   str (fix)   |
+| num_cores           |  int  |     int     |     -     |               |
+| num_threads         |  int  |      -      |     -     |       -       |
+| threads_per_core    |  int  |      -      |     -     |       -       |
+| cores_per_socket    |  int  |      -      |     -     |       -       |
+| num_sockets         |  int  |      -      |     -     |       -       |
+| numa_nodes          |  int  |      -      |     -     |               |
+| vendor_id           |  str  |  str (fix)  |    str    | str (PCIe ID) |
+| name                |  str  |     str     |    str    |      str      |
+| flags               |  str  |      -      |     -     |               |
+| persistence_mode    |   -   |    bool     |     -     |               |
+| compute_utilization |   %   |      %      |     %     |       ?       |
+| memory_utilization  |   -   |      %      |     %     |       ?       |
+| ipc                 | float |      -      |     -     |       -       |
+| irq                 |  int  |      -      |     -     |       -       |
+| smi                 |  int  |      -      |     -     |       -       |
+| poll                |  int  |      -      |     -     |       -       |
+| poll_percent        |   %   |      -      |     -     |       -       |
+| performance_level   |   -   |     int     |    int    |               |
+| standby_mode        |       |             |           |      str      |
+| num_threads_per_eu  |       |             |           |      int      |
+| eu_simd_width       |       |             |           |      int      |
 
 ### clock-related samples
 
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index 03de73a..af5228b 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -43,14 +43,14 @@ class rocm_smi_general_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)  // the architecture name of the device
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)    // the byte order (e.g., little/big endian)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)     // the vendor ID
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)          // the name of the device
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)  // the architecture name of the device
 
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization)  // the GPU compute utilization in percent
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization)   // the GPU memory utilization in percent
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level)              // the performance level: one of rsmi_dev_perf_level_t
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, performance_level)              // the performance level: one of rsmi_dev_perf_level_t
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp
index 59e19b5..f17db78 100644
--- a/include/hardware_sampling/gpu_amd/utility.hpp
+++ b/include/hardware_sampling/gpu_amd/utility.hpp
@@ -52,6 +52,38 @@ namespace hws {
     #define HWS_HIP_ERROR_CHECK(hip_func) hip_func;
 #endif
 
+// TODO: move to cpp file
+
+/**
+ * @brief Convert the performance level value (rsmi_dev_perf_level_t) to a string.
+ * @param[in] clocks_event_reasons the bitmask to convert to a string
+ * @return all event throttle reasons
+ */
+[[nodiscard]] inline std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) {
+    switch (perf_level) {
+        case RSMI_DEV_PERF_LEVEL_AUTO:
+            return "\"auto\"";
+        case RSMI_DEV_PERF_LEVEL_LOW:
+            return "\"low\"";
+        case RSMI_DEV_PERF_LEVEL_HIGH:
+            return "\"high\"";
+        case RSMI_DEV_PERF_LEVEL_MANUAL:
+            return "\"manual\"";
+        case RSMI_DEV_PERF_LEVEL_STABLE_STD:
+            return "\"stable_std\"";
+        case RSMI_DEV_PERF_LEVEL_STABLE_PEAK:
+            return "\"stable_peak\"";
+        case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK:
+            return "\"stable_min_mclk\"";
+        case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK:
+            return "\"stable_min_sclk\"";
+        case RSMI_DEV_PERF_LEVEL_DETERMINISM:
+            return "\"determinism\"";
+        case RSMI_DEV_PERF_LEVEL_UNKNOWN:
+            return "\"unknown\"";
+    }
+}
+
 }  // namespace hws
 
 #endif  // HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index b85c7a8..ed6504b 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -45,10 +45,10 @@ class nvml_general_samples {
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)  // the architecture name of the device
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)    // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)    // the number of CUDA cores
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)     // the vendor ID
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)          // the name of the device
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode)     // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)    // the number of CUDA cores
 
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, compute_utilization)  // the GPU compute utilization in percent
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, memory_utilization)   // the GPU memory utilization in percent
diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp
index 272126b..7b1266d 100644
--- a/include/hardware_sampling/gpu_nvidia/utility.hpp
+++ b/include/hardware_sampling/gpu_nvidia/utility.hpp
@@ -39,6 +39,8 @@ namespace hws::detail {
     #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func;
 #endif
 
+// TODO: move to cpp file
+
 /**
  * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|".
  * @param[in] clocks_event_reasons the bitmask to convert to a string
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index f8a8253..2d344ef 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -110,7 +110,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         // queried samples -> retrieved every iteration if available
         rsmi_dev_perf_level_t pstate{};
         if (rsmi_dev_perf_level_get(device_id_, &pstate) == RSMI_STATUS_SUCCESS) {
-            general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate) };
+            general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ performance_level_to_string(pstate) };
         }
 
         decltype(general_samples_.compute_utilization_)::value_type::value_type utilization_gpu{};
@@ -468,7 +468,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                 if (general_samples_.performance_level_.has_value()) {
                     rsmi_dev_perf_level_t pstate{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate));
-                    general_samples_.performance_level_->push_back(static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate));
+                    general_samples_.performance_level_->push_back(performance_level_to_string(pstate));
                 }
 
                 if (general_samples_.compute_utilization_.has_value()) {
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index 0bb7eeb..6f0c3fc 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -71,7 +71,7 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
     // performance state
     if (this->performance_level_.has_value()) {
         str += fmt::format("  performance_state:\n"
-                           "    unit: \"int - see rsmi_dev_perf_level_t\"\n"
+                           "    unit: \"string\"\n"
                            "    values: [{}]\n",
                            fmt::join(this->performance_level_.value(), ", "));
     }
@@ -89,7 +89,7 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samp
                               "name [string]: {}\n"
                               "compute_utilization [%]: [{}]\n"
                               "memory_utilization [%]: [{}]\n"
-                              "performance_level [int]: [{}]",
+                              "performance_level [string]: [{}]",
                               detail::value_or_default(samples.get_architecture()),
                               detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_vendor_id()),
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 0ee319a..b3254ad 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -99,19 +99,19 @@ std::string nvml_general_samples::generate_yaml_string() const {
 std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) {
     return out << fmt::format("architecture [string]: {}\n"
                               "byte_order [string]: {}\n"
+                              "num_cores [int]: {}\n"
                               "vendor_id [string]: {}\n"
                               "name [string]: {}\n"
                               "persistence_mode [bool]: {}\n"
-                              "num_cores [int]: {}\n"
                               "compute_utilization [%]: [{}]\n"
                               "memory_utilization [%]: [{}]\n"
                               "performance_level [int]: [{}]",
                               detail::value_or_default(samples.get_architecture()),
                               detail::value_or_default(samples.get_byte_order()),
+                              detail::value_or_default(samples.get_num_cores()),
                               detail::value_or_default(samples.get_vendor_id()),
                               detail::value_or_default(samples.get_name()),
                               detail::value_or_default(samples.get_persistence_mode()),
-                              detail::value_or_default(samples.get_num_cores()),
                               fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
                               fmt::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
                               fmt::join(detail::value_or_default(samples.get_performance_level()), ", "));

From ece190c1d1a503895e777139e085b3a9fcb63e23 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 14:58:53 +0200
Subject: [PATCH 22/69] Prefix YAML entry to make its meaning clearer.

---
 src/hardware_sampling/gpu_nvidia/nvml_samples.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index b3254ad..43e9dd5 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -172,7 +172,7 @@ std::string nvml_clock_samples::generate_yaml_string() const {
                            "    unit: \"MHz\"\n"
                            "    values:\n");
         for (const auto &[key, value] : this->available_clock_frequencies_.value()) {
-            str += fmt::format("      {}: [{}]\n", key, fmt::join(value, ", "));
+            str += fmt::format("      memory_clock_frequency_{}: [{}]\n", key, fmt::join(value, ", "));
         }
     }
     // the available memory clock frequencies

From c8357eee24a7859d1b6c8e826d4dea635bcc0bd3 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 15:29:16 +0200
Subject: [PATCH 23/69] Consistent quoting of string-like values in the YAML
 file (and only in the YAML file).

---
 include/hardware_sampling/gpu_amd/utility.hpp | 20 +++++++++----------
 .../hardware_sampling/gpu_nvidia/utility.hpp  |  2 +-
 include/hardware_sampling/utility.hpp         | 20 +++++++++++++++++++
 src/hardware_sampling/cpu/cpu_samples.cpp     |  4 ++--
 .../gpu_amd/rocm_smi_samples.cpp              |  8 ++++----
 .../gpu_nvidia/nvml_samples.cpp               |  4 ++--
 src/hardware_sampling/hardware_sampler.cpp    | 10 ++++++----
 7 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp
index f17db78..00d98bc 100644
--- a/include/hardware_sampling/gpu_amd/utility.hpp
+++ b/include/hardware_sampling/gpu_amd/utility.hpp
@@ -62,25 +62,25 @@ namespace hws {
 [[nodiscard]] inline std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) {
     switch (perf_level) {
         case RSMI_DEV_PERF_LEVEL_AUTO:
-            return "\"auto\"";
+            return "auto";
         case RSMI_DEV_PERF_LEVEL_LOW:
-            return "\"low\"";
+            return "low";
         case RSMI_DEV_PERF_LEVEL_HIGH:
-            return "\"high\"";
+            return "high";
         case RSMI_DEV_PERF_LEVEL_MANUAL:
-            return "\"manual\"";
+            return "manual";
         case RSMI_DEV_PERF_LEVEL_STABLE_STD:
-            return "\"stable_std\"";
+            return "stable_std";
         case RSMI_DEV_PERF_LEVEL_STABLE_PEAK:
-            return "\"stable_peak\"";
+            return "stable_peak";
         case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK:
-            return "\"stable_min_mclk\"";
+            return "stable_min_mclk";
         case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK:
-            return "\"stable_min_sclk\"";
+            return "stable_min_sclk";
         case RSMI_DEV_PERF_LEVEL_DETERMINISM:
-            return "\"determinism\"";
+            return "determinism";
         case RSMI_DEV_PERF_LEVEL_UNKNOWN:
-            return "\"unknown\"";
+            return "unknown";
     }
 }
 
diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp
index 7b1266d..05560b1 100644
--- a/include/hardware_sampling/gpu_nvidia/utility.hpp
+++ b/include/hardware_sampling/gpu_nvidia/utility.hpp
@@ -78,7 +78,7 @@ namespace hws::detail {
         if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) {
             reasons.emplace_back("HwThermalSlowdown");
         }
-        return fmt::format("\"{}\"", fmt::join(reasons, "|"));
+        return fmt::format("{}", fmt::join(reasons, "|"));
     }
 }
 
diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp
index 91836c4..2eb7451 100644
--- a/include/hardware_sampling/utility.hpp
+++ b/include/hardware_sampling/utility.hpp
@@ -252,6 +252,26 @@ template <typename MapType>
     return "";
 }
 
+/**
+ * @brief Wrap every element of @p values in double quotes and return the results as a vector of strings.
+ * @details Each element is formatted via `fmt::format("\"{}\"", val)`; the element's text itself is not escaped. Example: calling this function with `{ 1, 2, 3, 4 }` returns the strings `"1"`, `"2"`, `"3"`, `"4"` (the double quotes are part of each string).
+ * @tparam T the type of the values to quote (passed to `fmt::format`)
+ * @param[in] values the values to quote
+ * @return one quoted string per element of @p values, in the same order (`[[nodiscard]]`)
+ */
+template <typename T>
+[[nodiscard]] inline std::vector<std::string> quote(const std::vector<T> &values) {
+    std::vector<std::string> quoted{};
+    quoted.reserve(values.size());  // exactly one output string per input value
+
+    // surround the formatted value with double quotes; the input order is preserved
+    for (const T &val : values) {
+        quoted.push_back(fmt::format("\"{}\"", val));
+    }
+
+    return quoted;
+}
+
 }  // namespace hws::detail
 
 #endif  // HARDWARE_SAMPLING_UTILITY_HPP_
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index 7b5054c..0242e9a 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -7,7 +7,7 @@
 
 #include "hardware_sampling/cpu/cpu_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::value_or_default
+#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, quote}
 
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
@@ -104,7 +104,7 @@ std::string cpu_general_samples::generate_yaml_string() const {
         str += fmt::format("  flags:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->flags_.value(), ", "));
+                           fmt::join(detail::quote(this->flags_.value()), ", "));
     }
 
     // the percent the CPU was busy
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index 6f0c3fc..ba06efe 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -7,7 +7,7 @@
 
 #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default,}
+#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, quote}
 
 #include "fmt/format.h"         // fmt::format
 #include "fmt/ranges.h"         // fmt::join
@@ -73,7 +73,7 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
         str += fmt::format("  performance_state:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->performance_level_.value(), ", "));
+                           fmt::join(detail::quote(this->performance_level_.value()), ", "));
     }
 
     // remove last newline
@@ -267,7 +267,7 @@ std::string rocm_smi_power_samples::generate_yaml_string() const {
         str += fmt::format("  available_power_profiles:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->available_power_profiles_.value(), ", "));
+                           fmt::join(detail::quote(this->available_power_profiles_.value()), ", "));
     }
 
     // current power usage
@@ -289,7 +289,7 @@ std::string rocm_smi_power_samples::generate_yaml_string() const {
         str += fmt::format("  power_profile:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->power_profile_.value(), ", "));
+                           fmt::join(detail::quote(this->power_profile_.value()), ", "));
     }
 
     // remove last newline
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 43e9dd5..94bf97b 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -7,7 +7,7 @@
 
 #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, map_entry_to_string}
+#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, map_entry_to_string, quote}
 
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
@@ -209,7 +209,7 @@ std::string nvml_clock_samples::generate_yaml_string() const {
         str += fmt::format("  throttle_reason:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->throttle_reason_.value(), ", "));
+                           fmt::join(detail::quote(this->throttle_reason_.value()), ", "));
     }
     // clock is auto-boosted
     if (this->auto_boosted_clock_.has_value()) {
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index 1ac8f81..3511f7f 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -129,7 +129,7 @@ void hardware_sampler::dump_yaml(const char *filename) {
     file << "---\n\n";
 
     // set the device identification
-    file << fmt::format("device_identification: {}\n\n", this->device_identification());
+    file << fmt::format("device_identification: \"{}\"\n\n", this->device_identification());
 
     // output the start date time of this hardware sampling
     file << fmt::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", start_date_time_);
@@ -139,7 +139,7 @@ void hardware_sampler::dump_yaml(const char *filename) {
     std::vector<decltype(event::name)> event_names{};
     for (const auto &[time_point, name] : events_) {
         event_time_points.push_back(time_point);
-        event_names.push_back(name);
+        event_names.push_back(fmt::format("\"{}\"", name));
     }
     file << fmt::format("events:\n"
                         "  time_points:\n"
@@ -150,12 +150,14 @@ void hardware_sampler::dump_yaml(const char *filename) {
                         fmt::join(event_names, ", "));
 
     // output the sampling information
-    file << fmt::format("sampling_interval: {}\n"
+    file << fmt::format("sampling_interval:\n"
+                        "  unit: \"ms\"\n"
+                        "  values: {}\n"
                         "time_points:\n"
                         "  unit: \"s\"\n"
                         "  values: [{}]\n"
                         "{}\n\n",
-                        this->sampling_interval(),
+                        this->sampling_interval().count(),
                         fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),
                         this->generate_yaml_string());
 }

From eaa4e4e8ca4e016fc874b719babb5575bba27148 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 15:39:39 +0200
Subject: [PATCH 24/69] Move implementation to cpp file.

---
 CMakeLists.txt                                |  6 +-
 include/hardware_sampling/gpu_amd/utility.hpp | 34 ++----------
 .../hardware_sampling/gpu_nvidia/utility.hpp  | 40 +-------------
 src/hardware_sampling/gpu_amd/utility.cpp     | 41 ++++++++++++++
 src/hardware_sampling/gpu_nvidia/utility.cpp  | 55 +++++++++++++++++++
 5 files changed, 106 insertions(+), 70 deletions(-)
 create mode 100644 src/hardware_sampling/gpu_amd/utility.cpp
 create mode 100644 src/hardware_sampling/gpu_nvidia/utility.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 48ed48e..74cc828 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -202,7 +202,8 @@ if (CUDAToolkit_FOUND)
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
             ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_nvidia/utility.cpp
             >)
 
     # add compile definition
@@ -228,7 +229,8 @@ if (rocm_smi_FOUND)
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
             ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_amd/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_amd/utility.cpp
             >)
 
     # add compile definition
diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp
index 00d98bc..b0786f9 100644
--- a/include/hardware_sampling/gpu_amd/utility.hpp
+++ b/include/hardware_sampling/gpu_amd/utility.hpp
@@ -16,8 +16,9 @@
 #include "rocm_smi/rocm_smi.h"  // ROCm SMI runtime functions
 
 #include <stdexcept>  // std::runtime_error
+#include <string>     // std::string
 
-namespace hws {
+namespace hws::detail {
 
 /**
  * @def HWS_ROCM_SMI_ERROR_CHECK
@@ -52,37 +53,12 @@ namespace hws {
     #define HWS_HIP_ERROR_CHECK(hip_func) hip_func;
 #endif
 
-// TODO: move to cpp file
-
 /**
- * @brief Convert the performance level value (rsmi_dev_perf_level_t) to a string.
+ * @brief Convert the performance level value (`rsmi_dev_perf_level_t`) to a string.
- * @param[in] clocks_event_reasons the bitmask to convert to a string
- * @return all event throttle reasons
+ * @param[in] perf_level the performance level to convert to a string
+ * @return the string representation of the performance level (`[[nodiscard]]`)
  */
-[[nodiscard]] inline std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) {
-    switch (perf_level) {
-        case RSMI_DEV_PERF_LEVEL_AUTO:
-            return "auto";
-        case RSMI_DEV_PERF_LEVEL_LOW:
-            return "low";
-        case RSMI_DEV_PERF_LEVEL_HIGH:
-            return "high";
-        case RSMI_DEV_PERF_LEVEL_MANUAL:
-            return "manual";
-        case RSMI_DEV_PERF_LEVEL_STABLE_STD:
-            return "stable_std";
-        case RSMI_DEV_PERF_LEVEL_STABLE_PEAK:
-            return "stable_peak";
-        case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK:
-            return "stable_min_mclk";
-        case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK:
-            return "stable_min_sclk";
-        case RSMI_DEV_PERF_LEVEL_DETERMINISM:
-            return "determinism";
-        case RSMI_DEV_PERF_LEVEL_UNKNOWN:
-            return "unknown";
-    }
-}
+[[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level);
 
-}  // namespace hws
+}  // namespace hws::detail
 
diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp
index 05560b1..b347f0a 100644
--- a/include/hardware_sampling/gpu_nvidia/utility.hpp
+++ b/include/hardware_sampling/gpu_nvidia/utility.hpp
@@ -13,12 +13,10 @@
 #pragma once
 
 #include "fmt/format.h"  // fmt::format
-#include "fmt/ranges.h"  // fmt::join
 #include "nvml.h"        // NVML runtime functions
 
 #include <stdexcept>  // std::runtime_error
 #include <string>     // std::string
-#include <vector>     // std::vector
 
 namespace hws::detail {
 
@@ -39,48 +37,12 @@ namespace hws::detail {
     #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func;
 #endif
 
-// TODO: move to cpp file
-
 /**
  * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|".
  * @param[in] clocks_event_reasons the bitmask to convert to a string
  * @return all event throttle reasons
  */
-[[nodiscard]] inline std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) {
-    if (clocks_event_reasons == 0ull) {
-        return "None";
-    } else {
-        std::vector<std::string> reasons{};
-        if ((clocks_event_reasons & nvmlClocksEventReasonApplicationsClocksSetting) != 0ull) {
-            reasons.emplace_back("ApplicationsClocksSetting");
-        }
-        if ((clocks_event_reasons & nvmlClocksEventReasonDisplayClockSetting) != 0ull) {
-            reasons.emplace_back("DisplayClockSetting");
-        }
-        if ((clocks_event_reasons & nvmlClocksEventReasonGpuIdle) != 0ull) {
-            reasons.emplace_back("GpuIdle");
-        }
-        if ((clocks_event_reasons & nvmlClocksEventReasonSwPowerCap) != 0ull) {
-            reasons.emplace_back("SwPowerCap");
-        }
-        if ((clocks_event_reasons & nvmlClocksEventReasonSwThermalSlowdown) != 0ull) {
-            reasons.emplace_back("SwThermalSlowdown");
-        }
-        if ((clocks_event_reasons & nvmlClocksEventReasonSyncBoost) != 0ull) {
-            reasons.emplace_back("SyncBoost");
-        }
-        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwPowerBrakeSlowdown) != 0ull) {
-            reasons.emplace_back("HwPowerBrakeSlowdown");
-        }
-        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwSlowdown) != 0ull) {
-            reasons.emplace_back("HwSlowdown");
-        }
-        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) {
-            reasons.emplace_back("HwThermalSlowdown");
-        }
-        return fmt::format("{}", fmt::join(reasons, "|"));
-    }
-}
+[[nodiscard]] std::string throttle_event_reason_to_string(unsigned long long clocks_event_reasons);
 
 }  // namespace hws::detail
 
diff --git a/src/hardware_sampling/gpu_amd/utility.cpp b/src/hardware_sampling/gpu_amd/utility.cpp
new file mode 100644
index 0000000..3164c18
--- /dev/null
+++ b/src/hardware_sampling/gpu_amd/utility.cpp
@@ -0,0 +1,43 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hardware_sampling/gpu_amd/utility.hpp"
+
+#include "rocm_smi/rocm_smi.h"  // ROCm SMI runtime functions
+
+#include <string>  // std::string
+
+namespace hws::detail {
+
+std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) {
+    switch (perf_level) {
+        case RSMI_DEV_PERF_LEVEL_AUTO:
+            return "auto";
+        case RSMI_DEV_PERF_LEVEL_LOW:
+            return "low";
+        case RSMI_DEV_PERF_LEVEL_HIGH:
+            return "high";
+        case RSMI_DEV_PERF_LEVEL_MANUAL:
+            return "manual";
+        case RSMI_DEV_PERF_LEVEL_STABLE_STD:
+            return "stable_std";
+        case RSMI_DEV_PERF_LEVEL_STABLE_PEAK:
+            return "stable_peak";
+        case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK:
+            return "stable_min_mclk";
+        case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK:
+            return "stable_min_sclk";
+        case RSMI_DEV_PERF_LEVEL_DETERMINISM:
+            return "determinism";
+        case RSMI_DEV_PERF_LEVEL_UNKNOWN:
+            return "unknown";
+    }
+    // fallback for values that match none of the cases above: never flow off the end of a non-void function
+    return "unknown";
+}
+
+}  // namespace hws::detail
diff --git a/src/hardware_sampling/gpu_nvidia/utility.cpp b/src/hardware_sampling/gpu_nvidia/utility.cpp
new file mode 100644
index 0000000..70883e6
--- /dev/null
+++ b/src/hardware_sampling/gpu_nvidia/utility.cpp
@@ -0,0 +1,55 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hardware_sampling/gpu_nvidia/utility.hpp"
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+#include "nvml.h"        // NVML runtime functions
+
+#include <string>  // std::string
+#include <vector>  // std::vector
+
+namespace hws::detail {
+
+std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) {
+    if (clocks_event_reasons == 0ull) {
+        return "None";
+    } else {
+        std::vector<std::string> reasons{};
+        if ((clocks_event_reasons & nvmlClocksEventReasonApplicationsClocksSetting) != 0ull) {
+            reasons.emplace_back("ApplicationsClocksSetting");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonDisplayClockSetting) != 0ull) {
+            reasons.emplace_back("DisplayClockSetting");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonGpuIdle) != 0ull) {
+            reasons.emplace_back("GpuIdle");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonSwPowerCap) != 0ull) {
+            reasons.emplace_back("SwPowerCap");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonSwThermalSlowdown) != 0ull) {
+            reasons.emplace_back("SwThermalSlowdown");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonSyncBoost) != 0ull) {
+            reasons.emplace_back("SyncBoost");
+        }
+        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwPowerBrakeSlowdown) != 0ull) {
+            reasons.emplace_back("HwPowerBrakeSlowdown");
+        }
+        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwSlowdown) != 0ull) {
+            reasons.emplace_back("HwSlowdown");
+        }
+        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) {
+            reasons.emplace_back("HwThermalSlowdown");
+        }
+        return fmt::format("{}", fmt::join(reasons, "|"));
+    }
+}
+
+}  // namespace hws::detail

From 527c63530be926ef96aec1b55a6f4d0fe464b581 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 16:03:32 +0200
Subject: [PATCH 25/69] Clean-up utility header.

---
 include/hardware_sampling/utility.hpp | 156 +++++++++++++-------------
 src/hardware_sampling/utility.cpp     |   4 +
 2 files changed, 85 insertions(+), 75 deletions(-)

diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp
index 2eb7451..ff7d3f6 100644
--- a/include/hardware_sampling/utility.hpp
+++ b/include/hardware_sampling/utility.hpp
@@ -16,17 +16,15 @@
 #include "fmt/ranges.h"  // fmt::join
 
 #include <charconv>      // std::from_chars
-#include <chrono>        // std::chrono::{milliseconds, duration_cast}
+#include <chrono>        // std::chrono::duration
 #include <cmath>         // std::trunc
 #include <cstddef>       // std::size_t
-#include <iterator>      // std::back_inserter, std::next, std::prev
 #include <optional>      // std::optional
-#include <sstream>       // std::basic_stringstream
 #include <stdexcept>     // std::runtime_error
 #include <string>        // std::string, std::stof, std::stod, std::stold
-#include <string_view>   // std::string_view, std::basic_string_view
+#include <string_view>   // std::string_view
 #include <system_error>  // std::errc
-#include <type_traits>   // std::is_same_v, std::remove_cv_t, std::remove_reference_t
+#include <type_traits>   // std::is_same_v, std::is_floating_point_v, std::remove_cv_t, std::remove_reference_t, std::true_type, std::false_type
 #include <vector>        // std::vector
 
 namespace hws::detail {
@@ -56,69 +54,33 @@ namespace hws::detail {
   private:                                                                                            \
     std::optional<std::vector<sample_type>> sample_name##_{};
 
-// TODO: clean-up
-
-/**
- * @brief Checks whether the string @p sv starts with the substring @p start
- * @param[in] sv the full string
- * @param[in] start the substring
- * @return `true` if @p sv starts with @p start, otherwise `false`
- */
-[[nodiscard]] inline bool starts_with(const std::string_view sv, const std::string_view start) {
-    return sv.substr(0, start.size()) == start;
-}
+/*****************************************************************************************************/
+/**                                          type_traits                                            **/
+/*****************************************************************************************************/
 
 template <typename T>
 using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;
 
-/**
- * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point.
- * @tparam TimePoint the type if the time points
- * @param[in] time_points the time points
- * @param[in] reference the reference time point
- * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`)
- */
-template <typename TimePoint>
-[[nodiscard]] inline std::vector<double> durations_from_reference_time(const std::vector<TimePoint> &time_points, const TimePoint &reference) {
-    std::vector<double> durations(time_points.size());
-
-    for (std::size_t i = 0; i < durations.size(); ++i) {
-        durations[i] = std::trunc(std::chrono::duration<double>(time_points[i] - reference).count() * 1000.0) / 1000.0;
-    }
+template <typename T>
+struct is_vector : std::false_type { };
 
-    return durations;
-}
+template <typename T>
+struct is_vector<std::vector<T>> : std::true_type { };
 
-/**
- * @brief Convert all time points to their duration since the epoch start.
- * @tparam TimePoint the type of the time points
- * @param[in] time_points the time points
- * @return the duration passed since the respective @p TimePoint epoch start (`[[nodiscard]]`)
- */
-template <typename TimePoint>
-[[nodiscard]] inline std::vector<typename TimePoint::duration> time_points_to_epoch(const std::vector<TimePoint> &time_points) {
-    std::vector<typename TimePoint::duration> times(time_points.size());
+template <typename T>
+constexpr bool is_vector_v = is_vector<T>::value;
 
-    for (std::size_t i = 0; i < times.size(); ++i) {
-        times[i] = time_points[i].time_since_epoch();
-    }
-    return times;
-}
+/*****************************************************************************************************/
+/**                                      string manipulation                                        **/
+/*****************************************************************************************************/
 
 /**
- * @brief Return the value encapsulated by the std::optional @p opt if it contains a value, otherwise a default constructed @p T is returned.
- * @tparam T the type of the value stored in the std::optional
- * @param[in] opt the std::optional to check
- * @return the value of the std::optional or a default constructed @p T (`[[nodiscard]]`)
+ * @brief Checks whether the string @p sv starts with the substring @p start
+ * @param[in] sv the full string
+ * @param[in] start the substring
+ * @return `true` if @p sv starts with @p start, otherwise `false`
  */
-template <typename T>
-[[nodiscard]] inline T value_or_default(const std::optional<T> &opt) {
-    if (opt.has_value()) {
-        return opt.value();
-    } else {
-        return T{};
-    }
-}
+[[nodiscard]] bool starts_with(std::string_view sv, std::string_view start) noexcept;
 
 /**
  * @brief Trim the @p str, i.e., remove all leading and trailing whitespace characters.
@@ -134,6 +96,14 @@ template <typename T>
  */
 [[nodiscard]] std::string to_lower_case(std::string_view str);
 
+/**
+ * @brief Split the @p str at the delimiters @p delim.
+ * @param[in] str the string to split
+ * @param[in] delim the used delimiter
+ * @return a vector containing all split tokens (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::vector<std::string_view> split(std::string_view str, char delim = ' ');
+
 /**
  * @brief Convert the @p str to a value of type @p T.
  * @tparam T the type to convert the string to
@@ -212,23 +182,6 @@ template <typename T>
     return split_str;
 }
 
-/**
- * @brief Split the @p str at the delimiters @p delim.
- * @param[in] str the string to split
- * @param[in] delim the used delimiter
- * @return a vector containing all split tokens (`[[nodiscard]]`)
- */
-[[nodiscard]] std::vector<std::string_view> split(std::string_view str, char delim = ' ');
-
-template <typename T>
-struct is_vector : std::false_type { };
-
-template <typename T>
-struct is_vector<std::vector<T>> : std::true_type { };
-
-template <typename T>
-constexpr bool is_vector_v = is_vector<T>::value;
-
 /**
  * @brief Convert all entries in the map to a single dict-like string.
  * @details The resulting string is of form "{KEY, VALUE}" or "{KEY, [VALUES]}".
@@ -272,6 +225,59 @@ template <typename T>
     return quoted;
 }
 
+/*****************************************************************************************************/
+/**                                      other free functions                                       **/
+/*****************************************************************************************************/
+
+/**
+ * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point.
+ * @tparam TimePoint the type of the time points
+ * @param[in] time_points the time points
+ * @param[in] reference the reference time point
+ * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`)
+ */
+template <typename TimePoint>
+[[nodiscard]] inline std::vector<double> durations_from_reference_time(const std::vector<TimePoint> &time_points, const TimePoint &reference) {
+    std::vector<double> durations(time_points.size());
+
+    for (std::size_t i = 0; i < durations.size(); ++i) {
+        durations[i] = std::trunc(std::chrono::duration<double>(time_points[i] - reference).count() * 1000.0) / 1000.0;
+    }
+
+    return durations;
+}
+
+/**
+ * @brief Convert all time points to their duration since the epoch start.
+ * @tparam TimePoint the type of the time points
+ * @param[in] time_points the time points
+ * @return the duration passed since the respective @p TimePoint epoch start (`[[nodiscard]]`)
+ */
+template <typename TimePoint>
+[[nodiscard]] inline std::vector<typename TimePoint::duration> time_points_to_epoch(const std::vector<TimePoint> &time_points) {
+    std::vector<typename TimePoint::duration> times(time_points.size());
+
+    for (std::size_t i = 0; i < times.size(); ++i) {
+        times[i] = time_points[i].time_since_epoch();
+    }
+    return times;
+}
+
+/**
+ * @brief Return the value encapsulated by the std::optional @p opt if it contains a value, otherwise a default constructed @p T is returned.
+ * @tparam T the type of the value stored in the std::optional
+ * @param[in] opt the std::optional to check
+ * @return the value of the std::optional or a default constructed @p T (`[[nodiscard]]`)
+ */
+template <typename T>
+[[nodiscard]] inline T value_or_default(const std::optional<T> &opt) {
+    if (opt.has_value()) {
+        return opt.value();
+    } else {
+        return T{};
+    }
+}
+
 }  // namespace hws::detail
 
 #endif  // HARDWARE_SAMPLING_UTILITY_HPP_
diff --git a/src/hardware_sampling/utility.cpp b/src/hardware_sampling/utility.cpp
index 58e604c..9e2dbc2 100644
--- a/src/hardware_sampling/utility.cpp
+++ b/src/hardware_sampling/utility.cpp
@@ -15,6 +15,10 @@
 
 namespace hws::detail {
 
+bool starts_with(const std::string_view sv, const std::string_view start) noexcept {
+    return sv.substr(0, start.size()) == start;
+}
+
 std::string_view trim(std::string_view str) noexcept {
     // trim right
     {

From 131de00e7dc67e6c49dfa153cda08e1973b9340a Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 16:49:31 +0200
Subject: [PATCH 26/69] Update Python bindings.

---
 README.md                                |  96 ++++++++++----------
 bindings/cpu_hardware_sampler.cpp        |  43 ++++-----
 bindings/event.cpp                       |   5 +-
 bindings/gpu_amd_hardware_sampler.cpp    | 106 ++++++++++++-----------
 bindings/gpu_intel_hardware_sampler.cpp  |  14 +--
 bindings/gpu_nvidia_hardware_sampler.cpp |  78 +++++++++--------
 bindings/hardware_sampler.cpp            |  17 ++--
 bindings/main.cpp                        |  28 +++---
 8 files changed, 204 insertions(+), 183 deletions(-)

diff --git a/README.md b/README.md
index f74d6b9..bb02f24 100644
--- a/README.md
+++ b/README.md
@@ -122,22 +122,22 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 ### power-related samples
 
-| sample                      |               CPUs                | NVIDIA GPUs |  AMD GPUs   | Intel GPUs |
-|:----------------------------|:---------------------------------:|:-----------:|:-----------:|:----------:|
-| power_management_limit      |                 -                 |      W      |      W      |            |
-| power_enforced_limit        |                 -                 |      W      |      W      |            |
-| power_measurement_type      |             str (fix)             |     str     |     str     |            |
-| power_management_mode       |                 -                 |    bool     |      -      |            |
-| available_power_profiles    |                 -                 | list of int | list of str |            |
-| power_usage                 |                 W                 |      W      |      W      |            |
-| core_watt                   |                 W                 |      -      |      -      |     -      |
-| dram_watt                   |                 W                 |      -      |      -      |     -      |
-| package_rapl_throttling     |                 %                 |      -      |      -      |     -      |
-| dram_rapl_throttling        |                 %                 |      -      |      -      |     -      |
-| power_total_energy_consumed | J<br>(calculated via power_usage) |      J      |      J      |     J      |
-| power_profile               |                 -                 |     int     |     str     |            |
-| energy_threshold_enabled    |                                   |             |             |    bool    |
-| energy_threshold            |                                   |             |             |     J      |
+| sample                         |               CPUs                | NVIDIA GPUs |  AMD GPUs   | Intel GPUs |
+|:-------------------------------|:---------------------------------:|:-----------:|:-----------:|:----------:|
+| power_management_limit         |                 -                 |      W      |      W      |            |
+| power_enforced_limit           |                 -                 |      W      |      W      |            |
+| power_measurement_type         |             str (fix)             |     str     |     str     |            |
+| power_management_mode          |                 -                 |    bool     |      -      |            |
+| available_power_profiles       |                 -                 | list of int | list of str |            |
+| power_usage                    |                 W                 |      W      |      W      |            |
+| core_watt                      |                 W                 |      -      |      -      |     -      |
+| dram_watt                      |                 W                 |      -      |      -      |     -      |
+| package_rapl_throttling        |                 %                 |      -      |      -      |     -      |
+| dram_rapl_throttling           |                 %                 |      -      |      -      |     -      |
+| power_total_energy_consumption | J<br>(calculated via power_usage) |      J      |      J      |     J      |
+| power_profile                  |                 -                 |     int     |     str     |            |
+| energy_threshold_enabled       |                                   |             |             |    bool    |
+| energy_threshold               |                                   |             |             |     J      |
 
 ### memory-related samples
 
@@ -175,38 +175,38 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 ### temperature-related samples
 
-| sample                   | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
-|:-------------------------|:----:|:-----------:|:--------:|:----------:|
-| num_fans                 |  -   |     int     |   int    |            |
-| fan_speed_min            |  -   |      %      |    -     |            | 
-| fan_speed_max            |  -   |      %      |   RPM    |            |
-| temperature_min          |  -   |      -      |    °C    |            |
-| temperature_max          |  -   |     °C      |    °C    |            |
-| memory_temperature_min   |  -   |      -      |    °C    |            |
-| memory_temperature_max   |  -   |     °C      |    °C    |            |
-| hotspot_temperature_min  |  -   |      -      |    °C    |            |
-| hotspot_temperature_max  |  -   |      -      |    °C    |            |
-| hbm_0_temperature_min    |  -   |      -      |    °C    |            |
-| hbm_0_temperature_max    |  -   |      -      |    °C    |            |
-| hbm_1_temperature_min    |  -   |      -      |    °C    |            |
-| hbm_1_temperature_max    |  -   |      -      |    °C    |            |
-| hbm_2_temperature_min    |  -   |      -      |    °C    |            |
-| hbm_2_temperature_max    |  -   |      -      |    °C    |            |
-| hbm_3_temperature_min    |  -   |      -      |    °C    |            |
-| hbm_3_temperature_max    |  -   |      -      |    °C    |            |
-| fan_speed_percentage     |  -   |      %      |    %     |            |
-| temperature              |  °C  |     °C      |    °C    |            |
-| memory_temperature       |  -   |      -      |    °C    |            |
-| hotspot_temperature      |  -   |      -      |    °C    |            |
-| hbm_0_temperature        |  -   |      -      |    °C    |            |
-| hbm_1_temperature        |  -   |      -      |    °C    |            |
-| hbm_2_temperature        |  -   |      -      |    °C    |            |
-| hbm_3_temperature        |  -   |      -      |    °C    |            |
-| temperature_{}_max       |      |             |          |            |
-| temperature_psu          |      |             |          |            |
-| temperature_{}           |      |             |          |            |
-| core_temperature         |  °C  |      -      |    -     |     -      |
-| core_throttle_percentage |  %   |      -      |    -     |     -      |
+| sample                  | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
+|:------------------------|:----:|:-----------:|:--------:|:----------:|
+| num_fans                |  -   |     int     |   int    |            |
+| fan_speed_min           |  -   |      %      |    -     |            | 
+| fan_speed_max           |  -   |      %      |   RPM    |            |
+| temperature_min         |  -   |      -      |    °C    |            |
+| temperature_max         |  -   |     °C      |    °C    |            |
+| memory_temperature_min  |  -   |      -      |    °C    |            |
+| memory_temperature_max  |  -   |     °C      |    °C    |            |
+| hotspot_temperature_min |  -   |      -      |    °C    |            |
+| hotspot_temperature_max |  -   |      -      |    °C    |            |
+| hbm_0_temperature_min   |  -   |      -      |    °C    |            |
+| hbm_0_temperature_max   |  -   |      -      |    °C    |            |
+| hbm_1_temperature_min   |  -   |      -      |    °C    |            |
+| hbm_1_temperature_max   |  -   |      -      |    °C    |            |
+| hbm_2_temperature_min   |  -   |      -      |    °C    |            |
+| hbm_2_temperature_max   |  -   |      -      |    °C    |            |
+| hbm_3_temperature_min   |  -   |      -      |    °C    |            |
+| hbm_3_temperature_max   |  -   |      -      |    °C    |            |
+| fan_speed_percentage    |  -   |      %      |    %     |            |
+| temperature             |  °C  |     °C      |    °C    |            |
+| memory_temperature      |  -   |      -      |    °C    |            |
+| hotspot_temperature     |  -   |      -      |    °C    |            |
+| hbm_0_temperature       |  -   |      -      |    °C    |            |
+| hbm_1_temperature       |  -   |      -      |    °C    |            |
+| hbm_2_temperature       |  -   |      -      |    °C    |            |
+| hbm_3_temperature       |  -   |      -      |    °C    |            |
+| temperature_{}_max      |      |             |          |            |
+| temperature_psu         |      |             |          |            |
+| temperature_{}          |      |             |          |            |
+| core_temperature        |  °C  |      -      |    -     |     -      |
+| core_throttle_percent   |  %   |      -      |    -     |     -      |
 
 ### gfx-related (iGPU) samples
 
diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp
index 75f0a01..9ed204d 100644
--- a/bindings/cpu_hardware_sampler.cpp
+++ b/bindings/cpu_hardware_sampler.cpp
@@ -9,12 +9,12 @@
 #include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
 #include "hardware_sampling/hardware_sampler.hpp"      // hws::hardware_sampler
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
 #include <chrono>  // std::chrono::milliseconds
-#include <format>  // std::format
 
 namespace py = pybind11;
 
@@ -23,6 +23,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
     py::class_<hws::cpu_general_samples>(m, "CpuGeneralSamples")
         .def("get_architecture", &hws::cpu_general_samples::get_architecture, "the CPU architecture (e.g., x86_64)")
         .def("get_byte_order", &hws::cpu_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
+        .def("get_num_cores", &hws::cpu_general_samples::get_num_cores, "the total number of cores of the CPU(s)")
         .def("get_num_threads", &hws::cpu_general_samples::get_num_threads, "the number of threads of the CPU(s) including potential hyper-threads")
         .def("get_threads_per_core", &hws::cpu_general_samples::get_threads_per_core, "the number of hyper-threads per core")
         .def("get_cores_per_socket", &hws::cpu_general_samples::get_cores_per_socket, "the number of physical cores per socket")
@@ -31,45 +32,47 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("get_vendor_id", &hws::cpu_general_samples::get_vendor_id, "the vendor ID (e.g. GenuineIntel)")
         .def("get_name", &hws::cpu_general_samples::get_name, "the name of the CPU")
         .def("get_flags", &hws::cpu_general_samples::get_flags, "potential CPU flags (e.g., sse4_1, avx, avx, etc)")
-        .def("get_busy_percent", &hws::cpu_general_samples::get_busy_percent, "the percent the CPU was busy doing work")
+        .def("get_compute_utilization", &hws::cpu_general_samples::get_compute_utilization, "the percent the CPU was busy doing work")
         .def("get_ipc", &hws::cpu_general_samples::get_ipc, "the instructions-per-cycle count")
         .def("get_irq", &hws::cpu_general_samples::get_irq, "the number of interrupts")
         .def("get_smi", &hws::cpu_general_samples::get_smi, "the number of system management interrupts")
         .def("get_poll", &hws::cpu_general_samples::get_poll, "the number of times the CPU was in the polling state")
         .def("get_poll_percent", &hws::cpu_general_samples::get_poll_percent, "the percent of the CPU was in the polling state")
         .def("__repr__", [](const hws::cpu_general_samples &self) {
-            return std::format("<HardwareSampling.CpuGeneralSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuGeneralSamples with\n{}\n>", self);
         });
 
     // bind the clock samples
     py::class_<hws::cpu_clock_samples>(m, "CpuClockSamples")
-        .def("get_frequency_boost", &hws::cpu_clock_samples::get_frequency_boost, "true if frequency boosting is enabled")
-        .def("get_min_frequency", &hws::cpu_clock_samples::get_min_frequency, "the minimum possible CPU frequency in MHz")
-        .def("get_max_frequency", &hws::cpu_clock_samples::get_max_frequency, "the maximum possible CPU frequency in MHz")
-        .def("get_average_frequency", &hws::cpu_clock_samples::get_average_frequency, "the average CPU frequency in MHz including idle cores")
+        .def("get_auto_boosted_clock_enabled", &hws::cpu_clock_samples::get_auto_boosted_clock_enabled, "true if frequency boosting is enabled")
+        .def("get_clock_frequency_min", &hws::cpu_clock_samples::get_clock_frequency_min, "the minimum possible CPU frequency in MHz")
+        .def("get_clock_frequency_max", &hws::cpu_clock_samples::get_clock_frequency_max, "the maximum possible CPU frequency in MHz")
+        .def("get_clock_frequency", &hws::cpu_clock_samples::get_clock_frequency, "the average CPU frequency in MHz including idle cores")
         .def("get_average_non_idle_frequency", &hws::cpu_clock_samples::get_average_non_idle_frequency, "the average CPU frequency in MHz excluding idle cores")
         .def("get_time_stamp_counter", &hws::cpu_clock_samples::get_time_stamp_counter, "the time stamp counter")
         .def("__repr__", [](const hws::cpu_clock_samples &self) {
-            return std::format("<HardwareSampling.CpuClockSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuClockSamples with\n{}\n>", self);
         });
 
     // bind the power samples
     py::class_<hws::cpu_power_samples>(m, "CpuPowerSamples")
-        .def("get_package_watt", &hws::cpu_power_samples::get_package_watt, "the currently consumed power of the package of the CPU in W")
+        .def("get_power_measurement_type", &hws::cpu_power_samples::get_power_measurement_type, "the type of the power readings: always \"instant/current\"")
+        .def("get_power_usage", &hws::cpu_power_samples::get_power_usage, "the currently consumed power of the package of the CPU in W")
+        .def("get_power_total_energy_consumption", &hws::cpu_power_samples::get_power_total_energy_consumption, "the total power consumption in J")
         .def("get_core_watt", &hws::cpu_power_samples::get_core_watt, "the currently consumed power of the core part of the CPU in W")
         .def("get_ram_watt", &hws::cpu_power_samples::get_ram_watt, "the currently consumed power of the RAM part of the CPU in W")
         .def("get_package_rapl_throttle_percent", &hws::cpu_power_samples::get_package_rapl_throttle_percent, "the percent of time the package throttled due to RAPL limiters")
         .def("get_dram_rapl_throttle_percent", &hws::cpu_power_samples::get_dram_rapl_throttle_percent, "the percent of time the DRAM throttled due to RAPL limiters")
         .def("__repr__", [](const hws::cpu_power_samples &self) {
-            return std::format("<HardwareSampling.CpuPowerSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuPowerSamples with\n{}\n>", self);
         });
 
     // bind the memory samples
     py::class_<hws::cpu_memory_samples>(m, "CpuMemorySamples")
-        .def("get_l1d_cache", &hws::cpu_memory_samples::get_l1d_cache, "the size of the L1 data cache")
-        .def("get_l1i_cache", &hws::cpu_memory_samples::get_l1i_cache, "the size of the L1 instruction cache")
-        .def("get_l2_cache", &hws::cpu_memory_samples::get_l2_cache, "the size of the L2 cache")
-        .def("get_l3_cache", &hws::cpu_memory_samples::get_l3_cache, "the size of the L2 cache")
+        .def("get_cache_size_L1d", &hws::cpu_memory_samples::get_cache_size_L1d, "the size of the L1 data cache")
+        .def("get_cache_size_L1i", &hws::cpu_memory_samples::get_cache_size_L1i, "the size of the L1 instruction cache")
+        .def("get_cache_size_L2", &hws::cpu_memory_samples::get_cache_size_L2, "the size of the L2 cache")
+        .def("get_cache_size_L3", &hws::cpu_memory_samples::get_cache_size_L3, "the size of the L3 cache")
         .def("get_memory_total", &hws::cpu_memory_samples::get_memory_total, "the total available memory in Byte")
         .def("get_swap_memory_total", &hws::cpu_memory_samples::get_swap_memory_total, "the total available swap memory in Byte")
         .def("get_memory_used", &hws::cpu_memory_samples::get_memory_used, "the currently used memory in Byte")
@@ -77,16 +80,16 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("get_swap_memory_used", &hws::cpu_memory_samples::get_swap_memory_used, "the currently used swap memory in Byte")
         .def("get_swap_memory_free", &hws::cpu_memory_samples::get_swap_memory_free, "the currently free swap memory in Byte")
         .def("__repr__", [](const hws::cpu_memory_samples &self) {
-            return std::format("<HardwareSampling.CpuMemorySamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuMemorySamples with\n{}\n>", self);
         });
 
     // bind the temperature samples
     py::class_<hws::cpu_temperature_samples>(m, "CpuTemperatureSamples")
+        .def("get_temperature", &hws::cpu_temperature_samples::get_temperature, "the current temperature of the whole package in °C")
         .def("get_core_temperature", &hws::cpu_temperature_samples::get_core_temperature, "the current temperature of the core part of the CPU in °C")
         .def("get_core_throttle_percent", &hws::cpu_temperature_samples::get_core_throttle_percent, "the percent of time the CPU has throttled")
-        .def("get_package_temperature", &hws::cpu_temperature_samples::get_package_temperature, "the current temperature of the whole package in °C")
         .def("__repr__", [](const hws::cpu_temperature_samples &self) {
-            return std::format("<HardwareSampling.CpuTemperatureSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuTemperatureSamples with\n{}\n>", self);
         });
 
     // bind the gfx samples
@@ -98,7 +101,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("get_cpu_works_for_gpu_percent", &hws::cpu_gfx_samples::get_cpu_works_for_gpu_percent, "the percent of time the CPU was doing work for the iGPU")
         .def("get_gfx_watt", &hws::cpu_gfx_samples::get_gfx_watt, "the currently consumed power of the iGPU of the CPU in W")
         .def("__repr__", [](const hws::cpu_gfx_samples &self) {
-            return std::format("<HardwareSampling.CpuGfxSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuGfxSamples with\n{}\n>", self);
         });
 
     // bind the idle state samples
@@ -110,7 +113,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("get_system_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_system_low_power_idle_state_percent, "the percent of time the CPU was in the system low power idle state")
         .def("get_package_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_package_low_power_idle_state_percent, "the percent of time the CPU was in the package low power idle state")
         .def("__repr__", [](const hws::cpu_gfx_samples &self) {
-            return std::format("<HardwareSampling.CpuIdleStateSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuIdleStateSamples with\n{}\n>", self);
         });
 
     // bind the CPU hardware sampler class
@@ -125,6 +128,6 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("gfx_samples", &hws::cpu_hardware_sampler::gfx_samples, "get all gfx (iGPU) related samples")
         .def("idle_state_samples", &hws::cpu_hardware_sampler::idle_state_samples, "get all idle state related samples")
         .def("__repr__", [](const hws::cpu_hardware_sampler &self) {
-            return std::format("<HardwareSampling.CpuHardwareSampler with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuHardwareSampler with\n{}\n>", self);
         });
 }
diff --git a/bindings/event.cpp b/bindings/event.cpp
index ba463ad..8a9696a 100644
--- a/bindings/event.cpp
+++ b/bindings/event.cpp
@@ -7,12 +7,11 @@
 
 #include "hardware_sampling/event.hpp"  // hws::event
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // bind std::chrono types
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
-#include <format>  // std::format
-
 namespace py = pybind11;
 
 void init_event(py::module_ &m) {
@@ -22,6 +21,6 @@ void init_event(py::module_ &m) {
         .def_readonly("time_point", &hws::event::time_point, "read the time point associated to this event")
         .def_readonly("name", &hws::event::name, "read the name associated to this event")
         .def("__repr__", [](const hws::event &self) {
-            return std::format("<HardWareSampling.Event with {{ time_point: {}, name: {} }}>", self.time_point.time_since_epoch(), self.name);
+            return fmt::format("<HardWareSampling.Event with {{ time_point: {}, name: {} }}>", self.time_point.time_since_epoch(), self.name);
         });
 }
diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp
index f43c741..9ffd042 100644
--- a/bindings/gpu_amd_hardware_sampler.cpp
+++ b/bindings/gpu_amd_hardware_sampler.cpp
@@ -9,98 +9,106 @@
 #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
 #include <chrono>   // std::chrono::milliseconds
 #include <cstddef>  // std::size_t
-#include <format>   // std::format
 
 namespace py = pybind11;
 
 void init_gpu_amd_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::rocm_smi_general_samples>(m, "RocmSmiGeneralSamples")
+        .def("get_architecture", &hws::rocm_smi_general_samples::get_architecture, "the architecture name of the device")
+        .def("get_byte_order", &hws::rocm_smi_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
+        .def("get_vendor_id", &hws::rocm_smi_general_samples::get_vendor_id, "the vendor ID")
         .def("get_name", &hws::rocm_smi_general_samples::get_name, "the name of the device")
+        .def("get_compute_utilization", &hws::rocm_smi_general_samples::get_compute_utilization, "the GPU compute utilization in percent")
+        .def("get_memory_utilization", &hws::rocm_smi_general_samples::get_memory_utilization, "the GPU memory utilization in percent")
         .def("get_performance_level", &hws::rocm_smi_general_samples::get_performance_level, "the performance level: one of rsmi_dev_perf_level_t")
-        .def("get_utilization_gpu", &hws::rocm_smi_general_samples::get_utilization_gpu, "the GPU compute utilization in percent")
-        .def("get_utilization_mem", &hws::rocm_smi_general_samples::get_utilization_mem, "the GPU memory utilization in percent")
         .def("__repr__", [](const hws::rocm_smi_general_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiGeneralSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiGeneralSamples with\n{}\n>", self);
         });
 
     // bind the clock samples
     py::class_<hws::rocm_smi_clock_samples>(m, "RocmSmiClockSamples")
-        .def("get_clock_system_min", &hws::rocm_smi_clock_samples::get_clock_system_min, "the minimum possible system clock frequency in Hz")
-        .def("get_clock_system_max", &hws::rocm_smi_clock_samples::get_clock_system_max, "the maximum possible system clock frequency in Hz")
-        .def("get_clock_socket_min", &hws::rocm_smi_clock_samples::get_clock_socket_min, "the minimum possible socket clock frequency in Hz")
-        .def("get_clock_socket_max", &hws::rocm_smi_clock_samples::get_clock_socket_max, "the maximum possible socket clock frequency in Hz")
-        .def("get_clock_memory_min", &hws::rocm_smi_clock_samples::get_clock_memory_min, "the minimum possible memory clock frequency in Hz")
-        .def("get_clock_memory_max", &hws::rocm_smi_clock_samples::get_clock_memory_max, "the maximum possible memory clock frequency in Hz")
-        .def("get_clock_system", &hws::rocm_smi_clock_samples::get_clock_system, "the current system clock frequency in Hz")
-        .def("get_clock_socket", &hws::rocm_smi_clock_samples::get_clock_socket, "the current socket clock frequency in Hz")
-        .def("get_clock_memory", &hws::rocm_smi_clock_samples::get_clock_memory, "the current memory clock frequency in Hz")
+        .def("get_clock_frequency_min", &hws::rocm_smi_clock_samples::get_clock_frequency_min, "the minimum possible system clock frequency in MHz")
+        .def("get_clock_frequency_max", &hws::rocm_smi_clock_samples::get_clock_frequency_max, "the maximum possible system clock frequency in MHz")
+        .def("get_memory_clock_frequency_min", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz")
+        .def("get_memory_clock_frequency_max", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz")
+        .def("get_socket_clock_frequency_min", &hws::rocm_smi_clock_samples::get_socket_clock_frequency_min, "the minimum possible socket clock frequency in MHz")
+        .def("get_socket_clock_frequency_max", &hws::rocm_smi_clock_samples::get_socket_clock_frequency_max, "the maximum possible socket clock frequency in MHz")
+        .def("get_available_clock_frequencies", &hws::rocm_smi_clock_samples::get_available_clock_frequencies, "the available clock frequencies in MHz (slowest to fastest)")
+        .def("get_available_memory_clock_frequencies", &hws::rocm_smi_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)")
+        .def("get_clock_frequency", &hws::rocm_smi_clock_samples::get_clock_frequency, "the current system clock frequency in MHz")
+        .def("get_memory_clock_frequency", &hws::rocm_smi_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz")
+        .def("get_socket_clock_frequency", &hws::rocm_smi_clock_samples::get_socket_clock_frequency, "the current socket clock frequency in MHz")
         .def("get_overdrive_level", &hws::rocm_smi_clock_samples::get_overdrive_level, "the GPU overdrive percentage")
         .def("get_memory_overdrive_level", &hws::rocm_smi_clock_samples::get_memory_overdrive_level, "the GPU's memory overdrive percentage")
         .def("__repr__", [](const hws::rocm_smi_clock_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiClockSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiClockSamples with\n{}\n>", self);
         });
 
     // bind the power samples
     py::class_<hws::rocm_smi_power_samples>(m, "RocmSmiPowerSamples")
-        .def("get_power_default_cap", &hws::rocm_smi_power_samples::get_power_default_cap, "the default power cap, may be different from power cap")
-        .def("get_power_cap", &hws::rocm_smi_power_samples::get_power_cap, "if the GPU draws more power (μW) than the power cap, the GPU may throttle")
-        .def("get_power_type", &hws::rocm_smi_power_samples::get_power_type, "the type of the power management: either current power draw or average power draw")
+        .def("get_power_management_limit", &hws::rocm_smi_power_samples::get_power_management_limit, "the default power cap (W), may be different from power cap")
+        .def("get_power_enforced_limit", &hws::rocm_smi_power_samples::get_power_enforced_limit, "if the GPU draws more power (W) than the power cap, the GPU may throttle")
+        .def("get_power_measurement_type", &hws::rocm_smi_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw")
         .def("get_available_power_profiles", &hws::rocm_smi_power_samples::get_available_power_profiles, "a list of the available power profiles")
-        .def("get_power_usage", &hws::rocm_smi_power_samples::get_power_usage, "the current GPU socket power draw in μW")
-        .def("get_power_total_energy_consumption", &hws::rocm_smi_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in μJ")
+        .def("get_power_usage", &hws::rocm_smi_power_samples::get_power_usage, "the current GPU socket power draw in W")
+        .def("get_power_total_energy_consumption", &hws::rocm_smi_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in J")
         .def("get_power_profile", &hws::rocm_smi_power_samples::get_power_profile, "the current active power profile; one of 'available_power_profiles'")
         .def("__repr__", [](const hws::rocm_smi_power_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiPowerSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiPowerSamples with\n{}\n>", self);
         });
 
     // bind the memory samples
     py::class_<hws::rocm_smi_memory_samples>(m, "RocmSmiMemorySamples")
         .def("get_memory_total", &hws::rocm_smi_memory_samples::get_memory_total, "the total available memory in Byte")
         .def("get_visible_memory_total", &hws::rocm_smi_memory_samples::get_visible_memory_total, "the total visible available memory in Byte, may be smaller than the total memory")
-        .def("get_min_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_min_num_pcie_lanes, "the minimum number of used PCIe lanes")
-        .def("get_max_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_max_num_pcie_lanes, "the maximum number of used PCIe lanes")
+        .def("get_num_pcie_lanes_min", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_min, "the minimum number of used PCIe lanes")
+        .def("get_num_pcie_lanes_max", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_max, "the maximum number of used PCIe lanes")
+        .def("get_pcie_link_transfer_rate_min", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate_min, "the minimum PCIe link transfer rate in MT/s")
+        .def("get_pcie_link_transfer_rate_max", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate_max, "the maximum PCIe link transfer rate in MT/s")
         .def("get_memory_used", &hws::rocm_smi_memory_samples::get_memory_used, "the currently used memory in Byte")
-        .def("get_pcie_transfer_rate", &hws::rocm_smi_memory_samples::get_pcie_transfer_rate, "the current PCIe transfer rate in T/s")
+        .def("get_memory_free", &hws::rocm_smi_memory_samples::get_memory_free, "the currently free memory in Byte")
         .def("get_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_num_pcie_lanes, "the number of currently used PCIe lanes")
+        .def("get_pcie_link_transfer_rate", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate, "the current PCIe link transfer rate in MT/s")
         .def("__repr__", [](const hws::rocm_smi_memory_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiMemorySamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiMemorySamples with\n{}\n>", self);
         });
 
     // bind the temperature samples
     py::class_<hws::rocm_smi_temperature_samples>(m, "RocmSmiTemperatureSamples")
         .def("get_num_fans", &hws::rocm_smi_temperature_samples::get_num_fans, "the number of fans (if any)")
-        .def("get_max_fan_speed", &hws::rocm_smi_temperature_samples::get_max_fan_speed, "the maximum fan speed")
-        .def("get_temperature_edge_min", &hws::rocm_smi_temperature_samples::get_temperature_edge_min, "the minimum temperature on the GPU's edge temperature sensor in m°C")
-        .def("get_temperature_edge_max", &hws::rocm_smi_temperature_samples::get_temperature_edge_max, "the maximum temperature on the GPU's edge temperature sensor in m°C")
-        .def("get_temperature_hotspot_min", &hws::rocm_smi_temperature_samples::get_temperature_hotspot_min, "the minimum temperature on the GPU's hotspot temperature sensor in m°C")
-        .def("get_temperature_hotspot_max", &hws::rocm_smi_temperature_samples::get_temperature_hotspot_max, "the maximum temperature on the GPU's hotspot temperature sensor in m°C")
-        .def("get_temperature_memory_min", &hws::rocm_smi_temperature_samples::get_temperature_memory_min, "the minimum temperature on the GPU's memory temperature sensor in m°C")
-        .def("get_temperature_memory_max", &hws::rocm_smi_temperature_samples::get_temperature_memory_max, "the maximum temperature on the GPU's memory temperature sensor in m°C")
-        .def("get_temperature_hbm_0_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0_min, "the minimum temperature on the GPU's HBM0 temperature sensor in m°C")
-        .def("get_temperature_hbm_0_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0_max, "the maximum temperature on the GPU's HBM0 temperature sensor in m°C")
-        .def("get_temperature_hbm_1_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1_min, "the minimum temperature on the GPU's HBM1 temperature sensor in m°C")
-        .def("get_temperature_hbm_1_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1_max, "the maximum temperature on the GPU's HBM1 temperature sensor in m°C")
-        .def("get_temperature_hbm_2_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2_min, "the minimum temperature on the GPU's HBM2 temperature sensor in m°C")
-        .def("get_temperature_hbm_2_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2_max, "the maximum temperature on the GPU's HBM2 temperature sensor in m°C")
-        .def("get_temperature_hbm_3_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3_min, "the minimum temperature on the GPU's HBM3 temperature sensor in m°C")
-        .def("get_temperature_hbm_3_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3_max, "the maximum temperature on the GPU's HBM3 temperature sensor in m°C")
-        .def("get_fan_speed", &hws::rocm_smi_temperature_samples::get_fan_speed, "the current fan speed in %")
-        .def("get_temperature_edge", &hws::rocm_smi_temperature_samples::get_temperature_edge, "the current temperature on the GPU's edge temperature sensor in m°C")
-        .def("get_temperature_hotspot", &hws::rocm_smi_temperature_samples::get_temperature_hotspot, "the current temperature on the GPU's hotspot temperature sensor in m°C")
-        .def("get_temperature_memory", &hws::rocm_smi_temperature_samples::get_temperature_memory, "the current temperature on the GPU's memory temperature sensor in m°C")
-        .def("get_temperature_hbm_0", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0, "the current temperature on the GPU's HBM0 temperature sensor in m°C")
-        .def("get_temperature_hbm_1", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1, "the current temperature on the GPU's HBM1 temperature sensor in m°C")
-        .def("get_temperature_hbm_2", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2, "the current temperature on the GPU's HBM2 temperature sensor in m°C")
-        .def("get_temperature_hbm_3", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3, "the current temperature on the GPU's HBM3 temperature sensor in m°C")
+        .def("get_fan_speed_max", &hws::rocm_smi_temperature_samples::get_fan_speed_max, "the maximum fan speed in RPM")
+        .def("get_temperature_min", &hws::rocm_smi_temperature_samples::get_temperature_min, "the minimum temperature on the GPU's edge temperature sensor in °C")
+        .def("get_temperature_max", &hws::rocm_smi_temperature_samples::get_temperature_max, "the maximum temperature on the GPU's edge temperature sensor in °C")
+        .def("get_memory_temperature_min", &hws::rocm_smi_temperature_samples::get_memory_temperature_min, "the minimum temperature on the GPU's memory temperature sensor in °C")
+        .def("get_memory_temperature_max", &hws::rocm_smi_temperature_samples::get_memory_temperature_max, "the maximum temperature on the GPU's memory temperature sensor in °C")
+        .def("get_hotspot_temperature_min", &hws::rocm_smi_temperature_samples::get_hotspot_temperature_min, "the minimum temperature on the GPU's hotspot temperature sensor in °C")
+        .def("get_hotspot_temperature_max", &hws::rocm_smi_temperature_samples::get_hotspot_temperature_max, "the maximum temperature on the GPU's hotspot temperature sensor in °C")
+        .def("get_hbm_0_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature_min, "the minimum temperature on the GPU's HBM0 temperature sensor in °C")
+        .def("get_hbm_0_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature_max, "the maximum temperature on the GPU's HBM0 temperature sensor in °C")
+        .def("get_hbm_1_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature_min, "the minimum temperature on the GPU's HBM1 temperature sensor in °C")
+        .def("get_hbm_1_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature_max, "the maximum temperature on the GPU's HBM1 temperature sensor in °C")
+        .def("get_hbm_2_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature_min, "the minimum temperature on the GPU's HBM2 temperature sensor in °C")
+        .def("get_hbm_2_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature_max, "the maximum temperature on the GPU's HBM2 temperature sensor in °C")
+        .def("get_hbm_3_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature_min, "the minimum temperature on the GPU's HBM3 temperature sensor in °C")
+        .def("get_hbm_3_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature_max, "the maximum temperature on the GPU's HBM3 temperature sensor in °C")
+        .def("get_fan_speed_percentage", &hws::rocm_smi_temperature_samples::get_fan_speed_percentage, "the current fan speed in %")
+        .def("get_temperature", &hws::rocm_smi_temperature_samples::get_temperature, "the current temperature on the GPU's edge temperature sensor in °C")
+        .def("get_hotspot_temperature", &hws::rocm_smi_temperature_samples::get_hotspot_temperature, "the current temperature on the GPU's hotspot temperature sensor in °C")
+        .def("get_memory_temperature", &hws::rocm_smi_temperature_samples::get_memory_temperature, "the current temperature on the GPU's memory temperature sensor in °C")
+        .def("get_hbm_0_temperature", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature, "the current temperature on the GPU's HBM0 temperature sensor in °C")
+        .def("get_hbm_1_temperature", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature, "the current temperature on the GPU's HBM1 temperature sensor in °C")
+        .def("get_hbm_2_temperature", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature, "the current temperature on the GPU's HBM2 temperature sensor in °C")
+        .def("get_hbm_3_temperature", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature, "the current temperature on the GPU's HBM3 temperature sensor in °C")
         .def("__repr__", [](const hws::rocm_smi_temperature_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiTemperatureSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiTemperatureSamples with\n{}\n>", self);
         });
 
     // bind the GPU AMD hardware sampler class
@@ -115,6 +123,6 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) {
         .def("memory_samples", &hws::gpu_amd_hardware_sampler::memory_samples, "get all memory related samples")
         .def("temperature_samples", &hws::gpu_amd_hardware_sampler::temperature_samples, "get all temperature related samples")
         .def("__repr__", [](const hws::gpu_amd_hardware_sampler &self) {
-            return std::format("<HardwareSampling.GpuAmdHardwareSampler with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.GpuAmdHardwareSampler with\n{}\n>", self);
         });
 }
diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp
index 39b346a..0b05a55 100644
--- a/bindings/gpu_intel_hardware_sampler.cpp
+++ b/bindings/gpu_intel_hardware_sampler.cpp
@@ -9,13 +9,13 @@
 #include "hardware_sampling/gpu_intel/level_zero_samples.hpp"  // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"              // hws::hardware_sampler
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
 #include <chrono>   // std::chrono::milliseconds
 #include <cstddef>  // std::size_t
-#include <format>   // std::format
 
 namespace py = pybind11;
 
@@ -27,7 +27,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
         .def("get_num_threads_per_eu", &hws::level_zero_general_samples::get_num_threads_per_eu, "the number of threads per EU unit")
         .def("get_eu_simd_width", &hws::level_zero_general_samples::get_eu_simd_width, "the physical EU unit SIMD width")
         .def("__repr__", [](const hws::level_zero_general_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroGeneralSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroGeneralSamples with\n{}\n>", self);
         });
 
     // bind the clock samples
@@ -45,7 +45,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
         .def("get_clock_mem", &hws::level_zero_clock_samples::get_clock_mem, "the current memory frequency in MHz")
         .def("get_throttle_reason_mem", &hws::level_zero_clock_samples::get_throttle_reason_mem, "the current memory frequency throttle reason")
         .def("__repr__", [](const hws::level_zero_clock_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroClockSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroClockSamples with\n{}\n>", self);
         });
 
     // bind the power samples
@@ -54,7 +54,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
         .def("get_energy_threshold", &hws::level_zero_power_samples::get_energy_threshold, "the energy threshold in J")
         .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ")
         .def("__repr__", [](const hws::level_zero_power_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroPowerSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroPowerSamples with\n{}\n>", self);
         });
 
     // bind the memory samples
@@ -72,7 +72,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
         .def("get_pcie_link_width", &hws::level_zero_memory_samples::get_pcie_link_width, "the current PCIe lane width")
         .def("get_pcie_link_generation", &hws::level_zero_memory_samples::get_pcie_link_generation, "the current PCIe generation")
         .def("__repr__", [](const hws::level_zero_memory_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroMemorySamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroMemorySamples with\n{}\n>", self);
         });
 
     // bind the temperature samples
@@ -81,7 +81,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
         .def("get_temperature_psu", &hws::level_zero_temperature_samples::get_temperature_psu, "the temperature of the PSU in °C")
         .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current temperature for the sensor in °C")
         .def("__repr__", [](const hws::level_zero_temperature_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroTemperatureSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroTemperatureSamples with\n{}\n>", self);
         });
 
     // bind the GPU Intel hardware sampler class
@@ -96,6 +96,6 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
         .def("memory_samples", &hws::gpu_intel_hardware_sampler::memory_samples, "get all memory related samples")
         .def("temperature_samples", &hws::gpu_intel_hardware_sampler::temperature_samples, "get all temperature related samples")
         .def("__repr__", [](const hws::gpu_intel_hardware_sampler &self) {
-            return std::format("<HardwareSampling.GpuIntelHardwareSampler with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.GpuIntelHardwareSampler with\n{}\n>", self);
         });
 }
diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp
index 21130ae..1550c07 100644
--- a/bindings/gpu_nvidia_hardware_sampler.cpp
+++ b/bindings/gpu_nvidia_hardware_sampler.cpp
@@ -9,84 +9,92 @@
 #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"      // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"             // hws::hardware_sampler
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
 #include <chrono>   // std::chrono::milliseconds
 #include <cstddef>  // std::size_t
-#include <format>   // std::format
 
 namespace py = pybind11;
 
 void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::nvml_general_samples>(m, "NvmlGeneralSamples")
+        .def("get_architecture", &hws::nvml_general_samples::get_architecture, "the architecture name of the device")
+        .def("get_byte_order", &hws::nvml_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
+        .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores")
+        .def("get_vendor_id", &hws::nvml_general_samples::get_vendor_id, "the vendor ID")
         .def("get_name", &hws::nvml_general_samples::get_name, "the name of the device")
         .def("get_persistence_mode", &hws::nvml_general_samples::get_persistence_mode, "the persistence mode: if true, the driver is always loaded reducing the latency for the first API call")
-        .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores")
-        .def("get_performance_state", &hws::nvml_general_samples::get_performance_state, "the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance")
-        .def("get_utilization_gpu", &hws::nvml_general_samples::get_utilization_gpu, "the GPU compute utilization in percent")
-        .def("get_utilization_mem", &hws::nvml_general_samples::get_utilization_mem, "the GPU memory utilization in percent")
+        .def("get_compute_utilization", &hws::nvml_general_samples::get_compute_utilization, "the GPU compute utilization in percent")
+        .def("get_memory_utilization", &hws::nvml_general_samples::get_memory_utilization, "the GPU memory utilization in percent")
+        .def("get_performance_level", &hws::nvml_general_samples::get_performance_level, "the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance")
         .def("__repr__", [](const hws::nvml_general_samples &self) {
-            return std::format("<HardwareSampling.NvmlGeneralSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlGeneralSamples with\n{}\n>", self);
         });
 
     // bind the clock samples
     py::class_<hws::nvml_clock_samples>(m, "NvmlClockSamples")
-        .def("get_adaptive_clock_status", &hws::nvml_clock_samples::get_adaptive_clock_status, "true if clock boosting is currently enabled")
-        .def("get_clock_graph_min", &hws::nvml_clock_samples::get_clock_graph_min, "the minimum possible graphics clock frequency in MHz")
-        .def("get_clock_graph_max", &hws::nvml_clock_samples::get_clock_graph_max, "the maximum possible graphics clock frequency in MHz")
-        .def("get_clock_sm_max", &hws::nvml_clock_samples::get_clock_sm_max, "the maximum possible SM clock frequency in MHz")
-        .def("get_clock_mem_min", &hws::nvml_clock_samples::get_clock_mem_min, "the minimum possible memory clock frequency in MHz")
-        .def("get_clock_mem_max", &hws::nvml_clock_samples::get_clock_mem_max, "the maximum possible memory clock frequency in MHz")
-        .def("get_clock_graph", &hws::nvml_clock_samples::get_clock_graph, "the current graphics clock frequency in MHz")
-        .def("get_clock_sm", &hws::nvml_clock_samples::get_clock_sm, "the current SM clock frequency in Mhz")
-        .def("get_clock_mem", &hws::nvml_clock_samples::get_clock_mem, "the current memory clock frequency in MHz")
-        .def("get_clock_throttle_reason", &hws::nvml_clock_samples::get_clock_throttle_reason, "the reason the GPU clock throttled (bitmask)")
-        .def("get_auto_boosted_clocks", &hws::nvml_clock_samples::get_auto_boosted_clocks, "true if the clocks are currently auto boosted")
+        .def("get_auto_boosted_clock_enabled", &hws::nvml_clock_samples::get_auto_boosted_clock_enabled, "true if clock boosting is currently enabled")
+        .def("get_clock_frequency_min", &hws::nvml_clock_samples::get_clock_frequency_min, "the minimum possible graphics clock frequency in MHz")
+        .def("get_clock_frequency_max", &hws::nvml_clock_samples::get_clock_frequency_max, "the maximum possible graphics clock frequency in MHz")
+        .def("get_memory_clock_frequency_min", &hws::nvml_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz")
+        .def("get_memory_clock_frequency_max", &hws::nvml_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz")
+        .def("get_sm_clock_frequency_max", &hws::nvml_clock_samples::get_sm_clock_frequency_max, "the maximum possible SM clock frequency in MHz")
+        .def("get_clock_frequency", &hws::nvml_clock_samples::get_clock_frequency, "the current graphics clock frequency in MHz")
+        .def("get_available_clock_frequencies", &hws::nvml_clock_samples::get_available_clock_frequencies, "the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest)")
+        .def("get_available_memory_clock_frequencies", &hws::nvml_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)")
+        .def("get_memory_clock_frequency", &hws::nvml_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz")
+        .def("get_sm_clock_frequency", &hws::nvml_clock_samples::get_sm_clock_frequency, "the current SM clock frequency in Mhz")
+        .def("get_throttle_reason", &hws::nvml_clock_samples::get_throttle_reason, "the reason the GPU clock throttled")
+        .def("get_auto_boosted_clock", &hws::nvml_clock_samples::get_auto_boosted_clock, "true if the clocks are currently auto boosted")
         .def("__repr__", [](const hws::nvml_clock_samples &self) {
-            return std::format("<HardwareSampling.NvmlClockSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlClockSamples with\n{}\n>", self);
         });
 
     // bind the power samples
     py::class_<hws::nvml_power_samples>(m, "NvmlPowerSamples")
-        .def("get_power_management_mode", &hws::nvml_power_samples::get_power_management_mode, "true if power management algorithms are supported and active")
         .def("get_power_management_limit", &hws::nvml_power_samples::get_power_management_limit, "if the GPU draws more power (mW) than the power management limit, the GPU may throttle")
         .def("get_power_enforced_limit", &hws::nvml_power_samples::get_power_enforced_limit, "the actually enforced power limit, may be different from power management limit if external limiters are set")
-        .def("get_power_state", &hws::nvml_power_samples::get_power_state, "the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power")
+        .def("get_power_measurement_type", &hws::nvml_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw")
+        .def("get_power_management_mode", &hws::nvml_power_samples::get_power_management_mode, "true if power management algorithms are supported and active")
+        .def("get_available_power_profiles", &hws::nvml_power_samples::get_available_power_profiles, "a list of the available power profiles")
         .def("get_power_usage", &hws::nvml_power_samples::get_power_usage, "the current power draw of the GPU and its related circuity (e.g., memory) in mW")
         .def("get_power_total_energy_consumption", &hws::nvml_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ")
+        .def("get_power_profile", &hws::nvml_power_samples::get_power_profile, "the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power")
         .def("__repr__", [](const hws::nvml_power_samples &self) {
-            return std::format("<HardwareSampling.NvmlPowerSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlPowerSamples with\n{}\n>", self);
         });
 
     // bind the memory samples
     py::class_<hws::nvml_memory_samples>(m, "NvmlMemorySamples")
         .def("get_memory_total", &hws::nvml_memory_samples::get_memory_total, "the total available memory in Byte")
-        .def("get_pcie_link_max_speed", &hws::nvml_memory_samples::get_pcie_link_max_speed, "the maximum PCIe link speed in MBPS")
+        .def("get_num_pcie_lanes_max", &hws::nvml_memory_samples::get_num_pcie_lanes_max, "the maximum number of PCIe lanes")
+        .def("get_pcie_link_generation_max", &hws::nvml_memory_samples::get_pcie_link_generation_max, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)")
+        .def("get_pcie_link_speed_max", &hws::nvml_memory_samples::get_pcie_link_speed_max, "the maximum PCIe link speed in MBPS")
         .def("get_memory_bus_width", &hws::nvml_memory_samples::get_memory_bus_width, "the memory bus with in Bit")
-        .def("get_max_pcie_link_generation", &hws::nvml_memory_samples::get_max_pcie_link_generation, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)")
-        .def("get_memory_free", &hws::nvml_memory_samples::get_memory_free, "the currently free memory in Byte")
         .def("get_memory_used", &hws::nvml_memory_samples::get_memory_used, "the currently used memory in Byte")
-        .def("get_pcie_link_speed", &hws::nvml_memory_samples::get_pcie_link_speed, "the current PCIe link speed in MBPS")
-        .def("get_pcie_link_width", &hws::nvml_memory_samples::get_pcie_link_width, "the current PCIe link width (e.g., x16, x8, x4, etc)")
+        .def("get_memory_free", &hws::nvml_memory_samples::get_memory_free, "the currently free memory in Byte")
+        .def("get_num_pcie_lanes", &hws::nvml_memory_samples::get_num_pcie_lanes, "the current PCIe link width (e.g., x16, x8, x4, etc)")
         .def("get_pcie_link_generation", &hws::nvml_memory_samples::get_pcie_link_generation, "the current PCIe link generation (may change during runtime to save energy)")
+        .def("get_pcie_link_speed", &hws::nvml_memory_samples::get_pcie_link_speed, "the current PCIe link speed in MBPS")
         .def("__repr__", [](const hws::nvml_memory_samples &self) {
-            return std::format("<HardwareSampling.NvmlMemorySamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlMemorySamples with\n{}\n>", self);
         });
 
     // bind the temperature samples
     py::class_<hws::nvml_temperature_samples>(m, "NvmlTemperatureSamples")
         .def("get_num_fans", &hws::nvml_temperature_samples::get_num_fans, "the number of fans (if any)")
-        .def("get_min_fan_speed", &hws::nvml_temperature_samples::get_min_fan_speed, "the minimum fan speed the user can set in %")
-        .def("get_max_fan_speed", &hws::nvml_temperature_samples::get_max_fan_speed, "the maximum fan speed the user can set in %")
-        .def("get_temperature_threshold_gpu_max", &hws::nvml_temperature_samples::get_temperature_threshold_gpu_max, "the maximum graphics temperature threshold in °C")
-        .def("get_temperature_threshold_mem_max", &hws::nvml_temperature_samples::get_temperature_threshold_mem_max, "the maximum memory temperature threshold in °C")
-        .def("get_fan_speed", &hws::nvml_temperature_samples::get_fan_speed, "the current intended fan speed in %")
-        .def("get_temperature_gpu", &hws::nvml_temperature_samples::get_temperature_gpu, "the current GPU temperature in °C")
+        .def("get_fan_speed_min", &hws::nvml_temperature_samples::get_fan_speed_min, "the minimum fan speed the user can set in %")
+        .def("get_fan_speed_max", &hws::nvml_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in %")
+        .def("get_temperature_max", &hws::nvml_temperature_samples::get_temperature_max, "the maximum graphics temperature threshold in °C")
+        .def("get_memory_temperature_max", &hws::nvml_temperature_samples::get_memory_temperature_max, "the maximum memory temperature threshold in °C")
+        .def("get_fan_speed_percentage", &hws::nvml_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %")
+        .def("get_temperature", &hws::nvml_temperature_samples::get_temperature, "the current GPU temperature in °C")
         .def("__repr__", [](const hws::nvml_temperature_samples &self) {
-            return std::format("<HardwareSampling.NvmlTemperatureSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlTemperatureSamples with\n{}\n>", self);
         });
 
     // bind the GPU NVIDIA hardware sampler class
@@ -101,6 +109,6 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
         .def("memory_samples", &hws::gpu_nvidia_hardware_sampler::memory_samples, "get all memory related samples")
         .def("temperature_samples", &hws::gpu_nvidia_hardware_sampler::temperature_samples, "get all temperature related samples")
         .def("__repr__", [](const hws::gpu_nvidia_hardware_sampler &self) {
-            return std::format("<HardwareSampling.GpuNvidiaHardwareSampler with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.GpuNvidiaHardwareSampler with\n{}\n>", self);
         });
 }
diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index 5d45f74..e46dbc8 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -12,21 +12,22 @@
 #if defined(HWS_FOR_CPUS_ENABLED)
     #include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
 #endif
+#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
+    #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
+#endif
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
     #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
 #endif
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
     #include "hardware_sampling/gpu_intel/hardware_sampler.hpp"  // hws::gpu_intel_hardware_sampler
 #endif
-#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
-#endif
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // bind std::chrono types
 #include "pybind11/pybind11.h"  // py::module_, py::class_
 #include "pybind11/stl.h"       // bind STL types
 
-#include <format>  // std::format
+#include <string>  // std::string
 
 namespace py = pybind11;
 
@@ -54,22 +55,22 @@ void init_hardware_sampler(py::module_ &m) {
         .def("__repr__", [](const hws::hardware_sampler &self) {
 #if defined(HWS_FOR_CPUS_ENABLED)
             if (dynamic_cast<const hws::cpu_hardware_sampler *>(&self)) {
-                return std::format("<plssvm.detail.tracking.CpuHardwareSampler with\n{}\n>", dynamic_cast<const hws::cpu_hardware_sampler &>(self));
+                return fmt::format("<plssvm.detail.tracking.CpuHardwareSampler with\n{}\n>", dynamic_cast<const hws::cpu_hardware_sampler &>(self));
             }
 #endif
 #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
             if (dynamic_cast<const hws::gpu_nvidia_hardware_sampler *>(&self)) {
-                return std::format("<plssvm.detail.tracking.GpuNvidiaHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_nvidia_hardware_sampler &>(self));
+                return fmt::format("<plssvm.detail.tracking.GpuNvidiaHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_nvidia_hardware_sampler &>(self));
             }
 #endif
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
             if (dynamic_cast<const hws::gpu_amd_hardware_sampler *>(&self)) {
-                return std::format("<plssvm.detail.tracking.GpuAmdHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_amd_hardware_sampler &>(self));
+                return fmt::format("<plssvm.detail.tracking.GpuAmdHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_amd_hardware_sampler &>(self));
             }
 #endif
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
             if (dynamic_cast<const hws::gpu_intel_hardware_sampler *>(&self)) {
-                return std::format("<plssvm.detail.tracking.GpuIntelHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_intel_hardware_sampler &>(self));
+                return fmt::format("<plssvm.detail.tracking.GpuIntelHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_intel_hardware_sampler &>(self));
             }
 #endif
             return std::string{ "unknown" };
diff --git a/bindings/main.cpp b/bindings/main.cpp
index 5a4c01c..2b6f507 100644
--- a/bindings/main.cpp
+++ b/bindings/main.cpp
@@ -7,6 +7,11 @@
 
 #include "pybind11/pybind11.h"  // PYBIND11_MODULE, py::module_
 
+#include <string_view>  // std::string_view
+
+#define HWS_IS_DEFINED_HELPER(x) #x
+#define HWS_IS_DEFINED(x) (std::string_view{ #x } != std::string_view{ HWS_IS_DEFINED_HELPER(x) })
+
 namespace py = pybind11;
 
 // forward declare binding functions
@@ -23,30 +28,27 @@ PYBIND11_MODULE(HardwareSampling, m) {
     init_event(m);
     init_hardware_sampler(m);
 
+    // CPU sampling
 #if defined(HWS_FOR_CPUS_ENABLED)
     init_cpu_hardware_sampler(m);
-    m.def("has_cpu_hardware_sampler", []{return true;} );
-#else
-    m.def("has_cpu_hardware_sampler", []{return false;} );
 #endif
+    m.def("has_cpu_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_CPUS_ENABLED); });
+
+    // NVIDIA GPU sampling
 #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
     init_gpu_nvidia_hardware_sampler(m);
-    m.def("has_gpu_nvidia_hardware_sampler", []{return true;} );
-#else
-    m.def("has_gpu_nvidia_hardware_sampler", []{return false;} );
 #endif
+    m.def("has_gpu_nvidia_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_NVIDIA_GPUS_ENABLED); });
+
+    // AMD GPU sampling
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
     init_gpu_amd_hardware_sampler(m);
-    m.def("has_gpu_amd_hardware_sampler", []{return true;} );
-#else
-    m.def("has_gpu_amd_hardware_sampler", []{return false;} );
 #endif
+    m.def("has_gpu_amd_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_AMD_GPUS_ENABLED); });
 
+    // Intel GPU sampling
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
     init_gpu_intel_hardware_sampler(m);
-    m.def("has_gpu_intel_hardware_sampler", []{return true;} );
-#else
-    m.def("has_gpu_intel_hardware_sampler", []{return false;} );
 #endif
-
+    m.def("has_gpu_intel_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_INTEL_GPUS_ENABLED); });
 }

From 6f4079577e71056a777286fb6b8d1a63a1f3de3d Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 17:06:20 +0200
Subject: [PATCH 27/69] Update README tables.

---
 README.md | 297 +++++++++++++++++++++++++++---------------------------
 1 file changed, 148 insertions(+), 149 deletions(-)

diff --git a/README.md b/README.md
index bb02f24..c805dbb 100644
--- a/README.md
+++ b/README.md
@@ -64,175 +64,174 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 ## Available samples
 
+The sampling type `fixed` denotes samples that are gathered only once per hardware sampler, like the maximum clock frequency, the maximum temperature, or the total available memory.
+The sampling type `sampled` denotes samples that are gathered during the whole hardware sampling process like the current clock frequencies, temperatures, or memory consumption.
+
 ### General samples
 
-| sample              | CPUs  | NVIDIA GPUs | AMD GPUs  |  Intel GPUs   |
-|:--------------------|:-----:|:-----------:|:---------:|:-------------:|
-| architecture        |  str  |     str     |    str    |       ?       |
-| byte_order          |  str  |  str (fix)  | str (fix) |   str (fix)   |
-| num_cores           |  int  |     int     |     -     |               |
-| num_threads         |  int  |      -      |     -     |       -       |
-| threads_per_core    |  int  |      -      |     -     |       -       |
-| cores_per_socket    |  int  |      -      |     -     |       -       |
-| num_sockets         |  int  |      -      |     -     |       -       |
-| numa_nodes          |  int  |      -      |     -     |               |
-| vendor_id           |  str  |  str (fix)  |    str    | str (PCIe ID) |
-| name                |  str  |     str     |    str    |      str      |
-| flags               |  str  |      -      |     -     |               |
-| persistence_mode    |   -   |    bool     |     -     |               |
-| compute_utilization |   %   |      %      |     %     |       ?       |
-| memory_utilization  |   -   |      %      |     %     |       ?       |
-| ipc                 | float |      -      |     -     |       -       |
-| irq                 |  int  |      -      |     -     |       -       |
-| smi                 |  int  |      -      |     -     |       -       |
-| poll                |  int  |      -      |     -     |       -       |
-| poll_percent        |   %   |      -      |     -     |       -       |
-| performance_level   |   -   |     int     |    int    |               |
-| standby_mode        |       |             |           |      str      |
-| num_threads_per_eu  |       |             |           |      int      |
-| eu_simd_width       |       |             |           |      int      |
+| sample              | sample type |    CPUs     | NVIDIA GPUs | AMD GPUs  |  Intel GPUs   |
+|:--------------------|:-----------:|:-----------:|:-----------:|:---------:|:-------------:|
+| architecture        |    fixed    |     str     |     str     |    str    |       ?       |
+| byte_order          |    fixed    |     str     |  str (fix)  | str (fix) |   str (fix)   |
+| num_cores           |    fixed    |     int     |     int     |     -     |               |
+| num_threads         |    fixed    |     int     |      -      |     -     |       -       |
+| threads_per_core    |    fixed    |     int     |      -      |     -     |       -       |
+| cores_per_socket    |    fixed    |     int     |      -      |     -     |       -       |
+| num_sockets         |    fixed    |     int     |      -      |     -     |       -       |
+| numa_nodes          |    fixed    |     int     |      -      |     -     |               |
+| vendor_id           |    fixed    |     str     |  str (fix)  |    str    | str (PCIe ID) |
+| name                |    fixed    |     str     |     str     |    str    |      str      |
+| flags               |    fixed    | list of str |      -      |     -     |               |
+| persistence_mode    |    fixed    |      -      |    bool     |     -     |               |
+| compute_utilization |   sampled   |      %      |      %      |     %     |       ?       |
+| memory_utilization  |   sampled   |      -      |      %      |     %     |       ?       |
+| ipc                 |   sampled   |    float    |      -      |     -     |       -       |
+| irq                 |   sampled   |     int     |      -      |     -     |       -       |
+| smi                 |   sampled   |     int     |      -      |     -     |       -       |
+| poll                |   sampled   |     int     |      -      |     -     |       -       |
+| poll_percent        |   sampled   |      %      |      -      |     -     |       -       |
+| performance_level   |   sampled   |      -      |     int     |    str    |               |
+| standby_mode        |             |             |             |           |      str      |
+| num_threads_per_eu  |             |             |             |           |      int      |
+| eu_simd_width       |             |             |             |           |      int      |
 
 ### clock-related samples
 
-| sample                             | CPUs |   NVIDIA GPUs    | AMD GPUs | Intel GPUs |
-|:-----------------------------------|:----:|:----------------:|:--------:|:----------:|
-| auto_boosted_clock_enabled         | bool |       bool       |    -     |            |
-| clock_frequency_min                | MHz  |       MHz        |   MHz    |            |
-| clock_frequency_max                | MHz  |       MHz        |   MHz    |            |
-| memory_clock_frequency_min         |  -   |       MHz        |   MHz    |            |
-| memory_clock_frequency_max         |  -   |       MHz        |   MHz    |            |
-| socket_clock_frequency_min         |  -   |        -         |   MHz    |     -      |
-| socket_clock_frequency_min         |  -   |        -         |   MHz    |     -      |
-| sm_clock_frequency_max             |  -   |       MHz        |    -     |     -      |
-| available_clock_frequencies        |  -   |       MHz        |   MHz    |            |
-| available_memory_clock_frequencies |  -   |       MHz        |   MHz    |            |
-| clock_frequency                    | MHz  |       MHz        |   MHz    |            |
-| average_non_idle_frequency         | MHz  |        -         |    -     |     -      |
-| time_stamp_counter                 | MHz  |        -         |    -     |     -      |
-| memory_clock_frequency             |  -   |       MHz        |   MHz    |            |
-| socket_clock_frequency             |  -   |        -         |   MHz    |     -      |
-| sm_clock_frequency                 |  -   |       MHz        |    -     |     -      |
-| overdrive_level                    |  -   |        -         |    %     |     -      |
-| memory_overdrive_level             |  -   |        -         |    %     |     -      |
-| throttle_reason                    |  -   | string (bitmask) |    -     |            |
-| memory_throttle_reason             |  -   |        -         |    -     |            |
-| auto_boosted_clock                 |  -   |       bool       |    -     |     -      |
-| tdp_frequency_limit                |  -   |        -         |    -     |            |
-| memory_tdp_frequency_limit         |  -   |        -         |    -     |            |
+| sample                             | sample type | CPUs |   NVIDIA GPUs    |  AMD GPUs   | Intel GPUs |
+|:-----------------------------------|:-----------:|:----:|:----------------:|:-----------:|:----------:|
+| auto_boosted_clock_enabled         |    fixed    | bool |       bool       |      -      |            |
+| clock_frequency_min                |    fixed    | MHz  |       MHz        |     MHz     |            |
+| clock_frequency_max                |    fixed    | MHz  |       MHz        |     MHz     |            |
+| memory_clock_frequency_min         |    fixed    |  -   |       MHz        |     MHz     |            |
+| memory_clock_frequency_max         |    fixed    |  -   |       MHz        |     MHz     |            |
+| socket_clock_frequency_min         |    fixed    |  -   |        -         |     MHz     |     -      |
+| socket_clock_frequency_max         |    fixed    |  -   |        -         |     MHz     |     -      |
+| sm_clock_frequency_max             |    fixed    |  -   |       MHz        |      -      |     -      |
+| available_clock_frequencies        |    fixed    |  -   |    map of MHz    | list of MHz |            |
+| available_memory_clock_frequencies |    fixed    |  -   |   list of MHz    | list of MHz |            |
+| clock_frequency                    |   sampled   | MHz  |       MHz        |     MHz     |            |
+| average_non_idle_frequency         |   sampled   | MHz  |        -         |      -      |     -      |
+| time_stamp_counter                 |   sampled   | MHz  |        -         |      -      |     -      |
+| memory_clock_frequency             |   sampled   |  -   |       MHz        |     MHz     |            |
+| socket_clock_frequency             |   sampled   |  -   |        -         |     MHz     |     -      |
+| sm_clock_frequency                 |   sampled   |  -   |       MHz        |      -      |     -      |
+| overdrive_level                    |   sampled   |  -   |        -         |      %      |     -      |
+| memory_overdrive_level             |   sampled   |  -   |        -         |      %      |     -      |
+| throttle_reason                    |   sampled   |  -   | string (bitmask) |      -      |            |
+| memory_throttle_reason             |             |  -   |        -         |      -      |            |
+| auto_boosted_clock                 |   sampled   |  -   |       bool       |      -      |     -      |
+| tdp_frequency_limit                |             |  -   |        -         |      -      |            |
+| memory_tdp_frequency_limit         |             |  -   |        -         |      -      |            |
 
 ### power-related samples
 
-| sample                         |               CPUs                | NVIDIA GPUs |  AMD GPUs   | Intel GPUs |
-|:-------------------------------|:---------------------------------:|:-----------:|:-----------:|:----------:|
-| power_management_limit         |                 -                 |      W      |      W      |            |
-| power_enforced_limit           |                 -                 |      W      |      W      |            |
-| power_measurement_type         |             str (fix)             |     str     |     str     |            |
-| power_management_mode          |                 -                 |    bool     |      -      |            |
-| available_power_profiles       |                 -                 | list of int | list of str |            |
-| power_usage                    |                 W                 |      W      |      W      |            |
-| core_watt                      |                 W                 |      -      |      -      |     -      |
-| dram_watt                      |                 W                 |      -      |      -      |     -      |
-| package_rapl_throttling        |                 %                 |      -      |      -      |     -      |
-| dram_rapl_throttling           |                 %                 |      -      |      -      |     -      |
-| power_total_energy_consumption | J<br>(calculated via power_usage) |      J      |      J      |     J      |
-| power_profile                  |                 -                 |     int     |     str     |            |
-| energy_threshold_enabled       |                                   |             |             |    bool    |
-| energy_threshold               |                                   |             |             |     J      |
+| sample                         | sample type |               CPUs                | NVIDIA GPUs |  AMD GPUs   | Intel GPUs |
+|:-------------------------------|:-----------:|:---------------------------------:|:-----------:|:-----------:|:----------:|
+| power_management_limit         |    fixed    |                 -                 |      W      |      W      |            |
+| power_enforced_limit           |    fixed    |                 -                 |      W      |      W      |            |
+| power_measurement_type         |    fixed    |             str (fix)             |     str     |     str     |            |
+| power_management_mode          |    fixed    |                 -                 |    bool     |      -      |            |
+| available_power_profiles       |    fixed    |                 -                 | list of int | list of str |            |
+| power_usage                    |   sampled   |                 W                 |      W      |      W      |            |
+| core_watt                      |   sampled   |                 W                 |      -      |      -      |     -      |
+| dram_watt                      |   sampled   |                 W                 |      -      |      -      |     -      |
+| package_rapl_throttling        |   sampled   |                 %                 |      -      |      -      |     -      |
+| dram_rapl_throttling           |   sampled   |                 %                 |      -      |      -      |     -      |
+| power_total_energy_consumption |   sampled   | J<br>(calculated via power_usage) |      J      |      J      |     J      |
+| power_profile                  |   sampled   |                 -                 |     int     |     str     |            |
+| energy_threshold_enabled       |             |                                   |             |             |    bool    |
+| energy_threshold               |             |                                   |             |             |     J      |
 
 ### memory-related samples
 
-| sample                      | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
-|:----------------------------|:----:|:-----------:|:--------:|:----------:|
-| cache_size_L1d              | str  |      -      |    -     |     -      |
-| cache_size_L1i              | str  |      -      |    -     |     -      |
-| cache_size_L2               | str  |      -      |    -     |     -      |
-| cache_size_L3               | str  |      -      |    -     |     -      |
-| memory_total                |  B   |      B      |    B     |            |
-| visible_memory_total        |  -   |      -      |    B     |     -      |
-| swap_memory_total           |  B   |      -      |    -     |     -      |
-| memory_total_{}             |  -   |             |          |     B      |
-| allocatable_memory_total_{} |  -   |             |          |     B      |
-| num_pcie_lanes_min          |  -   |      -      |   int    |            |
-| num_pcie_lanes_max          |  -   |     int     |   int    |            |
-| pcie_link_generation_max    |  -   |     int     |    -     |    int     |
-| pcie_link_speed_max         |  -   |    MBPS     |    -     |    BPS     |
-| pcie_link_transfer_rate_min |  -   |      -      |   MT/s   |            |
-| pcie_link_transfer_rate_max |  -   |      -      |   MT/s   |            |
-| memory_bus_width            |  -   |     Bit     |    -     |            |
-| memory_used                 |  B   |      B      |    B     |            |
-| memory_free                 |  B   |      B      |    B     |            |
-| swap_memory_used            |  B   |      -      |    -     |     -      |
-| swap_memory_free            |  B   |      -      |    -     |     -      |
-| num_pcie_lanes              |  -   |     int     |   int    |            |
-| pcie_link_generation        |  -   |     int     |    -     |    int     |
-| pcie_link_speed             |  -   |    MBPS     |    -     |    MBPS    |
-| pcie_link_transfer_rate     |  -   |      -      |   T/s    |     -      |
-| memory_used_{}              |      |             |          |     B      |
-| memory_free_{}              |      |             |          |     B      |
-| memory_bus_width_{}         |      |             |          |    Bit     |
-| memory_num_channels_{}      |      |             |          |    int     |
-| memory_location_{}          |      |             |          |    str     |
+| sample                      | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
+|:----------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:|
+| cache_size_L1d              |    fixed    | str  |      -      |    -     |     -      |
+| cache_size_L1i              |    fixed    | str  |      -      |    -     |     -      |
+| cache_size_L2               |    fixed    | str  |      -      |    -     |     -      |
+| cache_size_L3               |    fixed    | str  |      -      |    -     |     -      |
+| memory_total                |    fixed    |  B   |      B      |    B     |            |
+| visible_memory_total        |    fixed    |  -   |      -      |    B     |     -      |
+| swap_memory_total           |    fixed    |  B   |      -      |    -     |     -      |
+| memory_total_{}             |             |  -   |             |          |     B      |
+| allocatable_memory_total_{} |             |  -   |             |          |     B      |
+| num_pcie_lanes_min          |    fixed    |  -   |      -      |   int    |            |
+| num_pcie_lanes_max          |    fixed    |  -   |     int     |   int    |            |
+| pcie_link_generation_max    |    fixed    |  -   |     int     |    -     |    int     |
+| pcie_link_speed_max         |    fixed    |  -   |    MBPS     |    -     |    BPS     |
+| pcie_link_transfer_rate_min |    fixed    |  -   |      -      |   MT/s   |            |
+| pcie_link_transfer_rate_max |    fixed    |  -   |      -      |   MT/s   |            |
+| memory_bus_width            |    fixed    |  -   |     Bit     |    -     |            |
+| memory_used                 |   sampled   |  B   |      B      |    B     |            |
+| memory_free                 |   sampled   |  B   |      B      |    B     |            |
+| swap_memory_used            |   sampled   |  B   |      -      |    -     |     -      |
+| swap_memory_free            |   sampled   |  B   |      -      |    -     |     -      |
+| num_pcie_lanes              |   sampled   |  -   |     int     |   int    |            |
+| pcie_link_generation        |   sampled   |  -   |     int     |    -     |    int     |
+| pcie_link_speed             |   sampled   |  -   |    MBPS     |    -     |    MBPS    |
+| pcie_link_transfer_rate     |   sampled   |  -   |      -      |   T/s    |     -      |
+| memory_used_{}              |             |      |             |          |     B      |
+| memory_free_{}              |             |      |             |          |     B      |
+| memory_bus_width_{}         |             |      |             |          |    Bit     |
+| memory_num_channels_{}      |             |      |             |          |    int     |
+| memory_location_{}          |             |      |             |          |    str     |
 
 ### temperature-related samples
 
-| sample                  | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
-|:------------------------|:----:|:-----------:|:--------:|:----------:|
-| num_fans                |  -   |     int     |   int    |            |
-| fan_speed_min           |  -   |      %      |    -     |            | 
-| fan_speed_max           |  -   |      %      |   RPM    |            |
-| temperature_min         |  -   |      -      |    °C    |            |
-| temperature_max         |  -   |     °C      |    °C    |            |
-| memory_temperature_min  |  -   |      -      |    °C    |            |
-| memory_temperature_max  |  -   |     °C      |    °C    |            |
-| hotspot_temperature_min |  -   |      -      |    °C    |            |
-| hotspot_temperature_max |  -   |      -      |    °C    |            |
-| hbm_0_temperature_min   |  -   |      -      |    °C    |            |
-| hbm_0_temperature_max   |  -   |      -      |    °C    |            |
-| hbm_1_temperature_min   |  -   |      -      |    °C    |            |
-| hbm_1_temperature_max   |  -   |      -      |    °C    |            |
-| hbm_2_temperature_min   |  -   |      -      |    °C    |            |
-| hbm_2_temperature_max   |  -   |      -      |    °C    |            |
-| hbm_3_temperature_min   |  -   |      -      |    °C    |            |
-| hbm_3_temperature_max   |  -   |      -      |    °C    |            |
-| fan_speed_percentage    |  -   |      %      |    %     |            |
-| temperature             |  °C  |     °C      |    °C    |            |
-| memory_temperature      |  -   |      -      |    °C    |            |
-| hotspot_temperature     |  -   |      -      |    °C    |            |
-| hbm_0_temperature       |  -   |      -      |    °C    |            |
-| hbm_1_temperature       |  -   |      -      |    °C    |            |
-| hbm_2_temperature       |  -   |      -      |    °C    |            |
-| hbm_3_temperature       |  -   |      -      |    °C    |            |
-| temperature_{}_max      |      |             |          |            |
-| temperature_psu         |      |             |          |            |
-| temperature_{}          |      |             |          |            |
-| core_temperature        |  °C  |      -      |    -     |     -      |
-| core_throttle_percent   |  %   |      -      |    -     |     -      |
+| sample                  | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
+|:------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:|
+| num_fans                |    fixed    |  -   |     int     |   int    |            |
+| fan_speed_min           |    fixed    |  -   |      %      |    -     |            | 
+| fan_speed_max           |    fixed    |  -   |      %      |   RPM    |            |
+| temperature_min         |    fixed    |  -   |      -      |    °C    |            |
+| temperature_max         |    fixed    |  -   |     °C      |    °C    |            |
+| memory_temperature_min  |    fixed    |  -   |      -      |    °C    |            |
+| memory_temperature_max  |    fixed    |  -   |     °C      |    °C    |            |
+| hotspot_temperature_min |    fixed    |  -   |      -      |    °C    |            |
+| hotspot_temperature_max |    fixed    |  -   |      -      |    °C    |            |
+| hbm_0_temperature_min   |    fixed    |  -   |      -      |    °C    |            |
+| hbm_0_temperature_max   |    fixed    |  -   |      -      |    °C    |            |
+| hbm_1_temperature_min   |    fixed    |  -   |      -      |    °C    |            |
+| hbm_1_temperature_max   |    fixed    |  -   |      -      |    °C    |            |
+| hbm_2_temperature_min   |    fixed    |  -   |      -      |    °C    |            |
+| hbm_2_temperature_max   |    fixed    |  -   |      -      |    °C    |            |
+| hbm_3_temperature_min   |    fixed    |  -   |      -      |    °C    |            |
+| hbm_3_temperature_max   |    fixed    |  -   |      -      |    °C    |            |
+| fan_speed_percentage    |   sampled   |  -   |      %      |    %     |            |
+| temperature             |   sampled   |  °C  |     °C      |    °C    |            |
+| memory_temperature      |   sampled   |  -   |      -      |    °C    |            |
+| hotspot_temperature     |   sampled   |  -   |      -      |    °C    |            |
+| hbm_0_temperature       |   sampled   |  -   |      -      |    °C    |            |
+| hbm_1_temperature       |   sampled   |  -   |      -      |    °C    |            |
+| hbm_2_temperature       |   sampled   |  -   |      -      |    °C    |            |
+| hbm_3_temperature       |   sampled   |  -   |      -      |    °C    |            |
+| temperature_{}_max      |             |      |             |          |            |
+| temperature_psu         |             |      |             |          |            |
+| temperature_{}          |             |      |             |          |            |
+| core_temperature        |   sampled   |  °C  |      -      |    -     |     -      |
+| core_throttle_percent   |   sampled   |  %   |      -      |    -     |     -      |
 
 ### gfx-related (iGPU) samples
 
-| sample                     | CPUs |
-|:---------------------------|:----:|
-| graphics_render_state      |  %   |
-| graphics_frequency         | MHz  |
-| average_graphics_frequency | MHz  |
-| gpu_state_c0               |  %   |
-| cpu_works_for_gpu          |  %   |
-| graphics_power             |  W   |
+| sample                    | sample type | CPUs |
+|:--------------------------|:-----------:|:----:|
+| gfx_render_state_percent  |   sampled   |  %   |
+| gfx_frequency             |   sampled   | MHz  |
+| average_gfx_frequency     |   sampled   | MHz  |
+| gfx_state_c0_percent      |   sampled   |  %   |
+| cpu_works_for_gpu_percent |   sampled   |  %   |
+| gfx_watt                  |   sampled   |  W   |
 
 ### "idle states"-related samples
 
-| sample                           | CPUs |
-|:---------------------------------|:----:|
-| all_cpus_state_c0                |  %   |
-| any_cpu_state_c0                 |  %   |
-| lower_power_idle_state           |  %   |
-| system_lower_power_idle_state    |  %   |
-| package_lower_power_idle_state   |  %   |
-| cpu_idle_state_{}_percentage     |  %   |
-| package_idle_state_{}_percentage |  %   |
-| package_idle_state_{}_percentage |  %   |
-| idle_state_{}_percentage         |  %   |
-| idle_state_{}                    | int  |
+| sample                               | sample type |     CPUs      |
+|:-------------------------------------|:-----------:|:-------------:|
+| idle_states                          |    fixed    | map of values |
+| all_cpus_state_c0_percent            |   sampled   |       %       |
+| any_cpu_state_c0_percent             |   sampled   |       %       |
+| low_power_idle_state_percent         |   sampled   |       %       |
+| system_low_power_idle_state_percent  |   sampled   |       %       |
+| package_low_power_idle_state_percent |   sampled   |       %       |
 
 
 

From e1a0da58580851963b7e6b77df74e43082da7bac Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 17:35:34 +0200
Subject: [PATCH 28/69] Update sample name.

---
 README.md                                      |  2 +-
 bindings/cpu_hardware_sampler.cpp              |  2 +-
 include/hardware_sampling/cpu/cpu_samples.hpp  |  6 +++---
 src/hardware_sampling/cpu/cpu_samples.cpp      | 10 +++++-----
 src/hardware_sampling/cpu/hardware_sampler.cpp |  4 ++--
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index c805dbb..91dba28 100644
--- a/README.md
+++ b/README.md
@@ -110,7 +110,7 @@ The sampling type `sampled` denotes samples that are gathered during the whole h
 | available_clock_frequencies        |    fixed    |  -   |    map of MHz    | list of MHz |            |
 | available_memory_clock_frequencies |    fixed    |  -   |   list of MHz    | list of MHz |            |
 | clock_frequency                    |   sampled   | MHz  |       MHz        |     MHz     |            |
-| average_non_idle_frequency         |   sampled   | MHz  |        -         |      -      |     -      |
+| average_non_idle_clock_frequency   |   sampled   | MHz  |        -         |      -      |     -      |
 | time_stamp_counter                 |   sampled   | MHz  |        -         |      -      |     -      |
 | memory_clock_frequency             |   sampled   |  -   |       MHz        |     MHz     |            |
 | socket_clock_frequency             |   sampled   |  -   |        -         |     MHz     |     -      |
diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp
index 9ed204d..354c585 100644
--- a/bindings/cpu_hardware_sampler.cpp
+++ b/bindings/cpu_hardware_sampler.cpp
@@ -48,7 +48,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("get_clock_frequency_min", &hws::cpu_clock_samples::get_clock_frequency_min, "the minimum possible CPU frequency in MHz")
         .def("get_clock_frequency_max", &hws::cpu_clock_samples::get_clock_frequency_max, "the maximum possible CPU frequency in MHz")
         .def("get_clock_frequency", &hws::cpu_clock_samples::get_clock_frequency, "the average CPU frequency in MHz including idle cores")
-        .def("get_average_non_idle_frequency", &hws::cpu_clock_samples::get_average_non_idle_frequency, "the average CPU frequency in MHz excluding idle cores")
+        .def("get_average_non_idle_clock_frequency", &hws::cpu_clock_samples::get_average_non_idle_clock_frequency, "the average CPU frequency in MHz excluding idle cores")
         .def("get_time_stamp_counter", &hws::cpu_clock_samples::get_time_stamp_counter, "the time stamp counter")
         .def("__repr__", [](const hws::cpu_clock_samples &self) {
             return fmt::format("<HardwareSampling.CpuClockSamples with\n{}\n>", self);
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index 98a88f2..f92ba0d 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -95,9 +95,9 @@ class cpu_clock_samples {
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min)       // the minimum possible CPU frequency in MHz
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max)       // the maximum possible CPU frequency in MHz
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_frequency)             // the average CPU frequency in MHz including idle cores
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_frequency)  // the average CPU frequency in MHz excluding idle cores
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter)          // the time stamp counter
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_frequency)                   // the average CPU frequency in MHz including idle cores
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_clock_frequency)  // the average CPU frequency in MHz excluding idle cores
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter)                // the time stamp counter
 };
 
 /**
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index 0242e9a..cc99d76 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -242,12 +242,12 @@ std::string cpu_clock_samples::generate_yaml_string() const {
                            fmt::join(this->clock_frequency_.value(), ", "));
     }
     // the average CPU frequency excluding idle time
-    if (this->average_non_idle_frequency_.has_value()) {
-        str += fmt::format("  average_non_idle_frequency:\n"
+    if (this->average_non_idle_clock_frequency_.has_value()) {
+        str += fmt::format("  average_non_idle_clock_frequency:\n"
                            "    turbostat_name: \"Bzy_MHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           fmt::join(this->average_non_idle_frequency_.value(), ", "));
+                           fmt::join(this->average_non_idle_clock_frequency_.value(), ", "));
     }
     // the time stamp counter
     if (this->time_stamp_counter_.has_value()) {
@@ -269,13 +269,13 @@ std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) {
                               "clock_frequency_min [MHz]: {}\n"
                               "clock_frequency_max [MHz]: {}\n"
                               "clock_frequency [MHz]: [{}]\n"
-                              "average_non_idle_frequency [MHz]: [{}]\n"
+                              "average_non_idle_clock_frequency [MHz]: [{}]\n"
                               "time_stamp_counter [MHz]: [{}]",
                               detail::value_or_default(samples.get_auto_boosted_clock_enabled()),
                               detail::value_or_default(samples.get_clock_frequency_min()),
                               detail::value_or_default(samples.get_clock_frequency_max()),
                               fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
-                              fmt::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_average_non_idle_clock_frequency()), ", "),
                               fmt::join(detail::value_or_default(samples.get_time_stamp_counter()), ", "));
 }
 
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 6ad38ff..492c17a 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -171,8 +171,8 @@ void cpu_hardware_sampler::sampling_loop() {
                 using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
                 general_samples_.compute_utilization_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
             } else if (header[i] == "Bzy_MHz") {
-                using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type;
-                clock_samples_.average_non_idle_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
+                clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
             } else if (header[i] == "TSC_MHz") {
                 using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
                 clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };

From d719e8f0b4fb5d929ddef786f6cd549f6c835966 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 17:36:07 +0200
Subject: [PATCH 29/69] Add new function returning relative time points
 (relative to the first event) as "normal" number.

---
 README.md                                      | 10 +++++-----
 bindings/hardware_sampler.cpp                  |  7 ++++---
 src/hardware_sampling/cpu/hardware_sampler.cpp |  4 ++--
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 91dba28..25e2f73 100644
--- a/README.md
+++ b/README.md
@@ -241,6 +241,7 @@ The sampling type `sampled` denotes samples that are gathered during the whole h
 import HardwareSampling
 import numpy as np
 import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
 import datetime
 
 sampler = HardwareSampling.CpuHardwareSampler()
@@ -259,16 +260,15 @@ sampler.stop()
 sampler.dump_yaml("track.yaml")
 
 # plot the results
-time_points = sampler.time_points()
-relative_time_points = [(t - time_points[0]) / datetime.timedelta(milliseconds=1) for t in time_points]
+time_points = sampler.relative_time_points()
 
-plt.plot(relative_time_points, sampler.clock_samples().get_average_frequency(), label="average")
-plt.plot(relative_time_points, sampler.clock_samples().get_average_non_idle_frequency(), label="average non-idle")
+plt.plot(time_points, sampler.clock_samples().get_clock_frequency(), label="average")
+plt.plot(time_points, sampler.clock_samples().get_average_non_idle_clock_frequency(), label="average non-idle")
 
 axes = plt.gcf().axes[0]
 x_bounds = axes.get_xlim()
 for event in sampler.get_events()[1:-1]:
-    tp = (event.time_point - time_points[0]) / datetime.timedelta(milliseconds=1)
+    tp = (event.time_point - sampler.time_points()[0]) / datetime.timedelta(milliseconds=1000)
 
     axes.axvline(x=tp, color='r')
     axes.annotate(text=event.name, xy=(((tp - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270)
diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index e46dbc8..12c0c01 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -7,7 +7,8 @@
 
 #include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
 
-#include "hardware_sampling/event.hpp"  // hws::event
+#include "hardware_sampling/event.hpp"    // hws::event
+#include "hardware_sampling/utility.hpp"  // hws::detail::durations_from_reference_time
 
 #if defined(HWS_FOR_CPUS_ENABLED)
     #include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
@@ -50,6 +51,7 @@ void init_hardware_sampler(py::module_ &m) {
         .def("get_events", &hws::hardware_sampler::get_events, "get all events")
         .def("get_event", &hws::hardware_sampler::get_event, "get a specific event")
         .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples")
+        .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
         .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)")
         .def("dump_yaml", py::overload_cast<const std::string &>(&hws::hardware_sampler::dump_yaml), "dump all hardware samples to the given YAML file")
         .def("__repr__", [](const hws::hardware_sampler &self) {
@@ -73,6 +75,5 @@ void init_hardware_sampler(py::module_ &m) {
                 return fmt::format("<plssvm.detail.tracking.GpuIntelHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_intel_hardware_sampler &>(self));
             }
 #endif
-            return std::string{ "unknown" };
-        });
+            return std::string{ "unknown" }; });
 }
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 492c17a..7e89eca 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -318,8 +318,8 @@ void cpu_hardware_sampler::sampling_loop() {
                         using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
                         general_samples_.compute_utilization_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                     } else if (header[i] == "Bzy_MHz") {
-                        using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type;
-                        clock_samples_.average_non_idle_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
+                        clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                     } else if (header[i] == "TSC_MHz") {
                         using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
                         clock_samples_.time_stamp_counter_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));

From 4f411f390529400753d0e1747e81d8175f867255 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 16 Sep 2024 17:56:47 +0200
Subject: [PATCH 30/69] Add new python only functions that return a relative
 event, i.e., events where the "relative_time_point" member is the time
 duration since the first event occurred (and not an absolute time).

---
 README.md                             |  8 +++---
 bindings/hardware_sampler.cpp         | 39 +++++++++++++++++++++++++++
 include/hardware_sampling/utility.hpp | 14 +++++++++-
 3 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 25e2f73..d7acad6 100644
--- a/README.md
+++ b/README.md
@@ -267,11 +267,9 @@ plt.plot(time_points, sampler.clock_samples().get_average_non_idle_clock_frequen
 
 axes = plt.gcf().axes[0]
 x_bounds = axes.get_xlim()
-for event in sampler.get_events()[1:-1]:
-    tp = (event.time_point - sampler.time_points()[0]) / datetime.timedelta(milliseconds=1000)
-
-    axes.axvline(x=tp, color='r')
-    axes.annotate(text=event.name, xy=(((tp - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270)
+for event in sampler.get_relative_events()[1:-1]:
+    axes.axvline(x=event.relative_time_point, color='r')
+    axes.annotate(text=event.name, xy=(((event.relative_time_point - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270)
 
 plt.xlabel("runtime [ms]")
 plt.ylabel("clock frequency [MHz]")
diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index 12c0c01..6619854 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -30,11 +30,43 @@
 
 #include <string>  // std::string
 
+namespace hws {
+
+/**
+ * @brief A struct encapsulating a single event with a relative time point.
+ */
+struct relative_event {
+    /**
+     * @brief Construct a new event given a time point and name.
+     * @param[in] time_point_p the time when the event occurred relative to the first event
+     * @param[in] name_p the name of the event
+     */
+    relative_event(const double relative_time_point_p, std::string name_p) :
+        relative_time_point{ relative_time_point_p },
+        name{ std::move(name_p) } { }
+
+    /// The relative time point this event occurred at.
+    double relative_time_point;
+    /// The name of this event.
+    std::string name;
+};
+
+}  // namespace hws
+
 namespace py = pybind11;
 
 void init_hardware_sampler(py::module_ &m) {
     const py::module_ pure_virtual_module = m.def_submodule("__pure_virtual");
 
+    // a special python only struct encapsulating a relative event, i.e., an event where its "relative_time_point" member is the time passed since the first event
+    py::class_<hws::relative_event>(m, "RelativeEvent")
+        .def(py::init<decltype(hws::relative_event::relative_time_point), decltype(hws::relative_event::name)>(), "construct a new event using a time point and a name")
+        .def_readonly("relative_time_point", &hws::relative_event::relative_time_point, "read the relative time point associated to this event")
+        .def_readonly("name", &hws::relative_event::name, "read the name associated to this event")
+        .def("__repr__", [](const hws::relative_event &self) {
+            return fmt::format("<HardWareSampling.RelativeEvent with {{ time_point: {}, name: {} }}>", self.relative_time_point, self.name);
+        });
+
     // bind the pure virtual hardware sampler base class
     py::class_<hws::hardware_sampler> pyhardware_sampler(pure_virtual_module, "__pure_virtual_base_HardwareSampler");
     pyhardware_sampler.def("start", &hws::hardware_sampler::start_sampling, "start the current hardware sampling")
@@ -49,7 +81,14 @@ void init_hardware_sampler(py::module_ &m) {
         .def("add_event", py::overload_cast<decltype(hws::event::name)>(&hws::hardware_sampler::add_event), "add a new event using a name, the current time is used as time point")
         .def("num_events", &hws::hardware_sampler::num_events, "get the number of events")
         .def("get_events", &hws::hardware_sampler::get_events, "get all events")
+        .def("get_relative_events", [](const hws::hardware_sampler &self) {
+            std::vector<hws::relative_event> relative_events{};
+            for (const hws::event &e : self.get_events()) {
+                relative_events.emplace_back(hws::detail::duration_from_reference_time(e.time_point, self.get_event(0).time_point), e.name);
+            }
+            return relative_events; }, "get all relative events")
         .def("get_event", &hws::hardware_sampler::get_event, "get a specific event")
+        .def("get_relative_event", [](const hws::hardware_sampler &self, const std::size_t idx) { return hws::relative_event{ hws::detail::duration_from_reference_time(self.get_event(idx).time_point, self.get_event(0).time_point), self.get_event(idx).name }; }, "get a specific relative event")
         .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples")
         .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
         .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)")
diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp
index ff7d3f6..c70b4c2 100644
--- a/include/hardware_sampling/utility.hpp
+++ b/include/hardware_sampling/utility.hpp
@@ -229,6 +229,18 @@ template <typename T>
 /**                                      other free functions                                       **/
 /*****************************************************************************************************/
 
+/**
+ * @brief Convert the time point to its duration in seconds (using double) truncated to three decimal places passed since the @p reference time point.
+ * @tparam TimePoint the type if the time point
+ * @param[in] time_point the time point
+ * @param[in] reference the reference time point
+ * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`)
+ */
+template <typename TimePoint>
+[[nodiscard]] inline double duration_from_reference_time(const TimePoint &time_point, const TimePoint &reference) {
+    return std::trunc(std::chrono::duration<double>(time_point - reference).count() * 1000.0) / 1000.0;
+}
+
 /**
  * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point.
  * @tparam TimePoint the type if the time points
@@ -241,7 +253,7 @@ template <typename TimePoint>
     std::vector<double> durations(time_points.size());
 
     for (std::size_t i = 0; i < durations.size(); ++i) {
-        durations[i] = std::trunc(std::chrono::duration<double>(time_points[i] - reference).count() * 1000.0) / 1000.0;
+        durations[i] = duration_from_reference_time(time_points[i], reference);
     }
 
     return durations;

From d6c69e9564aabf6d49e143850f5ce3c814651461 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 17 Sep 2024 10:59:25 +0200
Subject: [PATCH 31/69] Mark dump_yaml member functions as const.

---
 bindings/hardware_sampler.cpp                  | 2 +-
 include/hardware_sampling/hardware_sampler.hpp | 6 +++---
 src/hardware_sampling/hardware_sampler.cpp     | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index 6619854..6b3d465 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -92,7 +92,7 @@ void init_hardware_sampler(py::module_ &m) {
         .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples")
         .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
         .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)")
-        .def("dump_yaml", py::overload_cast<const std::string &>(&hws::hardware_sampler::dump_yaml), "dump all hardware samples to the given YAML file")
+        .def("dump_yaml", py::overload_cast<const std::string &>(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file")
         .def("__repr__", [](const hws::hardware_sampler &self) {
 #if defined(HWS_FOR_CPUS_ENABLED)
             if (dynamic_cast<const hws::cpu_hardware_sampler *>(&self)) {
diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp
index ce7c6fb..9da2905 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hardware_sampling/hardware_sampler.hpp
@@ -148,15 +148,15 @@ class hardware_sampler {
      * @brief Dump the hardware samples to the YAML file with @p filename.
      * @param[in] filename the YAML file to append the hardware samples to
      */
-    void dump_yaml(const char *filename);
+    void dump_yaml(const char *filename) const;
     /**
      * @copydoc hws::hardware_sampler::dump_yaml(const char *)
      */
-    void dump_yaml(const std::string &filename);
+    void dump_yaml(const std::string &filename) const;
     /**
      * @copydoc hws::hardware_sampler::dump_yaml(const char *)
      */
-    void dump_yaml(const std::filesystem::path &filename);
+    void dump_yaml(const std::filesystem::path &filename) const;
 
   protected:
     /**
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index 3511f7f..b68cbd1 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -118,7 +118,7 @@ event hardware_sampler::get_event(const std::size_t idx) const {
     return events_[idx];
 }
 
-void hardware_sampler::dump_yaml(const char *filename) {
+void hardware_sampler::dump_yaml(const char *filename) const {
     if (!this->has_sampling_stopped()) {
         throw std::runtime_error{ "Can dump samples to the YAML file only after the sampling has been stopped!" };
     }
@@ -162,11 +162,11 @@ void hardware_sampler::dump_yaml(const char *filename) {
                         this->generate_yaml_string());
 }
 
-void hardware_sampler::dump_yaml(const std::string &filename) {
+void hardware_sampler::dump_yaml(const std::string &filename) const {
     this->dump_yaml(filename.c_str());
 }
 
-void hardware_sampler::dump_yaml(const std::filesystem::path &filename) {
+void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const {
     this->dump_yaml(filename.string().c_str());
 }
 

From cbbac19e67cc9cf128e0186d25c03675ea29b811 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 17 Sep 2024 11:00:07 +0200
Subject: [PATCH 32/69] Implement HWS_CUDA_ERROR_CHECK macro for also checking
 CUDA error codes (previously only NVML functions could be checked).

---
 include/hardware_sampling/gpu_nvidia/utility.hpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp
index b347f0a..0352915 100644
--- a/include/hardware_sampling/gpu_nvidia/utility.hpp
+++ b/include/hardware_sampling/gpu_nvidia/utility.hpp
@@ -12,8 +12,9 @@
 #define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
 #pragma once
 
-#include "fmt/format.h"  // fmt::format
-#include "nvml.h"        // NVML runtime functions
+#include "cuda_runtime_api.h"  // CUDA runtime functions
+#include "fmt/format.h"        // fmt::format
+#include "nvml.h"              // NVML runtime functions
 
 #include <stdexcept>  // std::runtime_error
 #include <string>     // std::string
@@ -33,14 +34,23 @@ namespace hws::detail {
                 throw std::runtime_error{ fmt::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast<int>(errc)) }; \
             }                                                                                                                                                      \
         }
+
+    #define HWS_CUDA_ERROR_CHECK(cuda_func)                                                                                                                           \
+        {                                                                                                                                                             \
+            const cudaError_t errc = cuda_func;                                                                                                                       \
+            if (errc != cudaSuccess) {                                                                                                                                \
+                throw std::runtime_error{ fmt::format("Error in CUDA function call \"{}\": {} ({})", #cuda_func, cudaGetErrorName(errc), cudaGetErrorString(errc)) }; \
+            }                                                                                                                                                         \
+        }
 #else
     #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func;
+    #define HWS_CUDA_ERROR_CHECK(cuda_func) cuda_func;
 #endif
 
 /**
  * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|".
  * @param[in] clocks_event_reasons the bitmask to convert to a string
- * @return all event throttle reasons
+ * @return all event throttle reasons (`[[nodiscard]]`)
  */
 [[nodiscard]] std::string throttle_event_reason_to_string(unsigned long long clocks_event_reasons);
 

From 5fc03c2cfbd10ae6316578bb729608093dade14e Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 17 Sep 2024 11:29:02 +0200
Subject: [PATCH 33/69] Fix errors in documentation.

---
 include/hardware_sampling/hardware_sampler.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp
index 9da2905..64eb833 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hardware_sampling/hardware_sampler.hpp
@@ -86,7 +86,7 @@ class hardware_sampler {
      */
     [[nodiscard]] bool has_sampling_started() const noexcept;
     /**
-     * @brief Check whether this hardware sampler has currently sampling.
+     * @brief Check whether this hardware sampler is currently sampling.
      * @return `true` if the hardware sampler is currently sampling, `false` otherwise (`[[nodiscard]]`)
      */
     [[nodiscard]] bool is_sampling() const noexcept;
@@ -121,7 +121,7 @@ class hardware_sampler {
 
     /**
      * @brief Return the number of recorded events.
-     * @return the number of events (`[[nodiscard]]`)
+     * @return the events (`[[nodiscard]]`)
      */
     [[nodiscard]] const std::vector<event> &get_events() const noexcept { return events_; }
 

From acb982668cf84b0d082931734cfa87c2a167f9b8 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 17 Sep 2024 11:51:15 +0200
Subject: [PATCH 34/69] Fix errors in documentation wrongly using PLSSVM.

---
 bindings/CMakeLists.txt       | 2 +-
 bindings/hardware_sampler.cpp | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt
index 95f6a2b..f6f8c5e 100644
--- a/bindings/CMakeLists.txt
+++ b/bindings/CMakeLists.txt
@@ -4,7 +4,7 @@
 ##          See the LICENSE.md file in the project root for full license information.
 ########################################################################################################################
 
-message(STATUS "Building Python language bindings for PLSSVM.")
+message(STATUS "Building Python language bindings.")
 
 find_package(Python COMPONENTS Interpreter Development)
 
diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index 6b3d465..76dcbcc 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -96,22 +96,22 @@ void init_hardware_sampler(py::module_ &m) {
         .def("__repr__", [](const hws::hardware_sampler &self) {
 #if defined(HWS_FOR_CPUS_ENABLED)
             if (dynamic_cast<const hws::cpu_hardware_sampler *>(&self)) {
-                return fmt::format("<plssvm.detail.tracking.CpuHardwareSampler with\n{}\n>", dynamic_cast<const hws::cpu_hardware_sampler &>(self));
+                return fmt::format("<HardwareSampling.CpuHardwareSampler with\n{}\n>", dynamic_cast<const hws::cpu_hardware_sampler &>(self));
             }
 #endif
 #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
             if (dynamic_cast<const hws::gpu_nvidia_hardware_sampler *>(&self)) {
-                return fmt::format("<plssvm.detail.tracking.GpuNvidiaHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_nvidia_hardware_sampler &>(self));
+                return fmt::format("<HardwareSampling.GpuNvidiaHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_nvidia_hardware_sampler &>(self));
             }
 #endif
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
             if (dynamic_cast<const hws::gpu_amd_hardware_sampler *>(&self)) {
-                return fmt::format("<plssvm.detail.tracking.GpuAmdHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_amd_hardware_sampler &>(self));
+                return fmt::format("<HardwareSampling.GpuAmdHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_amd_hardware_sampler &>(self));
             }
 #endif
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
             if (dynamic_cast<const hws::gpu_intel_hardware_sampler *>(&self)) {
-                return fmt::format("<plssvm.detail.tracking.GpuIntelHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_intel_hardware_sampler &>(self));
+                return fmt::format("<HardwareSampling.GpuIntelHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_intel_hardware_sampler &>(self));
             }
 #endif
             return std::string{ "unknown" }; });

From bd4d98790070a9681e6fcfa7adb508f0a3a1232d Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 17 Sep 2024 12:44:08 +0200
Subject: [PATCH 35/69] Add a new system_hardware_sampler that automatically
 samples all available hardware (e.g., CPU and GPUs).

---
 CMakeLists.txt                                |  21 ++-
 bindings/CMakeLists.txt                       |   2 +
 bindings/hardware_sampler.cpp                 |  39 +---
 bindings/main.cpp                             |   4 +
 bindings/relative_event.cpp                   |  26 +++
 bindings/relative_event.hpp                   |  40 ++++
 bindings/system_hardware_sampler.cpp          |  67 +++++++
 include/hardware_sampling/core.hpp            |   1 +
 .../system_hardware_sampler.hpp               | 177 ++++++++++++++++++
 .../system_hardware_sampler.cpp               | 177 ++++++++++++++++++
 10 files changed, 518 insertions(+), 36 deletions(-)
 create mode 100644 bindings/relative_event.cpp
 create mode 100644 bindings/relative_event.hpp
 create mode 100644 bindings/system_hardware_sampler.cpp
 create mode 100644 include/hardware_sampling/system_hardware_sampler.hpp
 create mode 100644 src/hardware_sampling/system_hardware_sampler.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 74cc828..1080a26 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@ project("HWS - Hardware Sampling for GPUs and CPUs"
 set(HWS_SOURCES
         ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/event.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/system_hardware_sampler.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/utility.cpp
 )
 
@@ -87,6 +88,24 @@ else ()
 endif ()
 target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt)
 
+#set(HWS_ryml_VERSION v0.7.2)
+#find_package(ryml QUIET)
+#if (fmt_FOUND)
+#    message(STATUS "Found package ryml (rapidyaml).")
+#else ()
+#    message(STATUS "Couldn't find package ryml (rapidyaml). Building version ${HWS_ryml_VERSION} from source.")
+#    # fetch yaml library ryml
+#    FetchContent_Declare(ryml
+#            GIT_REPOSITORY https://github.com/biojppm/rapidyaml
+#            GIT_TAG ${HWS_ryml_VERSION}
+#            GIT_SHALLOW FALSE
+#            QUIET
+#    )
+#    FetchContent_MakeAvailable(ryml)
+#    add_dependencies(${HWS_LIBRARY_NAME} ryml)
+#endif ()
+#target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC ryml::ryml)
+
 ####################################################################################################################
 ##                                                CPU measurements                                                ##
 ####################################################################################################################
@@ -194,7 +213,7 @@ endif ()
 # find libraries necessary for NVML and link against them
 find_package(CUDAToolkit QUIET)
 if (CUDAToolkit_FOUND)
-    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE CUDA::nvml)
+    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE CUDA::nvml CUDA::cudart)
 
     message(STATUS "Enable sampling of NVIDIA GPU information using NVML.")
 
diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt
index f6f8c5e..fb00d29 100644
--- a/bindings/CMakeLists.txt
+++ b/bindings/CMakeLists.txt
@@ -32,7 +32,9 @@ endif ()
 # set source files that are always used
 set(HWS_PYTHON_BINDINGS_SOURCES
         ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/relative_event.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/system_hardware_sampler.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
 )
 
diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index 76dcbcc..5f86f96 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -28,45 +28,14 @@
 #include "pybind11/pybind11.h"  // py::module_, py::class_
 #include "pybind11/stl.h"       // bind STL types
 
-#include <string>  // std::string
-
-namespace hws {
-
-/**
- * @brief A struct encapsulating a single event with a relative time point.
- */
-struct relative_event {
-    /**
-     * @brief Construct a new event given a time point and name.
-     * @param[in] time_point_p the time when the event occurred relative to the first event
-     * @param[in] name_p the name of the event
-     */
-    relative_event(const double relative_time_point_p, std::string name_p) :
-        relative_time_point{ relative_time_point_p },
-        name{ std::move(name_p) } { }
-
-    /// The relative time point this event occurred at.
-    double relative_time_point;
-    /// The name of this event.
-    std::string name;
-};
-
-}  // namespace hws
+#include "relative_event.hpp"  // hws::detail::relative_event
+#include <string>              // std::string
 
 namespace py = pybind11;
 
 void init_hardware_sampler(py::module_ &m) {
     const py::module_ pure_virtual_module = m.def_submodule("__pure_virtual");
 
-    // a special python only struct encapsulating a relative event, i.e., an event where its "relative_time_point" member is the time passed since the first event
-    py::class_<hws::relative_event>(m, "RelativeEvent")
-        .def(py::init<decltype(hws::relative_event::relative_time_point), decltype(hws::relative_event::name)>(), "construct a new event using a time point and a name")
-        .def_readonly("relative_time_point", &hws::relative_event::relative_time_point, "read the relative time point associated to this event")
-        .def_readonly("name", &hws::relative_event::name, "read the name associated to this event")
-        .def("__repr__", [](const hws::relative_event &self) {
-            return fmt::format("<HardWareSampling.RelativeEvent with {{ time_point: {}, name: {} }}>", self.relative_time_point, self.name);
-        });
-
     // bind the pure virtual hardware sampler base class
     py::class_<hws::hardware_sampler> pyhardware_sampler(pure_virtual_module, "__pure_virtual_base_HardwareSampler");
     pyhardware_sampler.def("start", &hws::hardware_sampler::start_sampling, "start the current hardware sampling")
@@ -82,13 +51,13 @@ void init_hardware_sampler(py::module_ &m) {
         .def("num_events", &hws::hardware_sampler::num_events, "get the number of events")
         .def("get_events", &hws::hardware_sampler::get_events, "get all events")
         .def("get_relative_events", [](const hws::hardware_sampler &self) {
-            std::vector<hws::relative_event> relative_events{};
+            std::vector<hws::detail::relative_event> relative_events{};
             for (const hws::event &e : self.get_events()) {
                 relative_events.emplace_back(hws::detail::duration_from_reference_time(e.time_point, self.get_event(0).time_point), e.name);
             }
             return relative_events; }, "get all relative events")
         .def("get_event", &hws::hardware_sampler::get_event, "get a specific event")
-        .def("get_relative_event", [](const hws::hardware_sampler &self, const std::size_t idx) { return hws::relative_event{ hws::detail::duration_from_reference_time(self.get_event(idx).time_point, self.get_event(0).time_point), self.get_event(idx).name }; }, "get a specific relative event")
+        .def("get_relative_event", [](const hws::hardware_sampler &self, const std::size_t idx) { return hws::detail::relative_event{ hws::detail::duration_from_reference_time(self.get_event(idx).time_point, self.get_event(0).time_point), self.get_event(idx).name }; }, "get a specific relative event")
         .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples")
         .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
         .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)")
diff --git a/bindings/main.cpp b/bindings/main.cpp
index 2b6f507..11dbf33 100644
--- a/bindings/main.cpp
+++ b/bindings/main.cpp
@@ -16,7 +16,9 @@ namespace py = pybind11;
 
 // forward declare binding functions
 void init_event(py::module_ &);
+void init_relative_event(py::module_ &);
 void init_hardware_sampler(py::module_ &);
+void init_system_hardware_sampler(py::module_ &);
 void init_cpu_hardware_sampler(py::module_ &);
 void init_gpu_nvidia_hardware_sampler(py::module_ &);
 void init_gpu_amd_hardware_sampler(py::module_ &);
@@ -26,7 +28,9 @@ PYBIND11_MODULE(HardwareSampling, m) {
     m.doc() = "Hardware Sampling for CPUs and GPUs";
 
     init_event(m);
+    init_relative_event(m);
     init_hardware_sampler(m);
+    init_system_hardware_sampler(m);
 
     // CPU sampling
 #if defined(HWS_FOR_CPUS_ENABLED)
diff --git a/bindings/relative_event.cpp b/bindings/relative_event.cpp
new file mode 100644
index 0000000..c0cb611
--- /dev/null
+++ b/bindings/relative_event.cpp
@@ -0,0 +1,26 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "relative_event.hpp"  // hws::detail::relative_event
+
+#include "fmt/format.h"         // fmt::format
+#include "pybind11/chrono.h"    // bind std::chrono types
+#include "pybind11/pybind11.h"  // py::module_
+#include "pybind11/stl.h"       // bind STL types
+
+namespace py = pybind11;
+
+void init_relative_event(py::module_ &m) {
+    // a special python only struct encapsulating a relative event, i.e., an event where its "relative_time_point" member is the time passed since the first event
+    py::class_<hws::detail::relative_event>(m, "RelativeEvent")
+        .def(py::init<decltype(hws::detail::relative_event::relative_time_point), decltype(hws::detail::relative_event::name)>(), "construct a new event using a time point and a name")
+        .def_readonly("relative_time_point", &hws::detail::relative_event::relative_time_point, "read the relative time point associated to this event")
+        .def_readonly("name", &hws::detail::relative_event::name, "read the name associated to this event")
+        .def("__repr__", [](const hws::detail::relative_event &self) {
+            return fmt::format("<HardWareSampling.RelativeEvent with {{ time_point: {}, name: {} }}>", self.relative_time_point, self.name);
+        });
+}
diff --git a/bindings/relative_event.hpp b/bindings/relative_event.hpp
new file mode 100644
index 0000000..2033f12
--- /dev/null
+++ b/bindings/relative_event.hpp
@@ -0,0 +1,40 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines a struct encapsulating a single event with a relative time point.
+ */
+
+#ifndef HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_
+#define HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_
+
+#include <string>   // std::string
+#include <utility>  // std::move
+
+namespace hws::detail {
+
+/**
+ * @brief A struct encapsulating a single event with a relative time point.
+ */
+struct relative_event {
+    /**
+     * @brief Construct a new event given a time point and name.
+     * @param[in] relative_time_point_p the time when the event occurred relative to the first event
+     * @param[in] name_p the name of the event
+     */
+    relative_event(const double relative_time_point_p, std::string name_p) :
+        relative_time_point{ relative_time_point_p },
+        name{ std::move(name_p) } { }
+
+    /// The relative time point this event occurred at.
+    double relative_time_point;
+    /// The name of this event.
+    std::string name;
+};
+
+}  // namespace hws::detail
+
+#endif  // HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_
diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp
new file mode 100644
index 0000000..3c24ad3
--- /dev/null
+++ b/bindings/system_hardware_sampler.cpp
@@ -0,0 +1,67 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hardware_sampling/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
+
+#include "hardware_sampling/event.hpp"    // hws::event
+#include "hardware_sampling/utility.hpp"  // hws::detail::durations_from_reference_time
+
+#include "fmt/format.h"         // fmt::format
+#include "pybind11/chrono.h"    // bind std::chrono types
+#include "pybind11/pybind11.h"  // py::module_, py::class_
+#include "pybind11/stl.h"       // bind STL types
+
+#include "relative_event.hpp"  // hws::detail::relative_event
+#include <string>              // std::string
+
+namespace py = pybind11;
+
+void init_system_hardware_sampler(py::module_ &m) {
+    // bind the system hardware sampler; the wrapped samplers stay owned by it, therefore raw pointers to them are only handed out with reference_internal
+    py::class_<hws::system_hardware_sampler>(m, "SystemHardwareSampler")
+        .def(py::init<>(), "construct a new system hardware sampler with the default sampling interval")
+        .def(py::init<std::chrono::milliseconds>(), "construct a new system hardware sampler with the specified sampling interval")
+        .def("start", &hws::system_hardware_sampler::start_sampling, "start hardware sampling for all available hardware samplers")
+        .def("stop", &hws::system_hardware_sampler::stop_sampling, "stop hardware sampling for all available hardware samplers")
+        .def("pause", &hws::system_hardware_sampler::pause_sampling, "pause hardware sampling for all available hardware samplers")
+        .def("resume", &hws::system_hardware_sampler::resume_sampling, "resume hardware sampling for all available hardware samplers")
+        .def("has_started", &hws::system_hardware_sampler::has_sampling_started, "check whether hardware sampling has already been started for all hardware samplers")
+        .def("is_sampling", &hws::system_hardware_sampler::is_sampling, "check whether the hardware sampling is currently active for all hardware samplers")
+        .def("has_stopped", &hws::system_hardware_sampler::has_sampling_stopped, "check whether hardware sampling has already been stopped for all hardware samplers")
+        .def("add_event", py::overload_cast<hws::event>(&hws::system_hardware_sampler::add_event), "add a new event to all hardware samplers")
+        .def("add_event", py::overload_cast<decltype(hws::event::time_point), decltype(hws::event::name)>(&hws::system_hardware_sampler::add_event), "add a new event using a time point and a name to all hardware samplers")
+        .def("add_event", py::overload_cast<decltype(hws::event::name)>(&hws::system_hardware_sampler::add_event), "add a new event using a name, the current time is used as time point to all hardware samplers")
+        .def("num_events", &hws::system_hardware_sampler::num_events, "get the number of events separately for each hardware sampler")
+        .def("get_events", &hws::system_hardware_sampler::get_events, "get all events separately for each hardware sampler")
+        .def("get_relative_events", [](const hws::system_hardware_sampler &self) {
+             std::vector<std::vector<hws::detail::relative_event>> relative_events{};
+             for (const std::vector<hws::event> &events : self.get_events()) {
+                 relative_events.emplace_back();
+                 for (const hws::event &e : events) {
+                     relative_events.back().emplace_back(hws::detail::duration_from_reference_time(e.time_point, events[0].time_point), e.name);
+                 }
+             }
+             return relative_events; }, "get all relative events separately for each hardware sampler")
+        .def("time_points", &hws::system_hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples separately for each hardware sampler")
+        .def("relative_time_points", [](const hws::system_hardware_sampler &self) {
+            std::vector<std::vector<double>> relative_time_points{};
+            for (const auto &sampler : self.samplers()) {
+                relative_time_points.emplace_back(hws::detail::durations_from_reference_time(sampler->sampling_time_points(), sampler->get_event(0).time_point));
+            }
+            return relative_time_points; }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
+        .def("sampling_interval", &hws::system_hardware_sampler::sampling_interval, "get the sampling interval separately for each hardware sampler (in ms)")
+        .def("num_samplers", &hws::system_hardware_sampler::num_samplers, "get the number of hardware samplers available for the whole system")
+        .def("samplers", [](hws::system_hardware_sampler &self) {
+            std::vector<hws::hardware_sampler*> out{};
+            for (auto &ptr : self.samplers()) {
+                out.push_back(ptr.get());
+            }
+            return out; }, py::return_value_policy::reference_internal, "get the hardware samplers available for the whole system")
+        .def("sampler", [](hws::system_hardware_sampler &self, const std::size_t idx) { return self.sampler(idx).get(); }, py::return_value_policy::reference_internal, "get the i-th hardware sampler available for the whole system")
+        .def("dump_yaml", py::overload_cast<const std::string &>(&hws::system_hardware_sampler::dump_yaml, py::const_), "dump all hardware samples for all hardware samplers to the given YAML file")
+        .def("__repr__", [](const hws::system_hardware_sampler &self) { return fmt::format("<hws.SystemHardwareSampler with {} samplers>", self.num_samplers()); });
+}
diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp
index 3c986a5..15d65df 100644
--- a/include/hardware_sampling/core.hpp
+++ b/include/hardware_sampling/core.hpp
@@ -14,6 +14,7 @@
 
 #include "hardware_sampling/event.hpp"
 #include "hardware_sampling/hardware_sampler.hpp"
+#include "hardware_sampling/system_hardware_sampler.hpp"
 
 #if defined(HWS_FOR_CPUS_ENABLED)
     #include "hardware_sampling/cpu/cpu_samples.hpp"
diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp
new file mode 100644
index 0000000..c585a3f
--- /dev/null
+++ b/include/hardware_sampling/system_hardware_sampler.hpp
@@ -0,0 +1,177 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines a hardware sampler for the whole system, i.e., automatically creates CPU and GPU hardware samplers if the respective sampler and hardware are available.
+ */
+
+#ifndef HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_
+#define HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_
+
+#include "hardware_sampling/event.hpp"             // hws::event
+#include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
+
+#include <chrono>      // std::chrono::{milliseconds, steady_clock::time_point}
+#include <cstddef>     // std::size_t
+#include <filesystem>  // std::filesystem::path
+#include <memory>      // std::unique_ptr
+#include <string>      // std::string
+#include <vector>      // std::vector
+
+namespace hws {
+
+/**
+ * @brief A hardware sampler for the whole system.
+ * @details Enables hardware samplers for which hardware is available and the CMake configuration found the respective dependencies.
+ */
+class system_hardware_sampler {
+  public:
+    /**
+     * @brief Construct hardware samplers with the default sampling interval.
+     */
+    system_hardware_sampler();
+    /**
+     * @brief Construct hardware samplers with the provided @p sampling_interval.
+     * @param[in] sampling_interval the used sampling interval
+     */
+    explicit system_hardware_sampler(std::chrono::milliseconds sampling_interval);
+
+    /**
+     * @brief Delete the copy-constructor.
+     */
+    system_hardware_sampler(const system_hardware_sampler &) = delete;
+    /**
+     * @brief Delete the move-constructor.
+     */
+    system_hardware_sampler(system_hardware_sampler &&) noexcept = delete;
+    /**
+     * @brief Delete the copy-assignment operator.
+     */
+    system_hardware_sampler &operator=(const system_hardware_sampler &) = delete;
+    /**
+     * @brief Delete the move-assignment operator.
+     */
+    system_hardware_sampler &operator=(system_hardware_sampler &&) noexcept = delete;
+
+    /**
+     * @brief Start hardware sampling for all wrapped hardware samplers.
+     */
+    void start_sampling();
+    /**
+     * @brief Stop hardware sampling for all wrapped hardware samplers.
+     */
+    void stop_sampling();
+    /**
+     * @brief Pause hardware sampling for all wrapped hardware samplers.
+     */
+    void pause_sampling();
+    /**
+     * @brief Resume hardware sampling for all wrapped hardware samplers.
+     */
+    void resume_sampling();
+
+    /**
+     * @brief Check whether the hardware samplers have already started sampling.
+     * @return `true` if **all** hardware samplers have already started sampling, `false` otherwise (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_sampling_started() const noexcept;
+    /**
+     * @brief Check whether the hardware samplers are currently sampling.
+     * @return `true` if **all** hardware samplers are currently sampling, `false` otherwise (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool is_sampling() const noexcept;
+    /**
+     * @brief Check whether the hardware samplers have already stopped sampling.
+     * @return `true` if **all** hardware samplers have already stopped sampling, `false` otherwise (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_sampling_stopped() const noexcept;
+
+    /**
+     * @brief Add a new event to all hardware samplers.
+     * @param[in] e the event
+     */
+    void add_event(event e);
+    /**
+     * @brief Add a new event to all hardware samplers.
+     * @param[in] time_point the time point when the event occurred
+     * @param[in] name the name of the event
+     */
+    void add_event(decltype(event::time_point) time_point, decltype(event::name) name);
+    /**
+     * @brief Add a new event to all hardware samplers. The time_point will be the current time.
+     * @param[in] name the name of the event
+     */
+    void add_event(decltype(event::name) name);
+
+    /**
+     * @brief Return the number of recorded events separately for each hardware sampler.
+     * @return the number of events per hardware sampler (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::vector<std::size_t> num_events() const;
+    /**
+     * @brief Return the recorded events separately for each hardware sampler.
+     * @return the events per hardware sampler (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::vector<std::vector<event>> get_events() const;
+    /**
+     * @brief Return the time points of the samples separately for each hardware sampler.
+     * @return the time points per hardware sampler (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::vector<std::vector<std::chrono::steady_clock::time_point>> sampling_time_points() const;
+    /**
+     * @brief Return the sampling interval separately for each hardware sampler.
+     * @return the sampling interval in milliseconds per hardware sampler (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::vector<std::chrono::milliseconds> sampling_interval() const;
+
+    /**
+     * @brief The number of hardware samplers available for the whole system.
+     * @return the number of hardware samplers (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::size_t num_samplers() const noexcept;
+    /**
+     * @brief The hardware samplers available for the whole system.
+     * @return all available hardware samplers (`[[nodiscard]]`)
+     */
+    [[nodiscard]] const std::vector<std::unique_ptr<hardware_sampler>> &samplers() const noexcept;
+    /**
+     * @copydoc hws::system_hardware_sampler::samplers() const
+     */
+    [[nodiscard]] std::vector<std::unique_ptr<hardware_sampler>> &samplers() noexcept;
+    /**
+     * @brief Return the hardware sampler at index @p idx.
+     * @param[in] idx the index of the hardware sampler
+     * @throws std::out_of_range if @p idx is out-of-range
+     * @return the hardware sampler at index @p idx (`[[nodiscard]]`)
+     */
+    [[nodiscard]] const std::unique_ptr<hardware_sampler> &sampler(std::size_t idx) const;
+    /**
+     * @copydoc hws::system_hardware_sampler::sampler(std::size_t idx) const
+     */
+    [[nodiscard]] std::unique_ptr<hardware_sampler> &sampler(std::size_t idx);
+
+    /**
+     * @brief Dump the hardware samples of all hardware samplers to the YAML file with @p filename.
+     * @param[in] filename the YAML file to append the hardware samples to
+     */
+    void dump_yaml(const char *filename) const;
+    /**
+     * @copydoc hws::system_hardware_sampler::dump_yaml(const char *)
+     */
+    void dump_yaml(const std::string &filename) const;
+    /**
+     * @copydoc hws::system_hardware_sampler::dump_yaml(const char *)
+     */
+    void dump_yaml(const std::filesystem::path &filename) const;
+
+  private:
+    /// The different hardware samplers for the current system.
+    std::vector<std::unique_ptr<hardware_sampler>> samplers_;
+};
+
+}  // namespace hws
+
+#endif  // HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_
diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp
new file mode 100644
index 0000000..d2d63a9
--- /dev/null
+++ b/src/hardware_sampling/system_hardware_sampler.cpp
@@ -0,0 +1,177 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hardware_sampling/system_hardware_sampler.hpp"
+
+#include "hardware_sampling/event.hpp"  // hws::event
+
+#if defined(HWS_FOR_CPUS_ENABLED)
+    #include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
+#endif
+#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
+    #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
+    #include "hardware_sampling/gpu_nvidia/utility.hpp"           // HWS_CUDA_ERROR_CHECK, hws::detail::
+#endif
+#if defined(HWS_FOR_AMD_GPUS_ENABLED)
+    #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
+    #include "hardware_sampling/gpu_amd/utility.hpp"           // HWS_HIP_ERROR_CHECK
+
+    #include "hip/hip_runtime.h"  // hipGetDeviceCount
+#endif
+#if defined(HWS_FOR_INTEL_GPUS_ENABLED)
+    #include "hardware_sampling/gpu_intel/hardware_sampler.hpp"  // hws::gpu_intel_hardware_sampler
+    #include "hardware_sampling/gpu_intel/utility.hpp"           // HWS_LEVEL_ZERO_ERROR_CHECK
+#endif
+
+#include "fmt/format.h"  // fmt::format
+
+#include <algorithm>  // std::for_each, std::all_of, std::transform
+#include <chrono>     // std::chrono::milliseconds
+#include <memory>     // std::unique_ptr, std::make_unique
+#include <stdexcept>  // std::out_of_range
+#include <vector>     // std::vector
+
+namespace hws {
+
+system_hardware_sampler::system_hardware_sampler() :
+    system_hardware_sampler{ HWS_SAMPLING_INTERVAL } { }
+
+system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds sampling_interval) {
+    // create the hardware samplers based on the available hardware
+#if defined(HWS_FOR_CPUS_ENABLED)
+    {
+        samplers_.push_back(std::make_unique<cpu_hardware_sampler>(sampling_interval));
+    }
+#endif
+#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
+    {
+        int device_count{};
+        HWS_CUDA_ERROR_CHECK(cudaGetDeviceCount(&device_count));
+        for (int device = 0; device < device_count; ++device) {
+            samplers_.push_back(std::make_unique<gpu_nvidia_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval));
+        }
+    }
+#endif
+#if defined(HWS_FOR_AMD_GPUS_ENABLED)
+    {
+        int device_count{};
+        HWS_HIP_ERROR_CHECK(hipGetDeviceCount(&device_count));
+        for (int device = 0; device < device_count; ++device) {
+            samplers_.push_back(std::make_unique<gpu_amd_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval));
+        }
+    }
+#endif
+#if defined(HWS_FOR_INTEL_GPUS_ENABLED)
+    {
+        // TODO: implement
+    }
+#endif
+}
+
+void system_hardware_sampler::start_sampling() {
+    std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->start_sampling(); });
+}
+
+void system_hardware_sampler::stop_sampling() {
+    std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->stop_sampling(); });
+}
+
+void system_hardware_sampler::pause_sampling() {
+    std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->pause_sampling(); });
+}
+
+void system_hardware_sampler::resume_sampling() {
+    std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->resume_sampling(); });
+}
+
+bool system_hardware_sampler::has_sampling_started() const noexcept {
+    return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->has_sampling_started(); });
+}
+
+bool system_hardware_sampler::is_sampling() const noexcept {
+    return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->is_sampling(); });
+}
+
+bool system_hardware_sampler::has_sampling_stopped() const noexcept {
+    return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->has_sampling_stopped(); });
+}
+
+void system_hardware_sampler::add_event(event e) {
+    std::for_each(samplers_.begin(), samplers_.end(), [&e](auto &ptr) { ptr->add_event(e); });
+}
+
+void system_hardware_sampler::add_event(decltype(event::time_point) time_point, decltype(event::name) name) {
+    std::for_each(samplers_.begin(), samplers_.end(), [&time_point, &name](auto &ptr) { ptr->add_event(time_point, name); });
+}
+
+void system_hardware_sampler::add_event(decltype(event::name) name) {
+    std::for_each(samplers_.begin(), samplers_.end(), [&name](auto &ptr) { ptr->add_event(name); });
+}
+
+std::vector<std::size_t> system_hardware_sampler::num_events() const {
+    std::vector<std::size_t> num_events_per_sampler(this->num_samplers());
+    std::transform(samplers_.cbegin(), samplers_.cend(), num_events_per_sampler.begin(), [](const auto &ptr) { return ptr->num_events(); });
+    return num_events_per_sampler;
+}
+
+std::vector<std::vector<event>> system_hardware_sampler::get_events() const {
+    std::vector<std::vector<event>> events_per_sampler(this->num_samplers());
+    std::transform(samplers_.cbegin(), samplers_.cend(), events_per_sampler.begin(), [](const auto &ptr) { return ptr->get_events(); });
+    return events_per_sampler;
+}
+
+std::vector<std::vector<std::chrono::steady_clock::time_point>> system_hardware_sampler::sampling_time_points() const {
+    std::vector<std::vector<std::chrono::steady_clock::time_point>> sampling_time_points_per_sampler(this->num_samplers());
+    std::transform(samplers_.cbegin(), samplers_.cend(), sampling_time_points_per_sampler.begin(), [](const auto &ptr) { return ptr->sampling_time_points(); });
+    return sampling_time_points_per_sampler;
+}
+
+std::vector<std::chrono::milliseconds> system_hardware_sampler::sampling_interval() const {
+    std::vector<std::chrono::milliseconds> sampling_interval_per_sampler(this->num_samplers());
+    std::transform(samplers_.cbegin(), samplers_.cend(), sampling_interval_per_sampler.begin(), [](const auto &ptr) { return ptr->sampling_interval(); });
+    return sampling_interval_per_sampler;
+}
+
+std::size_t system_hardware_sampler::num_samplers() const noexcept {
+    return samplers_.size();
+}
+
+std::vector<std::unique_ptr<hardware_sampler>> &system_hardware_sampler::samplers() noexcept {
+    return samplers_;
+}
+
+const std::vector<std::unique_ptr<hardware_sampler>> &system_hardware_sampler::samplers() const noexcept {
+    return samplers_;
+}
+
+std::unique_ptr<hardware_sampler> &system_hardware_sampler::sampler(const std::size_t idx) {
+    if (idx >= samplers_.size()) {
+        throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) };
+    }
+    return samplers_[idx];
+}
+
+const std::unique_ptr<hardware_sampler> &system_hardware_sampler::sampler(const std::size_t idx) const {
+    if (idx >= samplers_.size()) {
+        throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) };
+    }
+    return samplers_[idx];
+}
+
+void system_hardware_sampler::dump_yaml(const char *filename) const {
+    std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); });
+}
+
+void system_hardware_sampler::dump_yaml(const std::string &filename) const {
+    std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); });
+}
+
+void system_hardware_sampler::dump_yaml(const std::filesystem::path &filename) const {
+    std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); });
+}
+
+}  // namespace hws

From 7c96f02681cfad7229b815b2f687d65f2d7598eb Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 17 Sep 2024 13:04:53 +0200
Subject: [PATCH 36/69] Fix clang-tidy warnings.

---
 .../cpu/hardware_sampler.hpp                  |  4 +-
 .../gpu_amd/hardware_sampler.hpp              |  4 +-
 .../gpu_nvidia/hardware_sampler.hpp           |  4 +-
 .../gpu_nvidia/nvml_device_handle_impl.hpp    |  2 +-
 .../gpu_nvidia/nvml_samples.hpp               |  2 +-
 .../system_hardware_sampler.hpp               |  5 ++
 src/hardware_sampling/cpu/utility.cpp         |  6 +--
 .../gpu_amd/hardware_sampler.cpp              | 50 +++++++++----------
 .../gpu_amd/rocm_smi_samples.cpp              |  5 +-
 .../gpu_nvidia/hardware_sampler.cpp           | 38 +++++++-------
 src/hardware_sampling/hardware_sampler.cpp    |  2 +-
 11 files changed, 63 insertions(+), 59 deletions(-)

diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp
index 18b489f..4e65338 100644
--- a/include/hardware_sampling/cpu/hardware_sampler.hpp
+++ b/include/hardware_sampling/cpu/hardware_sampler.hpp
@@ -113,12 +113,12 @@ class cpu_hardware_sampler : public hardware_sampler {
     /**
      * @copydoc hws::hardware_sampler::device_identification
      */
-    std::string device_identification() const final;
+    [[nodiscard]] std::string device_identification() const final;
 
     /**
      * @copydoc hws::hardware_sampler::generate_yaml_string
      */
-    std::string generate_yaml_string() const final;
+    [[nodiscard]] std::string generate_yaml_string() const final;
 
     /// The general CPU samples.
     cpu_general_samples general_samples_{};
diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
index 80a7dbe..65e6ca3 100644
--- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
@@ -120,12 +120,12 @@ class gpu_amd_hardware_sampler : public hardware_sampler {
     /**
      * @copydoc hws::hardware_sampler::device_identification
      */
-    std::string device_identification() const final;
+    [[nodiscard]] std::string device_identification() const final;
 
     /**
      * @copydoc hws::hardware_sampler::generate_yaml_string
      */
-    std::string generate_yaml_string() const final;
+    [[nodiscard]] std::string generate_yaml_string() const final;
 
     /// The ID of the device to sample.
     std::uint32_t device_id_{};
diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
index 60ed693..562348a 100644
--- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
@@ -121,12 +121,12 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler {
     /**
      * @copydoc hws::hardware_sampler::device_identification
      */
-    std::string device_identification() const final;
+    [[nodiscard]] std::string device_identification() const final;
 
     /**
      * @copydoc hws::hardware_sampler::generate_yaml_string
      */
-    std::string generate_yaml_string() const final;
+    [[nodiscard]] std::string generate_yaml_string() const final;
 
     /// The device handle for the device to sample.
     detail::nvml_device_handle device_{};
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp b/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp
index 9247f29..df6147c 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp
@@ -32,7 +32,7 @@ struct nvml_device_handle::nvml_device_handle_impl {
      * @param[in] device_id the device to get the handle for
      */
     explicit nvml_device_handle_impl(const std::size_t device_id) {
-        HWS_NVML_ERROR_CHECK(nvmlDeviceGetHandleByIndex(static_cast<int>(device_id), &device));
+        HWS_NVML_ERROR_CHECK(nvmlDeviceGetHandleByIndex(static_cast<int>(device_id), &device))
     }
 
     /// The wrapped NVML device handle.
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index ed6504b..c6e7ad9 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -170,7 +170,7 @@ class nvml_memory_samples {
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long, memory_total)             // the total available memory in Byte
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_pcie_lanes_max)        // the maximum number of PCIe lanes
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_generation_max)  // the maximum PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_generation_max)  // the maximum PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc.)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_speed_max)       // the maximum PCIe link speed in MBPS
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, memory_bus_width)          // the memory bus with in Bit
 
diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp
index c585a3f..394a0c6 100644
--- a/include/hardware_sampling/system_hardware_sampler.hpp
+++ b/include/hardware_sampling/system_hardware_sampler.hpp
@@ -56,6 +56,11 @@ class system_hardware_sampler {
      */
     system_hardware_sampler &operator=(system_hardware_sampler &&) noexcept = delete;
 
+    /**
+     * @brief Explicitly use the default destructor.
+     */
+    ~system_hardware_sampler() = default;
+
     /**
      * @brief Start hardware sampling for all wrapped hardware samplers.
      */
diff --git a/src/hardware_sampling/cpu/utility.cpp b/src/hardware_sampling/cpu/utility.cpp
index 2b0080f..3a17995 100644
--- a/src/hardware_sampling/cpu/utility.cpp
+++ b/src/hardware_sampling/cpu/utility.cpp
@@ -36,10 +36,10 @@ std::string run_subprocess(const std::string_view cmd_line) {
 
     // create subprocess
     subprocess_s proc{};
-    HWS_SUBPROCESS_ERROR_CHECK(subprocess_create(cmd_ptr_split.data(), options, &proc));
+    HWS_SUBPROCESS_ERROR_CHECK(subprocess_create(cmd_ptr_split.data(), options, &proc))
     // wait until process has finished
     int return_code{};
-    HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code));
+    HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code))
     if (return_code != 0) {
         throw std::runtime_error{ fmt::format("Error: \"{}\" returned with {}!", cmd_line, return_code) };
     }
@@ -50,7 +50,7 @@ std::string run_subprocess(const std::string_view cmd_line) {
     const std::size_t bytes_read = std::fread(buffer.data(), sizeof(typename decltype(buffer)::value_type), buffer.size(), out_handle);
 
     // destroy subprocess
-    HWS_SUBPROCESS_ERROR_CHECK(subprocess_destroy(&proc));
+    HWS_SUBPROCESS_ERROR_CHECK(subprocess_destroy(&proc))
 
     // create output
     return buffer.substr(0, bytes_read);
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 2d344ef..96380b3 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -47,7 +47,7 @@ gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id,
     device_id_{ static_cast<std::uint32_t>(device_id) } {
     // make sure that rsmi_init is only called once for all instances
     if (instances_++ == 0) {
-        HWS_ROCM_SMI_ERROR_CHECK(rsmi_init(std::uint64_t{ 0 }));
+        HWS_ROCM_SMI_ERROR_CHECK(rsmi_init(std::uint64_t{ 0 }))
         // notify that initialization has been finished
         init_finished_ = true;
     } else {
@@ -66,7 +66,7 @@ gpu_amd_hardware_sampler::~gpu_amd_hardware_sampler() {
         // the last instance must shut down the ROCm SMI runtime
         // make sure that rsmi_shut_down is only called once
         if (--instances_ == 0) {
-            HWS_ROCM_SMI_ERROR_CHECK(rsmi_shut_down());
+            HWS_ROCM_SMI_ERROR_CHECK(rsmi_shut_down())
             // reset init_finished flag
             init_finished_ = false;
         }
@@ -92,8 +92,8 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         general_samples_.byte_order_ = "Little Endian";
 
         hipDeviceProp_t prop{};
-        if (hipGetDeviceProperties(&prop, device_id_) == hipSuccess) {
-            std::string architecture{ prop.gcnArchName };
+        if (hipGetDeviceProperties(&prop, static_cast<int>(device_id_)) == hipSuccess) {
+            const std::string architecture{ prop.gcnArchName };
             general_samples_.architecture_ = architecture.substr(0, architecture.find_first_of('\0'));
         }
 
@@ -467,19 +467,19 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             {
                 if (general_samples_.performance_level_.has_value()) {
                     rsmi_dev_perf_level_t pstate{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate))
                     general_samples_.performance_level_->push_back(performance_level_to_string(pstate));
                 }
 
                 if (general_samples_.compute_utilization_.has_value()) {
                     decltype(general_samples_.compute_utilization_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value))
                     general_samples_.compute_utilization_->push_back(value);
                 }
 
                 if (general_samples_.memory_utilization_.has_value()) {
                     decltype(general_samples_.memory_utilization_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value))
                     general_samples_.memory_utilization_->push_back(value);
                 }
             }
@@ -488,7 +488,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             {
                 if (clock_samples_.clock_frequency_.has_value()) {
                     rsmi_frequencies_t frequency_info{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info))
                     if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
                         clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
                     } else {
@@ -499,7 +499,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
                 if (clock_samples_.socket_clock_frequency_.has_value()) {
                     rsmi_frequencies_t frequency_info{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info))
                     if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
                         clock_samples_.socket_clock_frequency_->push_back(static_cast<decltype(clock_samples_.socket_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
                     } else {
@@ -510,7 +510,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
                 if (clock_samples_.memory_clock_frequency_.has_value()) {
                     rsmi_frequencies_t frequency_info{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info))
                     if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
                         clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
                     } else {
@@ -521,13 +521,13 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
                 if (clock_samples_.overdrive_level_.has_value()) {
                     decltype(clock_samples_.overdrive_level_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_overdrive_level_get(device_id_, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_overdrive_level_get(device_id_, &value))
                     clock_samples_.overdrive_level_->push_back(value);
                 }
 
                 if (clock_samples_.memory_overdrive_level_.has_value()) {
                     decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_mem_overdrive_level_get(device_id_, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_mem_overdrive_level_get(device_id_, &value))
                     clock_samples_.memory_overdrive_level_->push_back(value);
                 }
             }
@@ -537,7 +537,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                 if (power_samples_.power_usage_.has_value()) {
                     [[maybe_unused]] RSMI_POWER_TYPE power_type{};
                     std::uint64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type))
                     power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value - initial_power_usage) / 1000.0 / 1000.0);
                 }
 
@@ -545,14 +545,14 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     [[maybe_unused]] std::uint64_t timestamp{};
                     float resolution{};
                     std::uint64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp))
                     const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
                     power_samples_.power_total_energy_consumption_->push_back(scaled_value / 1000.0);
                 }
 
                 if (power_samples_.power_profile_.has_value()) {
                     rsmi_power_profile_status_t power_profile{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile))
                     switch (power_profile.current) {
                         case RSMI_PWR_PROF_PRST_CUSTOM_MASK:
                             power_samples_.power_profile_->emplace_back("CUSTOM");
@@ -586,7 +586,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             {
                 if (memory_samples_.memory_used_.has_value()) {
                     decltype(memory_samples_.memory_used_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value))
                     memory_samples_.memory_used_->push_back(value);
                     if (memory_samples_.memory_free_.has_value()) {
                         memory_samples_.memory_free_->push_back(memory_samples_.memory_total_.value() - value);
@@ -595,7 +595,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
                 if (memory_samples_.pcie_link_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) {
                     rsmi_pcie_bandwidth_t bandwidth_info{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info))
                     if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) {
                         memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000);
                         memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]);
@@ -611,50 +611,50 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             {
                 if (temperature_samples_.fan_speed_percentage_.has_value()) {
                     std::int64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value))
                     temperature_samples_.fan_speed_percentage_->push_back(static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(value) /
                                                                           static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(RSMI_MAX_FAN_SPEED));
                 }
 
                 if (temperature_samples_.temperature_.has_value()) {
                     std::int64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value))
                     temperature_samples_.temperature_->push_back(static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
                 if (temperature_samples_.memory_temperature_.has_value()) {
                     std::int64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value))
                     temperature_samples_.memory_temperature_->push_back(static_cast<decltype(temperature_samples_.memory_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
                 if (temperature_samples_.hotspot_temperature_.has_value()) {
                     std::int64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value))
                     temperature_samples_.hotspot_temperature_->push_back(static_cast<decltype(temperature_samples_.hotspot_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
                 if (temperature_samples_.hbm_0_temperature_.has_value()) {
                     std::int64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value))
                     temperature_samples_.hbm_0_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_0_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
                 if (temperature_samples_.hbm_1_temperature_.has_value()) {
                     std::int64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value))
                     temperature_samples_.hbm_1_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_1_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
                 if (temperature_samples_.hbm_2_temperature_.has_value()) {
                     std::int64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value))
                     temperature_samples_.hbm_2_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_2_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
 
                 if (temperature_samples_.hbm_3_temperature_.has_value()) {
                     std::int64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value));
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value))
                     temperature_samples_.hbm_3_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_3_temperature_)::value_type::value_type>(value) / 1000.0);
                 }
             }
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index ba06efe..641ca29 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -9,9 +9,8 @@
 
 #include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, quote}
 
-#include "fmt/format.h"         // fmt::format
-#include "fmt/ranges.h"         // fmt::join
-#include "rocm_smi/rocm_smi.h"  // RSMI_MAX_FAN_SPEED
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
 
 #include <ostream>  // std::ostream
 #include <string>   // std::string
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 7af2a2a..20c9918 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -46,7 +46,7 @@ gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t devic
     hardware_sampler{ sampling_interval } {
     // make sure that nvmlInit is only called once for all instances
     if (instances_++ == 0) {
-        HWS_NVML_ERROR_CHECK(nvmlInit());
+        HWS_NVML_ERROR_CHECK(nvmlInit())
         // notify that initialization has been finished
         init_finished_ = true;
     } else {
@@ -68,7 +68,7 @@ gpu_nvidia_hardware_sampler::~gpu_nvidia_hardware_sampler() {
         // the last instance must shut down the NVML runtime
         // make sure that nvmlShutdown is only called once
         if (--instances_ == 0) {
-            HWS_NVML_ERROR_CHECK(nvmlShutdown());
+            HWS_NVML_ERROR_CHECK(nvmlShutdown())
             // reset init_finished flag
             init_finished_ = false;
         }
@@ -227,7 +227,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
         {
             unsigned int clock_count{ 128 };
             std::vector<unsigned int> supported_clocks(clock_count);
-            if (clock_samples_.memory_clock_frequency_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, clock_samples_.memory_clock_frequency_min_.value(), &clock_count, supported_clocks.data()) == NVML_SUCCESS) {
+            if (clock_samples_.memory_clock_frequency_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, static_cast<unsigned int>(clock_samples_.memory_clock_frequency_min_.value()), &clock_count, supported_clocks.data()) == NVML_SUCCESS) {
                 clock_samples_.clock_frequency_min_ = static_cast<decltype(clock_samples_.clock_frequency_min_)::value_type>(*std::min_element(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count));
             }
 
@@ -427,13 +427,13 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             {
                 if (general_samples_.performance_level_.has_value()) {
                     nvmlPstates_t pstate{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate))
                     general_samples_.performance_level_->push_back(static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate));
                 }
 
                 if (general_samples_.compute_utilization_.has_value() && general_samples_.memory_utilization_.has_value()) {
                     nvmlUtilization_t util{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util))
                     general_samples_.compute_utilization_->push_back(util.gpu);
                     general_samples_.memory_utilization_->push_back(util.memory);
                 }
@@ -443,32 +443,32 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             {
                 if (clock_samples_.clock_frequency_.has_value()) {
                     unsigned int value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value))
                     clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(value));
                 }
 
                 if (clock_samples_.sm_clock_frequency_.has_value()) {
                     unsigned int value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value))
                     clock_samples_.sm_clock_frequency_->push_back(static_cast<decltype(clock_samples_.sm_clock_frequency_)::value_type::value_type>(value));
                 }
 
                 if (clock_samples_.memory_clock_frequency_.has_value()) {
                     unsigned int value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value))
                     clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(value));
                 }
 
                 if (clock_samples_.throttle_reason_.has_value()) {
                     unsigned long long value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value))
                     clock_samples_.throttle_reason_->push_back(detail::throttle_event_reason_to_string(value));
                 }
 
                 if (clock_samples_.auto_boosted_clock_.has_value()) {
                     nvmlEnableState_t mode{};
                     nvmlEnableState_t default_mode{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode))
                     clock_samples_.auto_boosted_clock_->push_back(mode == NVML_FEATURE_ENABLED);
                 }
             }
@@ -477,19 +477,19 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             {
                 if (power_samples_.power_profile_.has_value()) {
                     nvmlPstates_t pstate{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate))
                     power_samples_.power_profile_->push_back(static_cast<decltype(power_samples_.power_profile_)::value_type::value_type>(pstate));
                 }
 
                 if (power_samples_.power_usage_.has_value()) {
                     unsigned int value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value))
                     power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value - initial_power_usage) / 1000.0);
                 }
 
                 if (power_samples_.power_total_energy_consumption_.has_value()) {
                     unsigned long long value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value))
                     power_samples_.power_total_energy_consumption_->push_back(static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) / 1000.0);
                 }
             }
@@ -498,20 +498,20 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             {
                 if (memory_samples_.memory_free_.has_value() && memory_samples_.memory_used_.has_value()) {
                     nvmlMemory_t memory_info{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info))
                     memory_samples_.memory_free_->push_back(memory_info.free);
                     memory_samples_.memory_used_->push_back(memory_info.used);
                 }
 
                 if (memory_samples_.num_pcie_lanes_.has_value()) {
                     decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value))
                     memory_samples_.num_pcie_lanes_->push_back(value);
                 }
 
                 if (memory_samples_.pcie_link_generation_.has_value()) {
                     decltype(memory_samples_.pcie_link_generation_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkGeneration(device, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkGeneration(device, &value))
                     memory_samples_.pcie_link_generation_->push_back(value);
                 }
             }
@@ -520,13 +520,13 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             {
                 if (temperature_samples_.fan_speed_percentage_.has_value()) {
                     unsigned int value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value))
                     temperature_samples_.fan_speed_percentage_->push_back(static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(value));
                 }
 
                 if (temperature_samples_.temperature_.has_value()) {
                     unsigned int value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value));
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value))
                     temperature_samples_.temperature_->push_back(static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(value));
                 }
             }
@@ -539,7 +539,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 
 std::string gpu_nvidia_hardware_sampler::device_identification() const {
     nvmlPciInfo_st pcie_info{};
-    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info));
+    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info))
     return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device);
 }
 
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index b68cbd1..5d27972 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -12,7 +12,7 @@
 
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
-#include "fmt/chrono.h"  // fmt::localtime, direct formatting of std::chrono types
+#include "fmt/chrono.h"  // direct formatting of std::chrono types
 
 #include <chrono>     // std::chrono::{system_clock, steady_clock, duration_cast, milliseconds}
 #include <cstddef>    // std::size_t

From 9782e96425ecf7bf9f71ba5d650584012fd17d96 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 23 Sep 2024 12:07:43 +0200
Subject: [PATCH 37/69] Remove unused fetch content.

---
 CMakeLists.txt | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1080a26..1fbf7a7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -88,24 +88,6 @@ else ()
 endif ()
 target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt)
 
-#set(HWS_ryml_VERSION v0.7.2)
-#find_package(ryml QUIET)
-#if (fmt_FOUND)
-#    message(STATUS "Found package ryml (rapidyaml).")
-#else ()
-#    message(STATUS "Couldn't find package ryml (rapidyaml). Building version ${HWS_ryml_VERSION} from source.")
-#    # fetch yaml library ryml
-#    FetchContent_Declare(ryml
-#            GIT_REPOSITORY https://github.com/biojppm/rapidyaml
-#            GIT_TAG ${HWS_ryml_VERSION}
-#            GIT_SHALLOW FALSE
-#            QUIET
-#    )
-#    FetchContent_MakeAvailable(ryml)
-#    add_dependencies(${HWS_LIBRARY_NAME} ryml)
-#endif ()
-#target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC ryml::ryml)
-
 ####################################################################################################################
 ##                                                CPU measurements                                                ##
 ####################################################################################################################

From 1dd98f2d7a858a85b169a6e9071641ff11a27dc2 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 10:52:11 +0200
Subject: [PATCH 38/69] Add missing detail namespace qualifier.

---
 src/hardware_sampling/gpu_amd/hardware_sampler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 96380b3..e535124 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -110,7 +110,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         // queried samples -> retrieved every iteration if available
         rsmi_dev_perf_level_t pstate{};
         if (rsmi_dev_perf_level_get(device_id_, &pstate) == RSMI_STATUS_SUCCESS) {
-            general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ performance_level_to_string(pstate) };
+            general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ detail::performance_level_to_string(pstate) };
         }
 
         decltype(general_samples_.compute_utilization_)::value_type::value_type utilization_gpu{};
@@ -468,7 +468,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                 if (general_samples_.performance_level_.has_value()) {
                     rsmi_dev_perf_level_t pstate{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate))
-                    general_samples_.performance_level_->push_back(performance_level_to_string(pstate));
+                    general_samples_.performance_level_->push_back(detail::performance_level_to_string(pstate));
                 }
 
                 if (general_samples_.compute_utilization_.has_value()) {

From 28b52f4a6b5607b219793d24bdae71fbd28d9346 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 11:12:21 +0200
Subject: [PATCH 39/69] Fix an error where the initial reference point was
 subtracted from the power usage instead of the total energy consumption.

---
 src/hardware_sampling/gpu_amd/hardware_sampler.cpp | 14 ++++++++------
 .../gpu_nvidia/hardware_sampler.cpp                | 14 ++++++++------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index e535124..9182347 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -83,7 +83,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
     this->add_time_point(std::chrono::steady_clock::now());
 
-    std::uint64_t initial_power_usage{};
+    double initial_total_power_consumption{};  // initial total power consumption in J
 
     // retrieve initial general information
     {
@@ -201,7 +201,8 @@ void gpu_amd_hardware_sampler::sampling_loop() {
 
         {
             RSMI_POWER_TYPE power_type{};
-            if (rsmi_dev_power_get(device_id_, &initial_power_usage, &power_type) == RSMI_STATUS_SUCCESS) {
+            std::uint64_t power_usage{};
+            if (rsmi_dev_power_get(device_id_, &power_usage, &power_type) == RSMI_STATUS_SUCCESS) {
                 switch (power_type) {
                     case RSMI_POWER_TYPE::RSMI_AVERAGE_POWER:
                         power_samples_.power_measurement_type_ = "average";
@@ -214,7 +215,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                         break;
                 }
                 // report power usage since the first sample
-                power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(0) };
+                power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(power_usage) / 1000.0 / 1000.0 };
             }
         }
 
@@ -280,7 +281,8 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         std::uint64_t power_total_energy_consumption{};
         if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {
             const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
-            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ scaled_value / 1000.0 / 1000.0 };
+            initial_total_power_consumption = scaled_value / 1000.0 / 1000.0;
+            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
         }
     }
 
@@ -538,7 +540,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     [[maybe_unused]] RSMI_POWER_TYPE power_type{};
                     std::uint64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type))
-                    power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value - initial_power_usage) / 1000.0 / 1000.0);
+                    power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value) / 1000.0 / 1000.0);
                 }
 
                 if (power_samples_.power_total_energy_consumption_.has_value()) {
@@ -547,7 +549,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     std::uint64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp))
                     const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
-                    power_samples_.power_total_energy_consumption_->push_back(scaled_value / 1000.0);
+                    power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000.0 / 1000.0) - initial_total_power_consumption);
                 }
 
                 if (power_samples_.power_profile_.has_value()) {
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 20c9918..d5a2e71 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -88,7 +88,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 
     this->add_time_point(std::chrono::steady_clock::now());
 
-    unsigned int initial_power_usage{};
+    double initial_total_power_consumption{};  // initial total power consumption in J
 
     // retrieve initial general information
     {
@@ -316,13 +316,15 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
         power_samples_.available_power_profiles_ = power_states;
 
         // queried samples -> retrieved every iteration if available
-        if (nvmlDeviceGetPowerUsage(device, &initial_power_usage) == NVML_SUCCESS) {
-            power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(0) };
+        unsigned int power_usage{};
+        if (nvmlDeviceGetPowerUsage(device, &power_usage) == NVML_SUCCESS) {
+            power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(power_usage) / 1000.0 };
         }
 
         unsigned long long power_total_energy_consumption{};
         if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) {
-            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) / 1000.0 };
+            initial_total_power_consumption = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) / 1000.0;
+            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
         }
 
         nvmlPstates_t pstate{};
@@ -484,13 +486,13 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
                 if (power_samples_.power_usage_.has_value()) {
                     unsigned int value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value))
-                    power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value - initial_power_usage) / 1000.0);
+                    power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value) / 1000.0);
                 }
 
                 if (power_samples_.power_total_energy_consumption_.has_value()) {
                     unsigned long long value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value))
-                    power_samples_.power_total_energy_consumption_->push_back(static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) / 1000.0);
+                    power_samples_.power_total_energy_consumption_->push_back((static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) / 1000.0) - initial_total_power_consumption);
                 }
             }
 

From 03e572e0927f007ff1caaecf46e060afc0003683 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 11:13:25 +0200
Subject: [PATCH 40/69] Change order of device ID and bus ID.

---
 src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index d5a2e71..1536237 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -542,7 +542,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
 std::string gpu_nvidia_hardware_sampler::device_identification() const {
     nvmlPciInfo_st pcie_info{};
     HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info))
-    return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device);
+    return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.device, pcie_info.bus);
 }
 
 std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const {

From 6ae5c21621a6ca6dcda8f88e0f279c0f2cf7310b Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 11:17:01 +0200
Subject: [PATCH 41/69] Add newlines between the different categories to make
 the YAML output more clear.

---
 src/hardware_sampling/cpu/hardware_sampler.cpp       | 12 ++++++------
 src/hardware_sampling/gpu_amd/hardware_sampler.cpp   |  8 ++++----
 .../gpu_nvidia/hardware_sampler.cpp                  |  8 ++++----
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 7e89eca..d4a6754 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -428,12 +428,12 @@ std::string cpu_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
+    return fmt::format("{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
                        "{}",
                        general_samples_.generate_yaml_string(),
                        clock_samples_.generate_yaml_string(),
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 9182347..84480bb 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -677,10 +677,10 @@ std::string gpu_amd_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
+    return fmt::format("{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
                        "{}",
                        general_samples_.generate_yaml_string(),
                        clock_samples_.generate_yaml_string(),
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 1536237..769f0a6 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -551,10 +551,10 @@ std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
+    return fmt::format("{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
                        "{}",
                        general_samples_.generate_yaml_string(),
                        clock_samples_.generate_yaml_string(),

From d32f6bf14f14179f915649de17b7ab0e4b1dca5d Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 11:27:11 +0200
Subject: [PATCH 42/69] Fix some compilation warnings and linker errors.

---
 include/hardware_sampling/gpu_amd/utility.hpp      | 7 +++++--
 src/hardware_sampling/gpu_amd/hardware_sampler.cpp | 2 +-
 src/hardware_sampling/gpu_amd/utility.cpp          | 3 ++-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp
index b0786f9..a277e06 100644
--- a/include/hardware_sampling/gpu_amd/utility.hpp
+++ b/include/hardware_sampling/gpu_amd/utility.hpp
@@ -50,7 +50,10 @@ namespace hws::detail {
 
 #else
     #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) rocm_smi_func;
-    #define HWS_HIP_ERROR_CHECK(hip_func) hip_func;
+    #define HWS_HIP_ERROR_CHECK(hip_func)                \
+        {                                                \
+            [[maybe_unused]] hipError_t errc = hip_func; \
+        }
 #endif
 
 /**
@@ -60,6 +63,6 @@ namespace hws::detail {
  */
 [[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level);
 
-}  // namespace hws
+}  // namespace hws::detail
 
 #endif  // HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 84480bb..dae7fec 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -8,7 +8,7 @@
 #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"
 
 #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
-#include "hardware_sampling/gpu_amd/utility.hpp"           // HWS_ROCM_SMI_ERROR_CHECK
+#include "hardware_sampling/gpu_amd/utility.hpp"           // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK
 #include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
 #include "hardware_sampling/utility.hpp"                   // hws::detail::time_points_to_epoch
 
diff --git a/src/hardware_sampling/gpu_amd/utility.cpp b/src/hardware_sampling/gpu_amd/utility.cpp
index 3164c18..35d375c 100644
--- a/src/hardware_sampling/gpu_amd/utility.cpp
+++ b/src/hardware_sampling/gpu_amd/utility.cpp
@@ -11,7 +11,7 @@
 
 #include <string>  // std::string
 
-namespace hws {
+namespace hws::detail {
 
 std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) {
     switch (perf_level) {
@@ -34,6 +34,7 @@ std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level)
         case RSMI_DEV_PERF_LEVEL_DETERMINISM:
             return "determinism";
         case RSMI_DEV_PERF_LEVEL_UNKNOWN:
+        default:
             return "unknown";
     }
 }

From 89129b2effb9b1835a60be4d7f5b24389893bfdd Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 11:29:32 +0200
Subject: [PATCH 43/69] Update README file.

---
 README.md | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index d7acad6..32cd1bc 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # hws - Hardware Sampling for CPUs and GPUs
 
-The Hardware Sampling (hws) library can be used to track hardware performance like clock frequency, memory usage, temperatures, or power draw. 
+The Hardware Sampling (hws) library can be used to track hardware performance like clock frequency, memory usage,
+temperatures, or power draw.
 It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel.
 
 ## Getting Started
@@ -10,15 +11,23 @@ It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel.
 General dependencies:
 
 - a C++17 capable compiler
-- [{fmt} > 11.0.2](https://github.com/fmtlib/fmt) for string formatting (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call)
-- [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call)
+- [{fmt} > 11.0.2](https://github.com/fmtlib/fmt) for string formatting (automatically build during the CMake
+  configuration if it couldn't be found using the respective `find_package` call)
+- [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically build during
+  the CMake configuration if it couldn't be found using the respective `find_package` call)
 
 Dependencies based on the hardware to sample:
 
-- if a CPU should be targeted: at least one of [`turbostat`](https://www.linux.org/docs/man8/turbostat.html) (may require root privileges), [`lscpu`](https://man7.org/linux/man-pages/man1/lscpu.1.html), or [`free`](https://man7.org/linux/man-pages/man1/free.1.html) and the [`subprocess.h`](https://github.com/sheredom/subprocess.h) library (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call)
+- if a CPU should be targeted: at least one of [`turbostat`](https://www.linux.org/docs/man8/turbostat.html) (may
+  require root privileges), [`lscpu`](https://man7.org/linux/man-pages/man1/lscpu.1.html), or [
+  `free`](https://man7.org/linux/man-pages/man1/free.1.html) and the [
+  `subprocess.h`](https://github.com/sheredom/subprocess.h) library (automatically build during the CMake configuration
+  if it couldn't be found using the respective `find_package` call)
 - if an NVIDIA GPU should be targeted: NVIDIA's Management Library [`NVML`](https://docs.nvidia.com/deploy/nvml-api/)
-- if an AMD GPU should be targeted: AMD's ROCm SMI library [`rocm_smi_lib`](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/modules.html)
-- if an Intel GPU should be targeted: Intel's [`Level Zero library`](https://spec.oneapi.io/level-zero/latest/core/INTRO.html)
+- if an AMD GPU should be targeted: AMD's ROCm SMI library [
+  `rocm_smi_lib`](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/modules.html)
+- if an Intel GPU should be targeted: Intel's [
+  `Level Zero library`](https://spec.oneapi.io/level-zero/latest/core/INTRO.html)
 
 ### Building hws
 
@@ -41,7 +50,8 @@ cmake --build . -j
 
 The `[optional_options]` can be one or multiple of:
 
-- `HWS_ENABLE_ERROR_CHECKS=ON|OFF` (default: `OFF`): enable sanity checks during hardware sampling, may be problematic with smaller sample intervals
+- `HWS_ENABLE_ERROR_CHECKS=ON|OFF` (default: `OFF`): enable sanity checks during hardware sampling, may be problematic
+  with smaller sample intervals
 - `HWS_SAMPLING_INTERVAL=100ms` (default: `100ms`): set the sampling interval in milliseconds
 - `HWS_ENABLE_PYTHON_BINDINGS=ON|OFF` (default: `ON`): enable Python bindings
 
@@ -233,8 +243,6 @@ The sampling type `sampled` denotes samples that are gathered during the whole h
 | system_low_power_idle_state_percent  |   sampled   |       %       |
 | package_low_power_idle_state_percent |   sampled   |       %       |
 
-
-
 ## Example Python usage
 
 ```python
@@ -269,7 +277,9 @@ axes = plt.gcf().axes[0]
 x_bounds = axes.get_xlim()
 for event in sampler.get_relative_events()[1:-1]:
     axes.axvline(x=event.relative_time_point, color='r')
-    axes.annotate(text=event.name, xy=(((event.relative_time_point - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270)
+    axes.annotate(text=event.name,
+                  xy=(((event.relative_time_point - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025),
+                  xycoords='axes fraction', rotation=270)
 
 plt.xlabel("runtime [ms]")
 plt.ylabel("clock frequency [MHz]")
@@ -283,4 +293,5 @@ plt.show()
 
 ## License
 
-The hws library is distributed under the [MIT license](https://github.com/SC-SGS/hardware_sampling/blob/main/LICENSE.md).
\ No newline at end of file
+The hws library is distributed under
+the [MIT license](https://github.com/SC-SGS/hardware_sampling/blob/main/LICENSE.md).
\ No newline at end of file

From d26130afeeb099460d7b1e58bacf5de416f969c1 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 11:54:06 +0200
Subject: [PATCH 44/69] Approximate the total energy consumption from the
 current power usage on AMD GPUs if rsmi_dev_energy_count_get fails (may
 happen on some older GPUs).

---
 .../gpu_amd/hardware_sampler.cpp                 | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index dae7fec..dbd2971 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -283,6 +283,9 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
             initial_total_power_consumption = scaled_value / 1000.0 / 1000.0;
             power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
+        } else if (power_samples_.power_usage_.has_value()) {
+            // if the total energy consumption cannot be retrieved, but the current power draw, approximate it
+            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
         }
     }
 
@@ -547,9 +550,16 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     [[maybe_unused]] std::uint64_t timestamp{};
                     float resolution{};
                     std::uint64_t value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp))
-                    const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
-                    power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000.0 / 1000.0) - initial_total_power_consumption);
+                    if (rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {
+                        const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
+                        power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000.0 / 1000.0) - initial_total_power_consumption);
+                    } else if (power_samples_.power_usage_.has_value()) {
+                        // if the total energy consumption cannot be retrieved, but the current power draw, approximate it
+                        const std::size_t num_time_points = this->sampling_time_points().size();
+                        const auto time_difference = std::chrono::duration<double>(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count();
+                        const auto current = power_samples_.power_usage_->back() * time_difference;
+                        power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current);
+                    }
                 }
 
                 if (power_samples_.power_profile_.has_value()) {

From d115e3117daaa82f9d923f89d41d53c2763a2b7a Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 12:09:24 +0200
Subject: [PATCH 45/69] Update Intel GPU Level Zero implementation (not tested
 yet since currently no access to Intel GPUs).

---
 CMakeLists.txt                                |  38 +-
 README.md                                     | 220 ++++----
 bindings/gpu_intel_hardware_sampler.cpp       |  67 ++-
 .../gpu_intel/hardware_sampler.hpp            |   8 +-
 .../level_zero_device_handle_impl.hpp         |  14 +-
 .../gpu_intel/level_zero_samples.hpp          |  93 ++--
 .../hardware_sampling/gpu_intel/utility.hpp   |  26 +-
 .../gpu_intel/hardware_sampler.cpp            | 313 ++++++++----
 .../gpu_intel/level_zero_samples.cpp          | 481 ++++++++++--------
 src/hardware_sampling/gpu_intel/utility.cpp   |  65 ++-
 10 files changed, 777 insertions(+), 548 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1fbf7a7..f3ba9df 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -245,25 +245,25 @@ endif ()
 ##                                        Intel GPU sampling via Level Zero                                       ##
 ####################################################################################################################
 # try finding Level Zero
-#find_package(level_zero QUIET)
-#if (level_zero_FOUND)
-#    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE level_zero)
-#
-#    message(STATUS "Enable sampling of Intel GPU information using Level Zero.")
-#
-#    # add source file to source file list
-#    target_sources(${HWS_LIBRARY_NAME} PRIVATE
-#            $<BUILD_INTERFACE:
-#            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/hardware_sampler.cpp;
-#            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/level_zero_samples.cpp;
-#            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/utility.cpp
-#            >)
-#
-#    # add compile definition
-#    target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_INTEL_GPUS_ENABLED)
-#else ()
-#    message(STATUS "Hardware sampling for Intel GPUs disabled!")
-#endif ()
+find_package(level_zero QUIET)
+if (level_zero_FOUND)
+    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE level_zero)
+
+    message(STATUS "Enable sampling of Intel GPU information using Level Zero.")
+
+    # add source file to source file list
+    target_sources(${HWS_LIBRARY_NAME} PRIVATE
+            $<BUILD_INTERFACE:
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/hardware_sampler.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/level_zero_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/utility.cpp
+            >)
+
+    # add compile definition
+    target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_INTEL_GPUS_ENABLED)
+else ()
+    message(STATUS "Hardware sampling for Intel GPUs disabled!")
+endif ()
 
 
 ####################################################################################################################
diff --git a/README.md b/README.md
index 32cd1bc..3207e22 100644
--- a/README.md
+++ b/README.md
@@ -74,150 +74,144 @@ export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 
 ## Available samples
 
-The sampling type `fixed` denotes samples that are gathered once per hardware samples like maximum clock frequencies or temperatures or the total available memory. 
-The sampling type `sampled` denotes samples that are gathered during the whole hardware sampling process like the current clock frequencies, temperatures, or memory consumption.
+The sampling type `fixed` denotes samples that are gathered once per hardware sampler, like maximum clock frequencies
+or temperatures, or the total available memory.
+The sampling type `sampled` denotes samples that are gathered during the whole hardware sampling process like the
+current clock frequencies, temperatures, or memory consumption.
 
 ### General samples
 
 | sample              | sample type |    CPUs     | NVIDIA GPUs | AMD GPUs  |  Intel GPUs   |
 |:--------------------|:-----------:|:-----------:|:-----------:|:---------:|:-------------:|
-| architecture        |    fixed    |     str     |     str     |    str    |       ?       |
+| architecture        |    fixed    |     str     |     str     |    str    |       -       |
 | byte_order          |    fixed    |     str     |  str (fix)  | str (fix) |   str (fix)   |
-| num_cores           |    fixed    |     int     |     int     |     -     |               |
+| num_cores           |    fixed    |     int     |     int     |     -     |       -       |
 | num_threads         |    fixed    |     int     |      -      |     -     |       -       |
 | threads_per_core    |    fixed    |     int     |      -      |     -     |       -       |
 | cores_per_socket    |    fixed    |     int     |      -      |     -     |       -       |
 | num_sockets         |    fixed    |     int     |      -      |     -     |       -       |
-| numa_nodes          |    fixed    |     int     |      -      |     -     |               |
+| numa_nodes          |    fixed    |     int     |      -      |     -     |       -       |
 | vendor_id           |    fixed    |     str     |  str (fix)  |    str    | str (PCIe ID) |
 | name                |    fixed    |     str     |     str     |    str    |      str      |
-| flags               |    fixed    | list of str |      -      |     -     |               |
-| persistence_mode    |    fixed    |      -      |    bool     |     -     |               |
-| compute_utilization |   sampled   |      %      |      %      |     %     |       ?       |
-| memory_utilization  |   sampled   |      -      |      %      |     %     |       ?       |
+| flags               |    fixed    | list of str |      -      |     -     |  list of str  |
+| persistence_mode    |    fixed    |      -      |    bool     |     -     |       -       |
+| standby_mode        |    fixed    |      -      |      -      |     -     |      str      |
+| num_threads_per_eu  |    fixed    |      -      |      -      |     -     |      int      |
+| eu_simd_width       |    fixed    |      -      |      -      |     -     |      int      |
+| compute_utilization |   sampled   |      %      |      %      |     %     |       -       |
+| memory_utilization  |   sampled   |      -      |      %      |     %     |       -       |
 | ipc                 |   sampled   |    float    |      -      |     -     |       -       |
 | irq                 |   sampled   |     int     |      -      |     -     |       -       |
 | smi                 |   sampled   |     int     |      -      |     -     |       -       |
 | poll                |   sampled   |     int     |      -      |     -     |       -       |
 | poll_percent        |   sampled   |      %      |      -      |     -     |       -       |
-| performance_level   |   sampled   |      -      |     int     |    str    |               |
-| standby_mode        |             |             |             |           |      str      |
-| num_threads_per_eu  |             |             |             |           |      int      |
-| eu_simd_width       |             |             |             |           |      int      |
+| performance_level   |   sampled   |      -      |     int     |    str    |       -       |
 
 ### clock-related samples
 
-| sample                             | sample type | CPUs |   NVIDIA GPUs    |  AMD GPUs   | Intel GPUs |
-|:-----------------------------------|:-----------:|:----:|:----------------:|:-----------:|:----------:|
-| auto_boosted_clock_enabled         |    fixed    | bool |       bool       |      -      |            |
-| clock_frequency_min                |    fixed    | MHz  |       MHz        |     MHz     |            |
-| clock_frequency_max                |    fixed    | MHz  |       MHz        |     MHz     |            |
-| memory_clock_frequency_min         |    fixed    |  -   |       MHz        |     MHz     |            |
-| memory_clock_frequency_max         |    fixed    |  -   |       MHz        |     MHz     |            |
-| socket_clock_frequency_min         |    fixed    |  -   |        -         |     MHz     |     -      |
-| socket_clock_frequency_min         |    fixed    |  -   |        -         |     MHz     |     -      |
-| sm_clock_frequency_max             |    fixed    |  -   |       MHz        |      -      |     -      |
-| available_clock_frequencies        |    fixed    |  -   |    map of MHz    | list of MHz |            |
-| available_memory_clock_frequencies |    fixed    |  -   |   list of MHz    | list of MHz |            |
-| clock_frequency                    |   sampled   | MHz  |       MHz        |     MHz     |            |
-| average_non_idle_clock_frequency   |   sampled   | MHz  |        -         |      -      |     -      |
-| time_stamp_counter                 |   sampled   | MHz  |        -         |      -      |     -      |
-| memory_clock_frequency             |   sampled   |  -   |       MHz        |     MHz     |            |
-| socket_clock_frequency             |   sampled   |  -   |        -         |     MHz     |     -      |
-| sm_clock_frequency                 |   sampled   |  -   |       MHz        |      -      |     -      |
-| overdrive_level                    |   sampled   |  -   |        -         |      %      |     -      |
-| memory_overdrive_level             |   sampled   |  -   |        -         |      %      |     -      |
-| throttle_reason                    |   sampled   |  -   | string (bitmask) |      -      |            |
-| memory_throttle_reason             |             |  -   |        -         |      -      |            |
-| auto_boosted_clock                 |   sampled   |  -   |       bool       |      -      |     -      |
-| tdp_frequency_limit                |             |  -   |        -         |      -      |            |
-| memory_tdp_frequency_limit         |             |  -   |        -         |      -      |            |
+| sample                             | sample type | CPUs |   NVIDIA GPUs    |  AMD GPUs   |    Intel GPUs    |
+|:-----------------------------------|:-----------:|:----:|:----------------:|:-----------:|:----------------:|
+| auto_boosted_clock_enabled         |    fixed    | bool |       bool       |      -      |        -         |
+| clock_frequency_min                |    fixed    | MHz  |       MHz        |     MHz     |       MHz        |
+| clock_frequency_max                |    fixed    | MHz  |       MHz        |     MHz     |       MHz        |
+| memory_clock_frequency_min         |    fixed    |  -   |       MHz        |     MHz     |       MHz        |
+| memory_clock_frequency_max         |    fixed    |  -   |       MHz        |     MHz     |       MHz        |
+| socket_clock_frequency_min         |    fixed    |  -   |        -         |     MHz     |        -         |
+| socket_clock_frequency_max         |    fixed    |  -   |        -         |     MHz     |        -         |
+| sm_clock_frequency_max             |    fixed    |  -   |       MHz        |      -      |        -         |
+| available_clock_frequencies        |    fixed    |  -   |    map of MHz    | list of MHz |   list of MHz    |
+| available_memory_clock_frequencies |    fixed    |  -   |   list of MHz    | list of MHz |   list of MHz    |
+| clock_frequency                    |   sampled   | MHz  |       MHz        |     MHz     |       MHz        |
+| average_non_idle_clock_frequency   |   sampled   | MHz  |        -         |      -      |        -         |
+| time_stamp_counter                 |   sampled   | MHz  |        -         |      -      |        -         |
+| memory_clock_frequency             |   sampled   |  -   |       MHz        |     MHz     |       MHz        |
+| socket_clock_frequency             |   sampled   |  -   |        -         |     MHz     |        -         |
+| sm_clock_frequency                 |   sampled   |  -   |       MHz        |      -      |        -         |
+| overdrive_level                    |   sampled   |  -   |        -         |      %      |        -         |
+| memory_overdrive_level             |   sampled   |  -   |        -         |      %      |        -         |
+| throttle_reason                    |   sampled   |  -   | string (bitmask) |      -      | string (bitmask) |
+| memory_throttle_reason             |   sampled   |  -   |        -         |      -      | string (bitmask) |
+| auto_boosted_clock                 |   sampled   |  -   |       bool       |      -      |        -         |
+| frequency_limit_tdp                |   sampled   |  -   |        -         |      -      |       MHz        |
+| memory_frequency_limit_tdp         |   sampled   |  -   |        -         |      -      |       MHz        |
 
 ### power-related samples
 
-| sample                         | sample type |               CPUs                | NVIDIA GPUs |  AMD GPUs   | Intel GPUs |
-|:-------------------------------|:-----------:|:---------------------------------:|:-----------:|:-----------:|:----------:|
-| power_management_limit         |    fixed    |                 -                 |      W      |      W      |            |
-| power_enforced_limit           |    fixed    |                 -                 |      W      |      W      |            |
-| power_measurement_type         |    fixed    |             str (fix)             |     str     |     str     |            |
-| power_management_mode          |    fixed    |                 -                 |    bool     |      -      |            |
-| available_power_profiles       |    fixed    |                 -                 | list of int | list of str |            |
-| power_usage                    |   sampled   |                 W                 |      W      |      W      |            |
-| core_watt                      |   sampled   |                 W                 |      -      |      -      |     -      |
-| dram_watt                      |   sampled   |                 W                 |      -      |      -      |     -      |
-| package_rapl_throttling        |   sampled   |                 %                 |      -      |      -      |     -      |
-| dram_rapl_throttling           |   sampled   |                 %                 |      -      |      -      |     -      |
-| power_total_energy_consumption |   sampled   | J<br>(calculated via power_usage) |      J      |      J      |     J      |
-| power_profile                  |   sampled   |                 -                 |     int     |     str     |            |
-| energy_threshold_enabled       |             |                                   |             |             |    bool    |
-| energy_threshold               |             |                                   |             |             |     J      |
+| sample                         | sample type |               CPUs                | NVIDIA GPUs |                                        AMD GPUs                                        |                      Intel GPUs                      |
+|:-------------------------------|:-----------:|:---------------------------------:|:-----------:|:--------------------------------------------------------------------------------------:|:----------------------------------------------------:|
+| power_management_limit         |    fixed    |                 -                 |      W      |                                           W                                            |                          -                           |
+| power_enforced_limit           |    fixed    |                 -                 |      W      |                                           W                                            |                          W                           |
+| power_measurement_type         |    fixed    |             str (fix)             |     str     |                                          str                                           |                         str                          |
+| power_management_mode          |    fixed    |                 -                 |    bool     |                                           -                                            |                         bool                         |
+| available_power_profiles       |    fixed    |                 -                 | list of int |                                      list of str                                       |                          -                           |
+| power_usage                    |   sampled   |                 W                 |      W      |                                           W                                            | W<br>(calculated via power_total_energy_consumption) |
+| core_watt                      |   sampled   |                 W                 |      -      |                                           -                                            |                          -                           |
+| dram_watt                      |   sampled   |                 W                 |      -      |                                           -                                            |                          -                           |
+| package_rapl_throttling        |   sampled   |                 %                 |      -      |                                           -                                            |                          -                           |
+| dram_rapl_throttling           |   sampled   |                 %                 |      -      |                                           -                                            |                          -                           |
+| power_total_energy_consumption |   sampled   | J<br>(calculated via power_usage) |      J      | J<br>(calculated via power_usage if<br>power_total_energy_consumption isn't available) |                          J                           |
+| power_profile                  |   sampled   |                 -                 |     int     |                                          str                                           |                          -                           |
 
 ### memory-related samples
 
-| sample                      | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
-|:----------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:|
-| cache_size_L1d              |    fixed    | str  |      -      |    -     |     -      |
-| cache_size_L1i              |    fixed    | str  |      -      |    -     |     -      |
-| cache_size_L2               |    fixed    | str  |      -      |    -     |     -      |
-| cache_size_L3               |    fixed    | str  |      -      |    -     |     -      |
-| memory_total                |    fixed    |  B   |      B      |    B     |            |
-| visible_memory_total        |    fixed    |  -   |      -      |    B     |     -      |
-| swap_memory_total           |    fixed    |  B   |      -      |    -     |     -      |
-| memory_total_{}             |             |  -   |             |          |     B      |
-| allocatable_memory_total_{} |             |  -   |             |          |     B      |
-| num_pcie_lanes_min          |    fixed    |  -   |      -      |   int    |            |
-| num_pcie_lanes_max          |    fixed    |  -   |     int     |   int    |            |
-| pcie_link_generation_max    |    fixed    |  -   |     int     |    -     |    int     |
-| pcie_link_speed_max         |    fixed    |  -   |    MBPS     |    -     |    BPS     |
-| pcie_link_transfer_rate_min |    fixed    |  -   |      -      |   MT/s   |            |
-| pcie_link_transfer_rate_max |    fixed    |  -   |      -      |   MT/s   |            |
-| memory_bus_width            |    fixed    |  -   |     Bit     |    -     |            |
-| memory_used                 |   sampled   |  B   |      B      |    B     |            |
-| memory_free                 |   sampled   |  B   |      B      |    B     |            |
-| swap_memory_used            |   sampled   |  B   |      -      |    -     |     -      |
-| swap_memory_free            |   sampled   |  B   |      -      |    -     |     -      |
-| num_pcie_lanes              |   sampled   |  -   |     int     |   int    |            |
-| pcie_link_generation        |   sampled   |  -   |     int     |    -     |    int     |
-| pcie_link_speed             |   sampled   |  -   |    MBPS     |    -     |    MBPS    |
-| pcie_link_transfer_rate     |   sampled   |  -   |      -      |   T/s    |     -      |
-| memory_used_{}              |             |      |             |          |     B      |
-| memory_free_{}              |             |      |             |          |     B      |
-| memory_bus_width_{}         |             |      |             |          |    Bit     |
-| memory_num_channels_{}      |             |      |             |          |    int     |
-| memory_location_{}          |             |      |             |          |    str     |
+| sample                      | sample type | CPUs | NVIDIA GPUs | AMD GPUs |           Intel GPUs           |
+|:----------------------------|:-----------:|:----:|:-----------:|:--------:|:------------------------------:|
+| cache_size_L1d              |    fixed    | str  |      -      |    -     |               -                |
+| cache_size_L1i              |    fixed    | str  |      -      |    -     |               -                |
+| cache_size_L2               |    fixed    | str  |      -      |    -     |               -                |
+| cache_size_L3               |    fixed    | str  |      -      |    -     |               -                |
+| memory_total                |    fixed    |  B   |      B      |    B     |  B<br>(map of memory modules)  |
+| visible_memory_total        |    fixed    |  -   |      -      |    B     |  B<br>(map of memory modules)  |
+| swap_memory_total           |    fixed    |  B   |      -      |    -     |               -                |
+| num_pcie_lanes_min          |    fixed    |  -   |      -      |   int    |               -                |
+| num_pcie_lanes_max          |    fixed    |  -   |     int     |   int    |              int               |
+| pcie_link_generation_max    |    fixed    |  -   |     int     |    -     |              int               |
+| pcie_link_speed_max         |    fixed    |  -   |    MBPS     |    -     |              MBPS              |
+| pcie_link_transfer_rate_min |    fixed    |  -   |      -      |   MT/s   |               -                |
+| pcie_link_transfer_rate_max |    fixed    |  -   |      -      |   MT/s   |               -                |
+| memory_bus_width            |    fixed    |  -   |     Bit     |    -     | Bit<br>(map of memory modules) |
+| memory_num_channels         |    fixed    |  -   |      -      |    -     | int<br>(map of memory modules) |
+| memory_used                 |   sampled   |  B   |      B      |    B     |  B<br>(map of memory modules)  |
+| memory_free                 |   sampled   |  B   |      B      |    B     |  B<br>(map of memory modules)  |
+| swap_memory_used            |   sampled   |  B   |      -      |    -     |               -                |
+| swap_memory_free            |   sampled   |  B   |      -      |    -     |               -                |
+| num_pcie_lanes              |   sampled   |  -   |     int     |   int    |              int               |
+| pcie_link_generation        |   sampled   |  -   |     int     |    -     |              int               |
+| pcie_link_speed             |   sampled   |  -   |    MBPS     |    -     |              MBPS              |
+| pcie_link_transfer_rate     |   sampled   |  -   |      -      |   T/s    |               -                |
 
 ### temperature-related samples
 
 | sample                  | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
 |:------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:|
-| num_fans                |    fixed    |  -   |     int     |   int    |            |
-| fan_speed_min           |    fixed    |  -   |      %      |    -     |            | 
-| fan_speed_max           |    fixed    |  -   |      %      |   RPM    |            |
-| temperature_min         |    fixed    |  -   |      -      |    °C    |            |
-| temperature_max         |    fixed    |  -   |     °C      |    °C    |            |
-| memory_temperature_min  |    fixed    |  -   |      -      |    °C    |            |
-| memory_temperature_max  |    fixed    |  -   |     °C      |    °C    |            |
-| hotspot_temperature_min |    fixed    |  -   |      -      |    °C    |            |
-| hotspot_temperature_max |    fixed    |  -   |      -      |    °C    |            |
-| hbm_0_temperature_min   |    fixed    |  -   |      -      |    °C    |            |
-| hbm_0_temperature_max   |    fixed    |  -   |      -      |    °C    |            |
-| hbm_1_temperature_min   |    fixed    |  -   |      -      |    °C    |            |
-| hbm_1_temperature_max   |    fixed    |  -   |      -      |    °C    |            |
-| hbm_2_temperature_min   |    fixed    |  -   |      -      |    °C    |            |
-| hbm_2_temperature_max   |    fixed    |  -   |      -      |    °C    |            |
-| hbm_3_temperature_min   |    fixed    |  -   |      -      |    °C    |            |
-| hbm_3_temperature_max   |    fixed    |  -   |      -      |    °C    |            |
-| fan_speed_percentage    |   sampled   |  -   |      %      |    %     |            |
-| temperature             |   sampled   |  °C  |     °C      |    °C    |            |
-| memory_temperature      |   sampled   |  -   |      -      |    °C    |            |
-| hotspot_temperature     |   sampled   |  -   |      -      |    °C    |            |
-| hbm_0_temperature       |   sampled   |  -   |      -      |    °C    |            |
-| hbm_1_temperature       |   sampled   |  -   |      -      |    °C    |            |
-| hbm_2_temperature       |   sampled   |  -   |      -      |    °C    |            |
-| hbm_3_temperature       |   sampled   |  -   |      -      |    °C    |            |
-| temperature_{}_max      |             |      |             |          |            |
-| temperature_psu         |             |      |             |          |            |
-| temperature_{}          |             |      |             |          |            |
+| num_fans                |    fixed    |  -   |     int     |   int    |    int     |
+| fan_speed_min           |    fixed    |  -   |      %      |    -     |     -      | 
+| fan_speed_max           |    fixed    |  -   |      %      |   RPM    |    RPM     |
+| temperature_min         |    fixed    |  -   |      -      |    °C    |     -      |
+| temperature_max         |    fixed    |  -   |     °C      |    °C    |     °C     |
+| memory_temperature_min  |    fixed    |  -   |      -      |    °C    |     -      |
+| memory_temperature_max  |    fixed    |  -   |     °C      |    °C    |     °C     |
+| hotspot_temperature_min |    fixed    |  -   |      -      |    °C    |     -      |
+| hotspot_temperature_max |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_0_temperature_min   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_0_temperature_max   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_1_temperature_min   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_1_temperature_max   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_2_temperature_min   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_2_temperature_max   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_3_temperature_min   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_3_temperature_max   |    fixed    |  -   |      -      |    °C    |     -      |
+| global_temperature_max  |    fixed    |  -   |      -      |    °C    |     °C     |
+| fan_speed_percentage    |   sampled   |  -   |      %      |    %     |     %      |
+| temperature             |   sampled   |  °C  |     °C      |    °C    |     °C     |
+| memory_temperature      |   sampled   |  -   |      -      |    °C    |     °C     |
+| hotspot_temperature     |   sampled   |  -   |      -      |    °C    |     -      |
+| hbm_0_temperature       |   sampled   |  -   |      -      |    °C    |     -      |
+| hbm_1_temperature       |   sampled   |  -   |      -      |    °C    |     -      |
+| hbm_2_temperature       |   sampled   |  -   |      -      |    °C    |     -      |
+| hbm_3_temperature       |   sampled   |  -   |      -      |    °C    |     -      |
+| global_temperature      |   sampled   |  -   |      -      |    -     |     °C     |
+| psu_temperature         |   sampled   |  -   |      -      |    -     |     °C     |
 | core_temperature        |   sampled   |  °C  |      -      |    -     |     -      |
 | core_throttle_percent   |   sampled   |  %   |      -      |    -     |     -      |
 
diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp
index 0b05a55..8cf6f83 100644
--- a/bindings/gpu_intel_hardware_sampler.cpp
+++ b/bindings/gpu_intel_hardware_sampler.cpp
@@ -22,7 +22,10 @@ namespace py = pybind11;
 void init_gpu_intel_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::level_zero_general_samples>(m, "LevelZeroGeneralSamples")
+        .def("get_byte_order", &hws::level_zero_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
+        .def("get_vendor_id", &hws::level_zero_general_samples::get_vendor_id, "the vendor ID")
         .def("get_name", &hws::level_zero_general_samples::get_name, "the model name of the device")
+        .def("get_flags", &hws::level_zero_general_samples::get_flags, "potential GPU flags (e.g. integrated device)")
         .def("get_standby_mode", &hws::level_zero_general_samples::get_standby_mode, "the enabled standby mode (power saving or never)")
         .def("get_num_threads_per_eu", &hws::level_zero_general_samples::get_num_threads_per_eu, "the number of threads per EU unit")
         .def("get_eu_simd_width", &hws::level_zero_general_samples::get_eu_simd_width, "the physical EU unit SIMD width")
@@ -32,27 +35,29 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
 
     // bind the clock samples
     py::class_<hws::level_zero_clock_samples>(m, "LevelZeroClockSamples")
-        .def("get_clock_gpu_min", &hws::level_zero_clock_samples::get_clock_gpu_min, "the minimum possible GPU clock frequency in MHz")
-        .def("get_clock_gpu_max", &hws::level_zero_clock_samples::get_clock_gpu_max, "the maximum possible GPU clock frequency in MHz")
-        .def("get_available_clocks_gpu", &hws::level_zero_clock_samples::get_available_clocks_gpu, "the available GPU clock frequencies in MHz (slowest to fastest)")
-        .def("get_clock_mem_min", &hws::level_zero_clock_samples::get_clock_mem_min, "the minimum possible memory clock frequency in MHz")
-        .def("get_clock_mem_max", &hws::level_zero_clock_samples::get_clock_mem_max, "the maximum possible memory clock frequency in MHz")
-        .def("get_available_clocks_mem", &hws::level_zero_clock_samples::get_available_clocks_mem, "the available memory clock frequencies in MHz (slowest to fastest)")
-        .def("get_tdp_frequency_limit_gpu", &hws::level_zero_clock_samples::get_tdp_frequency_limit_gpu, "the current maximum allowed GPU frequency based on the TDP limit in MHz")
-        .def("get_clock_gpu", &hws::level_zero_clock_samples::get_clock_gpu, "the current GPU frequency in MHz")
-        .def("get_throttle_reason_gpu", &hws::level_zero_clock_samples::get_throttle_reason_gpu, "the current GPU frequency throttle reason")
-        .def("get_tdp_frequency_limit_mem", &hws::level_zero_clock_samples::get_tdp_frequency_limit_mem, "the current maximum allowed memory frequency based on the TDP limit in MHz")
-        .def("get_clock_mem", &hws::level_zero_clock_samples::get_clock_mem, "the current memory frequency in MHz")
-        .def("get_throttle_reason_mem", &hws::level_zero_clock_samples::get_throttle_reason_mem, "the current memory frequency throttle reason")
+        .def("get_clock_frequency_min", &hws::level_zero_clock_samples::get_clock_frequency_min, "the minimum possible GPU clock frequency in MHz")
+        .def("get_clock_frequency_max", &hws::level_zero_clock_samples::get_clock_frequency_max, "the maximum possible GPU clock frequency in MHz")
+        .def("get_memory_clock_frequency_min", &hws::level_zero_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz")
+        .def("get_memory_clock_frequency_max", &hws::level_zero_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz")
+        .def("get_available_clock_frequencies", &hws::level_zero_clock_samples::get_available_clock_frequencies, "the available GPU clock frequencies in MHz (slowest to fastest)")
+        .def("get_available_memory_clock_frequencies", &hws::level_zero_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)")
+        .def("get_clock_frequency", &hws::level_zero_clock_samples::get_clock_frequency, "the current GPU frequency in MHz")
+        .def("get_memory_clock_frequency", &hws::level_zero_clock_samples::get_memory_clock_frequency, "the current memory frequency in MHz")
+        .def("get_throttle_reason", &hws::level_zero_clock_samples::get_throttle_reason, "the current GPU frequency throttle reason")
+        .def("get_memory_throttle_reason", &hws::level_zero_clock_samples::get_memory_throttle_reason, "the current memory frequency throttle reason")
+        .def("get_frequency_limit_tdp", &hws::level_zero_clock_samples::get_frequency_limit_tdp, "the current maximum allowed GPU frequency based on the TDP limit in MHz")
+        .def("get_memory_frequency_limit_tdp", &hws::level_zero_clock_samples::get_memory_frequency_limit_tdp, "the current maximum allowed memory frequency based on the TDP limit in MHz")
         .def("__repr__", [](const hws::level_zero_clock_samples &self) {
             return fmt::format("<HardwareSampling.LevelZeroClockSamples with\n{}\n>", self);
         });
 
     // bind the power samples
     py::class_<hws::level_zero_power_samples>(m, "LevelZeroPowerSamples")
-        .def("get_energy_threshold_enabled", &hws::level_zero_power_samples::get_energy_threshold_enabled, "true if the energy threshold is enabled")
-        .def("get_energy_threshold", &hws::level_zero_power_samples::get_energy_threshold, "the energy threshold in J")
-        .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ")
+        .def("get_power_enforced_limit", &hws::level_zero_power_samples::get_power_enforced_limit, "the actually enforced power limit (W), may be different from power management limit if external limiters are set")
+        .def("get_power_measurement_type", &hws::level_zero_power_samples::get_power_measurement_type, "the type of the power readings")
+        .def("get_power_management_mode", &hws::level_zero_power_samples::get_power_management_mode, "true if power management limits are enabled")
+        .def("get_power_usage", &hws::level_zero_power_samples::get_power_usage, "the current power draw of the GPU in W (calculated from power_total_energy_consumption)")
+        .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in J")
         .def("__repr__", [](const hws::level_zero_power_samples &self) {
             return fmt::format("<HardwareSampling.LevelZeroPowerSamples with\n{}\n>", self);
         });
@@ -60,26 +65,34 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
     // bind the memory samples
     py::class_<hws::level_zero_memory_samples>(m, "LevelZeroMemorySamples")
         .def("get_memory_total", &hws::level_zero_memory_samples::get_memory_total, "the total memory size of the different memory modules in Bytes")
-        .def("get_allocatable_memory_total", &hws::level_zero_memory_samples::get_allocatable_memory_total, "the total allocatable memory size of the different memory modules in Bytes")
-        .def("get_pcie_link_max_speed", &hws::level_zero_memory_samples::get_pcie_link_max_speed, "the maximum PCIe bandwidth in bytes/sec")
-        .def("get_pcie_max_width", &hws::level_zero_memory_samples::get_pcie_max_width, "the PCIe lane width")
-        .def("get_max_pcie_link_generation", &hws::level_zero_memory_samples::get_max_pcie_link_generation, "the PCIe generation")
-        .def("get_bus_width", &hws::level_zero_memory_samples::get_bus_width, "the bus width of the different memory modules")
-        .def("get_num_channels", &hws::level_zero_memory_samples::get_num_channels, "the number of memory channels of the different memory modules")
-        .def("get_location", &hws::level_zero_memory_samples::get_location, "the location of the different memory modules (system or device)")
+        .def("get_visible_memory_total", &hws::level_zero_memory_samples::get_visible_memory_total, "the total allocatable memory size of the different memory modules in Bytes")
+        .def("get_memory_location", &hws::level_zero_memory_samples::get_memory_location, "the location of the different memory modules (system or device)")
+        .def("get_num_pcie_lanes_max", &hws::level_zero_memory_samples::get_num_pcie_lanes_max, "the PCIe lane width")
+        .def("get_pcie_link_generation_max", &hws::level_zero_memory_samples::get_pcie_link_generation_max, "the PCIe generation")
+        .def("get_pcie_link_speed_max", &hws::level_zero_memory_samples::get_pcie_link_speed_max, "the maximum PCIe bandwidth in bytes/sec")
+        .def("get_memory_bus_width", &hws::level_zero_memory_samples::get_memory_bus_width, "the bus width of the different memory modules")
+        .def("get_memory_num_channels", &hws::level_zero_memory_samples::get_memory_num_channels, "the number of memory channels of the different memory modules")
         .def("get_memory_free", &hws::level_zero_memory_samples::get_memory_free, "the currently free memory of the different memory modules in Bytes")
-        .def("get_pcie_link_speed", &hws::level_zero_memory_samples::get_pcie_link_speed, "the current PCIe bandwidth in bytes/sec")
-        .def("get_pcie_link_width", &hws::level_zero_memory_samples::get_pcie_link_width, "the current PCIe lane width")
+        .def("get_memory_used", &hws::level_zero_memory_samples::get_memory_used, "the currently used memory of the different memory modules in Bytes")
+        .def("get_num_pcie_lanes", &hws::level_zero_memory_samples::get_num_pcie_lanes, "the current PCIe lane width")
         .def("get_pcie_link_generation", &hws::level_zero_memory_samples::get_pcie_link_generation, "the current PCIe generation")
+        .def("get_pcie_link_speed", &hws::level_zero_memory_samples::get_pcie_link_speed, "the current PCIe bandwidth in bytes/sec")
         .def("__repr__", [](const hws::level_zero_memory_samples &self) {
             return fmt::format("<HardwareSampling.LevelZeroMemorySamples with\n{}\n>", self);
         });
 
     // bind the temperature samples
     py::class_<hws::level_zero_temperature_samples>(m, "LevelZeroTemperatureSamples")
-        .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum temperature for the sensor in °C")
-        .def("get_temperature_psu", &hws::level_zero_temperature_samples::get_temperature_psu, "the temperature of the PSU in °C")
-        .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current temperature for the sensor in °C")
+        .def("get_num_fans", &hws::level_zero_temperature_samples::get_num_fans, "the number of fans")
+        .def("get_fan_speed_max", &hws::level_zero_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in RPM")
+        .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum GPU temperature in °C")
+        .def("get_memory_temperature_max", &hws::level_zero_temperature_samples::get_memory_temperature_max, "the maximum memory temperature in °C")
+        .def("get_global_temperature_max", &hws::level_zero_temperature_samples::get_global_temperature_max, "the maximum global temperature in °C")
+        .def("get_fan_speed_percentage", &hws::level_zero_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %")
+        .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current GPU temperature in °C")
+        .def("get_memory_temperature", &hws::level_zero_temperature_samples::get_memory_temperature, "the current memory temperature in °C")
+        .def("get_global_temperature", &hws::level_zero_temperature_samples::get_global_temperature, "the current global temperature in °C")
+        .def("get_psu_temperature", &hws::level_zero_temperature_samples::get_psu_temperature, "the current PSU temperature in °C")
         .def("__repr__", [](const hws::level_zero_temperature_samples &self) {
             return fmt::format("<HardwareSampling.LevelZeroTemperatureSamples with\n{}\n>", self);
         });
diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
index 442be04..d47bd32 100644
--- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
@@ -16,10 +16,12 @@
 #include "hardware_sampling/gpu_intel/level_zero_samples.hpp"        // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"                    // hws::hardware_sampler
 
+#include "fmt/format.h"  // fmt::formatter, fmt::ostream_formatter
+
 #include <atomic>   // std::atomic
-#include <chrono>   // std::chrono::{steady_clock, milliseconds}, std::chrono_literals namespace
+#include <chrono>   // std::chrono::milliseconds, std::chrono_literals namespace
 #include <cstddef>  // std::size_t
-#include <format>   // std::formatter
+#include <iosfwd>   // std::ostream forward declaration
 #include <string>   // std::string
 
 namespace hws {
@@ -157,6 +159,6 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa
 }  // namespace hws
 
 template <>
-struct std::formatter<hws::gpu_intel_hardware_sampler> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::gpu_intel_hardware_sampler> : fmt::ostream_formatter { };
 
 #endif  // HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp b/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp
index 640cdcc..a0f2ccd 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp
+++ b/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp
@@ -15,11 +15,11 @@
 #include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
 #include "hardware_sampling/gpu_intel/utility.hpp"                   // HWS_LEVEL_ZERO_ERROR_CHECK
 
+#include "fmt/format.h"         // fmt::format
 #include "level_zero/ze_api.h"  // Level Zero runtime functions
 
 #include <cstddef>    // std::size_t
 #include <cstdint>    // std::uint32_t
-#include <format>     // std::format
 #include <memory>     // std::make_shared
 #include <stdexcept>  // std::runtime_error
 #include <vector>     // std::vector
@@ -38,28 +38,28 @@ struct level_zero_device_handle::level_zero_device_handle_impl {
     explicit level_zero_device_handle_impl(const std::size_t device_id) {
         // discover the number of drivers
         std::uint32_t driver_count{ 0 };
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr))
 
         // check if only the single GPU driver has been found
         if (driver_count > 1) {
-            throw std::runtime_error{ std::format("Found too many GPU drivers ({})!", driver_count) };
+            throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) };
         }
 
         // get the GPU driver
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver))
 
         // get all GPUs for the current driver
         std::uint32_t device_count{ 0 };
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr))
 
         // check if enough GPUs have been found
         if (driver_count <= device_id) {
-            throw std::runtime_error{ std::format("Found only {} GPUs, but GPU with the ID was requested!", device_count, device_id) };
+            throw std::runtime_error{ fmt::format("Found only {} GPUs, but GPU with the ID was requested!", device_count, device_id) };
         }
 
         // get the GPUs
         std::vector<ze_device_handle_t> all_devices(device_count);
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, all_devices.data()));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, all_devices.data()))
 
         // save the requested device
         device = all_devices[device_id];
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
index dc8b411..9900f95 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
@@ -12,10 +12,11 @@
 #define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter
+#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
 #include <cstdint>        // std::uint64_t, std::int32_t
-#include <format>         // std::format
 #include <iosfwd>         // std::ostream forward declaration
 #include <optional>       // std::optional
 #include <string>         // std::string
@@ -43,9 +44,11 @@ class level_zero_general_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)            // the byte order (e.g., little/big endian)
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)             // the vendor ID
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                  // the model name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)          // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)           // the vendor ID
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                // the model name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, flags)  // potential GPU flags (e.g. integrated device)
+
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode)          // the enabled standby mode (power saving or never)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu)  // the number of threads per EU unit
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, eu_simd_width)       // the physical EU unit SIMD width
@@ -79,19 +82,19 @@ class level_zero_clock_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_gpu_min)                      // the minimum possible GPU clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_gpu_max)                      // the maximum possible GPU clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_clocks_gpu)  // the available GPU clock frequencies in MHz (slowest to fastest)
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_mem_min)                      // the minimum possible memory clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_mem_max)                      // the maximum possible memory clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_clocks_mem)  // the available memory clock frequencies in MHz (slowest to fastest)
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, tdp_frequency_limit_gpu)  // the current maximum allowed GPU frequency based on the TDP limit in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_gpu)                // the current GPU frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, throttle_reason_gpu)         // the current GPU frequency throttle reason
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, tdp_frequency_limit_mem)  // the current maximum allowed memory frequency based on the TDP limit in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_mem)                // the current memory frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, throttle_reason_mem)         // the current memory frequency throttle reason
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min)                              // the minimum possible GPU clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max)                              // the maximum possible GPU clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min)                       // the minimum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max)                       // the maximum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_clock_frequencies)         // the available GPU clock frequencies in MHz (slowest to fastest)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_memory_clock_frequencies)  // the available memory clock frequencies in MHz (slowest to fastest)
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)              // the current GPU frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)       // the current memory frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason)         // the current GPU frequency throttle reason
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, memory_throttle_reason)  // the current memory frequency throttle reason
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, frequency_limit_tdp)          // the current maximum allowed GPU frequency based on the TDP limit in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_frequency_limit_tdp)   // the current maximum allowed memory frequency based on the TDP limit in MHz
 };
 
 /**
@@ -126,6 +129,7 @@ class level_zero_power_samples {
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type)  // the type of the power readings
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode)          // true if power management limits are enabled
 
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage)                     // the current power draw of the GPU in W (calculated from power_total_energy_consumption)
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption)  // the total power consumption since the last driver reload in J
 };
 
@@ -164,19 +168,20 @@ class level_zero_memory_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::uint64_t>, memory_total)              // the total memory size of the different memory modules in Bytes
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::uint64_t>, allocatable_memory_total)  // the total allocatable memory size of the different memory modules in Bytes
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, pcie_link_max_speed)                  // the maximum PCIe bandwidth in bytes/sec
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, pcie_max_width)                       // the PCIe lane width
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, max_pcie_link_generation)             // the PCIe generation
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::int32_t>, bus_width)                  // the bus width of the different memory modules
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::int32_t>, num_channels)               // the number of memory channels of the different memory modules
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::string>, location)                    // the location of the different memory modules (system or device)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::uint64_t>, memory_total)          // the total memory size of the different memory modules in Bytes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::uint64_t>, visible_memory_total)  // the total allocatable memory size of the different memory modules in Bytes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::string>, memory_location)         // the location of the different memory modules (system or device)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, num_pcie_lanes_max)               // the maximum PCIe lane width
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, pcie_link_generation_max)         // the maximum PCIe generation
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, pcie_link_speed_max)              // the maximum PCIe bandwidth in bytes/sec
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::int32_t>, memory_bus_width)       // the bus width of the different memory modules
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::int32_t>, memory_num_channels)    // the number of memory channels of the different memory modules
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::vector<std::uint64_t>>, memory_free)  // the currently free memory of the different memory modules in Bytes
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, pcie_link_speed)                   // the current PCIe bandwidth in bytes/sec
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, pcie_link_width)                   // the current PCIe lane width
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::vector<std::uint64_t>>, memory_used)  // the currently used memory of the different memory modules in Bytes
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, num_pcie_lanes)                    // the current PCIe lane width
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, pcie_link_generation)              // the current PCIe generation
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, pcie_link_speed)                   // the current PCIe bandwidth in bytes/sec
 };
 
 /**
@@ -199,13 +204,6 @@ class level_zero_temperature_samples {
     // befriend hardware sampler class
     friend class gpu_intel_hardware_sampler;
 
-    /**
-     * @brief The map type used if the number of potential Level Zero domains is unknown at compile time.
-     * @tparam T the mapped type
-     */
-    template <typename T>
-    using map_type = std::unordered_map<std::string, T>;
-
   public:
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
@@ -214,10 +212,17 @@ class level_zero_temperature_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<double>, temperature_max)  // the maximum temperature for the sensor in °C
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, temperature_psu)            // the temperature of the PSU in °C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::vector<double>>, temperature)  // the current temperature for the sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans)         // the number of fans
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, fan_speed_max)     // the maximum fan speed the user can set in RPM
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max)         // the maximum GPU temperature in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max)  // the maximum memory temperature in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, global_temperature_max)  // the maximum global temperature in °C
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage)  // the current intended fan speed in %
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature)           // the temperature of the GPU in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_temperature)    // the temperature of the memory in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, global_temperature)    // the global temperature in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, psu_temperature)       // the temperature of the PSU in °C
 };
 
 /**
@@ -232,18 +237,18 @@ std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples
 }  // namespace hws
 
 template <>
-struct std::formatter<hws::level_zero_general_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_general_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::level_zero_clock_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_clock_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::level_zero_power_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_power_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::level_zero_memory_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_memory_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::level_zero_temperature_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_temperature_samples> : fmt::ostream_formatter { };
 
 #endif  // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_intel/utility.hpp b/include/hardware_sampling/gpu_intel/utility.hpp
index 810901f..03f9f8d 100644
--- a/include/hardware_sampling/gpu_intel/utility.hpp
+++ b/include/hardware_sampling/gpu_intel/utility.hpp
@@ -12,13 +12,14 @@
 #define HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_
 #pragma once
 
+#include "fmt/format.h"          // fmt::format
 #include "level_zero/ze_api.h"   // Level Zero runtime functions
 #include "level_zero/zes_api.h"  // Level Zero runtime functions
 
-#include <format>       // std::format
 #include <stdexcept>    // std::runtime_error
 #include <string>       // std::string
 #include <string_view>  // std::string_view
+#include <vector>       // std::vector
 
 namespace hws::detail {
 
@@ -39,13 +40,27 @@ namespace hws::detail {
         {                                                                                                                                          \
             const ze_result_t errc = level_zero_func;                                                                                              \
             if (errc != ZE_RESULT_SUCCESS) {                                                                                                       \
-                throw std::runtime_error{ std::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, to_result_string(errc)) }; \
+                throw std::runtime_error{ fmt::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, to_result_string(errc)) }; \
             }                                                                                                                                      \
         }
 #else
     #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) level_zero_func;
 #endif
 
+/**
+ * @brief Convert the @p flags to a vector of strings.
+ * @param[in] flags the flags to convert to strings
+ * @return a vector containing all flags as strings (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::vector<std::string> property_flags_to_vector(ze_device_property_flags_t flags);
+
+/**
+ * @brief Convert the throttle reason bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|".
+ * @param[in] reasons the bitmask to convert to a string
+ * @return all throttle reasons (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::string throttle_reason_to_string(zes_freq_throttle_reason_flags_t reasons);
+
 /**
  * @brief Convert a Level Zero memory type to a string representation.
  * @param[in] mem_type the Level Zero memory type
@@ -60,13 +75,6 @@ namespace hws::detail {
  */
 [[nodiscard]] std::string memory_location_to_name(zes_mem_loc_t mem_loc);
 
-/**
- * @brief Convert a Level Zero temperature sensor type to a string representation.
- * @param[in] sensor_type the Level Zero temperature sensor type
- * @return the string representation (`[[nodiscard]]`)
- */
-[[nodiscard]] std::string temperature_sensor_type_to_name(zes_temp_sensors_t sensor_type);
-
 }  // namespace hws::detail
 
 #endif  // HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
index 2027135..774ab6c 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
@@ -13,6 +13,7 @@
 #include "hardware_sampling/hardware_sampler.hpp"                         // hws::hardware_sampler
 #include "hardware_sampling/utility.hpp"                                  // hws::{durations_from_reference_time, join}
 
+#include "fmt/format.h"          // fmt::format
 #include "level_zero/ze_api.h"   // Level Zero runtime functions
 #include "level_zero/zes_api.h"  // Level Zero runtime functions
 
@@ -20,7 +21,6 @@
 #include <cstddef>    // std::size_t
 #include <cstdint>    // std::int32_t
 #include <exception>  // std::exception, std::terminate
-#include <format>     // std::format
 #include <ios>        // std::ios_base
 #include <iostream>   // std::cerr, std::endl
 #include <stdexcept>  // std::runtime_error
@@ -44,7 +44,7 @@ gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_
     hardware_sampler{ sampling_interval } {
     // make sure that zeInit is only called once for all instances
     if (instances_++ == 0) {
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY))
         // notify that initialization has been finished
         init_finished_ = true;
     } else {
@@ -77,6 +77,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     std::vector<zes_freq_handle_t> frequency_handles{};
     std::vector<zes_pwr_handle_t> power_handles{};
     std::vector<zes_mem_handle_t> memory_handles{};
+    std::vector<zes_fan_handle_t> fan_handles{};
     std::vector<zes_psu_handle_t> psu_handles{};
     std::vector<zes_temp_handle_t> temperature_handles{};
 
@@ -86,6 +87,8 @@ void gpu_intel_hardware_sampler::sampling_loop() {
 
     this->add_time_point(std::chrono::steady_clock::now());
 
+    double initial_total_power_consumption{};  // initial total power consumption in J
+
     // retrieve initial general information
     {
         // the byte order is given by Intel directly
@@ -93,9 +96,12 @@ void gpu_intel_hardware_sampler::sampling_loop() {
 
         ze_device_properties_t ze_device_prop{};
         if (zeDeviceGetProperties(device, &ze_device_prop) == ZE_RESULT_SUCCESS) {
-            general_samples_.vendor_id_ = std::format("{:x}", ze_device_prop.vendorId);  // TODO: PCI configuration ID to name?
+            general_samples_.vendor_id_ = fmt::format("{:x}", ze_device_prop.vendorId);
             general_samples_.num_threads_per_eu_ = ze_device_prop.numThreadsPerEU;
             general_samples_.eu_simd_width_ = ze_device_prop.physicalEUSimdWidth;
+
+            // assemble list of GPU flags
+            general_samples_.flags_ = detail::property_flags_to_vector(ze_device_prop.flags);
         }
 
         zes_device_properties_t zes_device_prop{};
@@ -143,12 +149,12 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                         // determine the frequency domain (e.g. GPU, memory, etc)
                         switch (prop.type) {
                             case ZES_FREQ_DOMAIN_GPU:
-                                clock_samples_.clock_gpu_min_ = prop.min;
-                                clock_samples_.clock_gpu_max_ = prop.max;
+                                clock_samples_.clock_frequency_min_ = prop.min;
+                                clock_samples_.clock_frequency_max_ = prop.max;
                                 break;
                             case ZES_FREQ_DOMAIN_MEMORY:
-                                clock_samples_.clock_mem_min_ = prop.min;
-                                clock_samples_.clock_mem_max_ = prop.max;
+                                clock_samples_.memory_clock_frequency_min_ = prop.min;
+                                clock_samples_.memory_clock_frequency_max_ = prop.max;
                                 break;
                             default:
                                 // do nothing
@@ -163,10 +169,10 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                                 // determine the frequency domain (e.g. GPU, memory, etc)
                                 switch (prop.type) {
                                     case ZES_FREQ_DOMAIN_GPU:
-                                        clock_samples_.available_clocks_gpu_ = available_clocks;
+                                        clock_samples_.available_clock_frequencies_ = available_clocks;
                                         break;
                                     case ZES_FREQ_DOMAIN_MEMORY:
-                                        clock_samples_.available_clocks_mem_ = available_clocks;
+                                        clock_samples_.available_memory_clock_frequencies_ = available_clocks;
                                         break;
                                     default:
                                         // do nothing
@@ -183,28 +189,28 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                                 case ZES_FREQ_DOMAIN_GPU:
                                     {
                                         if (frequency_state.tdp >= 0.0) {
-                                            clock_samples_.tdp_frequency_limit_gpu_ = decltype(clock_samples_.tdp_frequency_limit_gpu_)::value_type{ frequency_state.tdp };
+                                            clock_samples_.frequency_limit_tdp_ = decltype(clock_samples_.frequency_limit_tdp_)::value_type{ frequency_state.tdp };
                                         }
                                         if (frequency_state.actual >= 0.0) {
-                                            clock_samples_.clock_gpu_ = decltype(clock_samples_.clock_gpu_)::value_type{ frequency_state.actual };
+                                            clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ frequency_state.actual };
                                         }
                                         if (frequency_state.throttleReasons >= 0.0) {
-                                            using vector_type = decltype(clock_samples_.throttle_reason_gpu_)::value_type;
-                                            clock_samples_.throttle_reason_gpu_ = vector_type{ static_cast<vector_type::value_type>(frequency_state.throttleReasons) };
+                                            using vector_type = decltype(clock_samples_.throttle_reason_)::value_type;
+                                            clock_samples_.throttle_reason_ = vector_type{ static_cast<vector_type::value_type>(detail::throttle_reason_to_string(frequency_state.throttleReasons)) };
                                         }
                                     }
                                     break;
                                 case ZES_FREQ_DOMAIN_MEMORY:
                                     {
                                         if (frequency_state.tdp >= 0.0) {
-                                            clock_samples_.tdp_frequency_limit_mem_ = decltype(clock_samples_.tdp_frequency_limit_mem_)::value_type{ frequency_state.tdp };
+                                            clock_samples_.memory_frequency_limit_tdp_ = decltype(clock_samples_.memory_frequency_limit_tdp_)::value_type{ frequency_state.tdp };
                                         }
                                         if (frequency_state.actual >= 0.0) {
-                                            clock_samples_.clock_mem_ = decltype(clock_samples_.clock_mem_)::value_type{ frequency_state.actual };
+                                            clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ frequency_state.actual };
                                         }
                                         if (frequency_state.throttleReasons >= 0.0) {
-                                            using vector_type = decltype(clock_samples_.throttle_reason_mem_)::value_type;
-                                            clock_samples_.throttle_reason_mem_ = vector_type{ static_cast<vector_type::value_type>(frequency_state.throttleReasons) };
+                                            using vector_type = decltype(clock_samples_.memory_throttle_reason_)::value_type;
+                                            clock_samples_.memory_throttle_reason_ = vector_type{ static_cast<vector_type::value_type>(detail::throttle_reason_to_string(frequency_state.throttleReasons)) };
                                         }
                                     }
                                     break;
@@ -259,7 +265,9 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                     // get total power consumption
                     zes_power_energy_counter_t energy_counter{};
                     if (zesPowerGetEnergyCounter(power_handles.front(), &energy_counter) == ZE_RESULT_SUCCESS) {
-                        power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(energy_counter.energy) / 1000.0 / 1000.0 };
+                        initial_total_power_consumption = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(energy_counter.energy) / 1000.0 / 1000.0;
+                        power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
+                        power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ 0.0 };
                     }
 
                     // get energy thresholds
@@ -294,40 +302,46 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                         }
                         if (prop.busWidth != -1) {
                             // first value to add -> initialize map
-                            if (!memory_samples_.bus_width_.has_value()) {
-                                memory_samples_.bus_width_ = decltype(memory_samples_.bus_width_)::value_type{};
+                            if (!memory_samples_.memory_bus_width_.has_value()) {
+                                memory_samples_.memory_bus_width_ = decltype(memory_samples_.memory_bus_width_)::value_type{};
                             }
                             // add new memory bus width
-                            memory_samples_.bus_width_.value()[memory_module_name] = prop.busWidth;
+                            memory_samples_.memory_bus_width_.value()[memory_module_name] = prop.busWidth;
                         }
                         if (prop.numChannels != -1) {
                             // first value to add -> initialize map
-                            if (!memory_samples_.num_channels_.has_value()) {
-                                memory_samples_.num_channels_ = decltype(memory_samples_.num_channels_)::value_type{};
+                            if (!memory_samples_.memory_num_channels_.has_value()) {
+                                memory_samples_.memory_num_channels_ = decltype(memory_samples_.memory_num_channels_)::value_type{};
                             }
                             // add new number of memory channels
-                            memory_samples_.num_channels_.value()[memory_module_name] = prop.numChannels;
+                            memory_samples_.memory_num_channels_.value()[memory_module_name] = prop.numChannels;
                         }
                         // first value to add -> initialize map
-                        if (!memory_samples_.location_.has_value()) {
-                            memory_samples_.location_ = decltype(memory_samples_.location_)::value_type{};
+                        if (!memory_samples_.memory_location_.has_value()) {
+                            memory_samples_.memory_location_ = decltype(memory_samples_.memory_location_)::value_type{};
                         }
-                        memory_samples_.location_.value()[memory_module_name] = detail::memory_location_to_name(prop.location);
+                        memory_samples_.memory_location_.value()[memory_module_name] = detail::memory_location_to_name(prop.location);
 
                         // get current memory information
                         zes_mem_state_t mem_state{};
                         if (zesMemoryGetState(handle, &mem_state) == ZE_RESULT_SUCCESS) {
                             // first value to add -> initialize map
-                            if (!memory_samples_.allocatable_memory_total_.has_value()) {
-                                memory_samples_.allocatable_memory_total_ = decltype(memory_samples_.allocatable_memory_total_)::value_type{};
+                            if (!memory_samples_.visible_memory_total_.has_value()) {
+                                memory_samples_.visible_memory_total_ = decltype(memory_samples_.visible_memory_total_)::value_type{};
                             }
-                            memory_samples_.allocatable_memory_total_.value()[memory_module_name] = mem_state.size;
+                            memory_samples_.visible_memory_total_.value()[memory_module_name] = mem_state.size;
 
                             // first value to add -> initialize map
                             if (!memory_samples_.memory_free_.has_value()) {
                                 memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{};
                             }
                             memory_samples_.memory_free_.value()[memory_module_name].push_back(mem_state.free);
+
+                            // first value to add -> initialize map
+                            if (!memory_samples_.memory_used_.has_value()) {
+                                memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{};
+                            }
+                            memory_samples_.memory_used_.value()[memory_module_name].push_back(mem_state.size - mem_state.free);
                         }
                     }
                 }
@@ -336,13 +350,13 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                 zes_pci_properties_t pci_prop{};
                 if (zesDevicePciGetProperties(device, &pci_prop) == ZE_RESULT_SUCCESS) {
                     if (pci_prop.maxSpeed.gen != -1) {
-                        memory_samples_.max_pcie_link_generation_ = pci_prop.maxSpeed.gen;
+                        memory_samples_.pcie_link_generation_max_ = pci_prop.maxSpeed.gen;
                     }
                     if (pci_prop.maxSpeed.width != -1) {
-                        memory_samples_.pcie_max_width_ = pci_prop.maxSpeed.width;
+                        memory_samples_.num_pcie_lanes_max_ = pci_prop.maxSpeed.width;
                     }
                     if (pci_prop.maxSpeed.maxBandwidth != -1) {
-                        memory_samples_.pcie_link_max_speed_ = pci_prop.maxSpeed.maxBandwidth;
+                        memory_samples_.pcie_link_speed_max_ = static_cast<decltype(memory_samples_.pcie_link_speed_max_)::value_type>(static_cast<double>(pci_prop.maxSpeed.maxBandwidth) / 1e6);
                     }
                 }
 
@@ -350,10 +364,10 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                 zes_pci_state_t pci_state{};
                 if (zesDevicePciGetState(device, &pci_state) == ZE_RESULT_SUCCESS) {
                     if (pci_state.speed.maxBandwidth != -1) {
-                        memory_samples_.pcie_link_speed_ = decltype(memory_samples_.pcie_link_speed_)::value_type{ pci_state.speed.maxBandwidth };
+                        memory_samples_.pcie_link_speed_ = decltype(memory_samples_.pcie_link_speed_)::value_type{ static_cast<decltype(memory_samples_.pcie_link_speed_)::value_type::value_type>(static_cast<double>(pci_state.speed.maxBandwidth) / 1e6) };
                     }
                     if (pci_state.speed.width != -1) {
-                        memory_samples_.pcie_link_width_ = decltype(memory_samples_.pcie_link_width_)::value_type{ pci_state.speed.width };
+                        memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{ pci_state.speed.width };
                     }
                     if (pci_state.speed.gen != -1) {
                         memory_samples_.pcie_link_generation_ = decltype(memory_samples_.pcie_link_generation_)::value_type{ pci_state.speed.gen };
@@ -365,6 +379,29 @@ void gpu_intel_hardware_sampler::sampling_loop() {
 
     // retrieve initial temperature related information
     {
+        std::uint32_t num_fans{ 0 };
+        if (zesDeviceEnumFans(device, &num_fans, nullptr) == ZE_RESULT_SUCCESS) {
+            temperature_samples_.num_fans_ = num_fans;
+
+            fan_handles.resize(num_fans);
+            if (zesDeviceEnumFans(device, &num_fans, fan_handles.data()) == ZE_RESULT_SUCCESS) {
+                // NOTE: only the first fan handle is used here
+                if (!fan_handles.empty()) {
+                    zes_fan_properties_t prop{};
+                    if (zesFanGetProperties(fan_handles.front(), &prop) == ZE_RESULT_SUCCESS) {
+                        temperature_samples_.fan_speed_max_ = prop.maxRPM;
+                    }
+
+                    std::int32_t fan_speed{};
+                    if (zesFanGetState(fan_handles.front(), ZES_FAN_SPEED_UNITS_PERCENT, &fan_speed) == ZE_RESULT_SUCCESS) {
+                        if (fan_speed != -1) {
+                            temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(fan_speed) };
+                        }
+                    }
+                }
+            }
+        }
+
         std::uint32_t num_psus{ 0 };
         if (zesDeviceEnumPsus(device, &num_psus, nullptr) == ZE_RESULT_SUCCESS) {
             psu_handles.resize(num_psus);
@@ -374,7 +411,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                     zes_psu_state_t psu_state{};
                     if (zesPsuGetState(psu_handles.front(), &psu_state) == ZE_RESULT_SUCCESS) {
                         if (psu_state.temperature != -1) {
-                            temperature_samples_.temperature_psu_ = decltype(temperature_samples_.temperature_psu_)::value_type{ psu_state.temperature };
+                            temperature_samples_.psu_temperature_ = static_cast<decltype(temperature_samples_.psu_temperature_)::value_type>(psu_state.temperature);
                         }
                     }
                 }
@@ -388,26 +425,66 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                 for (zes_temp_handle_t handle : temperature_handles) {
                     zes_temp_properties_t prop{};
                     if (zesTemperatureGetProperties(handle, &prop) == ZE_RESULT_SUCCESS) {
-                        const std::string sensor_name = detail::temperature_sensor_type_to_name(prop.type);
-                        if (sensor_name.empty()) {
-                            // unsupported sensor type
-                            continue;
-                        }
+                        switch (prop.type) {
+                            case ZES_TEMP_SENSORS_GLOBAL:
+                                {
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.global_temperature_max_.has_value()) {
+                                        temperature_samples_.global_temperature_max_ = decltype(temperature_samples_.global_temperature_max_)::value_type{};
+                                    }
+                                    // add new maximum temperature
+                                    temperature_samples_.global_temperature_max_ = prop.maxTemperature;
 
-                        // first value to add -> initialize map
-                        if (!temperature_samples_.temperature_max_.has_value()) {
-                            temperature_samples_.temperature_max_ = decltype(temperature_samples_.temperature_max_)::value_type{};
-                        }
-                        // add new maximum temperature
-                        temperature_samples_.temperature_max_.value()[sensor_name] = prop.maxTemperature;
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.global_temperature_.has_value()) {
+                                        temperature_samples_.global_temperature_ = decltype(temperature_samples_.global_temperature_)::value_type{};
+                                    }
+                                    double temp{};
+                                    if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) {
+                                        temperature_samples_.global_temperature_->push_back(temp);
+                                    }
+                                }
+                                break;
+                            case ZES_TEMP_SENSORS_GPU:
+                                {
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.temperature_max_.has_value()) {
+                                        temperature_samples_.temperature_max_ = decltype(temperature_samples_.temperature_max_)::value_type{};
+                                    }
+                                    // add new maximum temperature
+                                    temperature_samples_.temperature_max_ = prop.maxTemperature;
 
-                        // first value to add -> initialize map
-                        if (!temperature_samples_.temperature_.has_value()) {
-                            temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{};
-                        }
-                        double temp{};
-                        if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) {
-                            temperature_samples_.temperature_.value()[sensor_name].push_back(temp);
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.temperature_.has_value()) {
+                                        temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{};
+                                    }
+                                    double temp{};
+                                    if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) {
+                                        temperature_samples_.temperature_->push_back(temp);
+                                    }
+                                }
+                                break;
+                            case ZES_TEMP_SENSORS_MEMORY:
+                                {
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.memory_temperature_max_.has_value()) {
+                                        temperature_samples_.memory_temperature_max_ = decltype(temperature_samples_.memory_temperature_max_)::value_type{};
+                                    }
+                                    // add new maximum temperature
+                                    temperature_samples_.memory_temperature_max_ = prop.maxTemperature;
+
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.memory_temperature_.has_value()) {
+                                        temperature_samples_.memory_temperature_ = decltype(temperature_samples_.memory_temperature_)::value_type{};
+                                    }
+                                    double temp{};
+                                    if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) {
+                                        temperature_samples_.memory_temperature_->push_back(temp);
+                                    }
+                                }
+                                break;
+                            default:
+                                break;
                         }
                     }
                 }
@@ -430,37 +507,37 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                 for (zes_freq_handle_t handle : frequency_handles) {
                     // get frequency properties
                     zes_freq_properties_t prop{};
-                    HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetProperties(handle, &prop));
+                    HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetProperties(handle, &prop))
 
                     // get current frequency information
                     zes_freq_state_t frequency_state{};
-                    if (clock_samples_.clock_gpu_.has_value() || clock_samples_.clock_mem_.has_value()) {
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetState(handle, &frequency_state));
+                    if (clock_samples_.clock_frequency_.has_value() || clock_samples_.memory_clock_frequency_.has_value()) {
+                        HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetState(handle, &frequency_state))
                         // determine the frequency domain (e.g. GPU, memory, etc)
                         switch (prop.type) {
                             case ZES_FREQ_DOMAIN_GPU:
                                 {
-                                    if (clock_samples_.tdp_frequency_limit_gpu_.has_value()) {
-                                        clock_samples_.tdp_frequency_limit_gpu_->push_back(frequency_state.tdp);
+                                    if (clock_samples_.frequency_limit_tdp_.has_value()) {
+                                        clock_samples_.frequency_limit_tdp_->push_back(frequency_state.tdp);
                                     }
-                                    if (clock_samples_.clock_gpu_.has_value()) {
-                                        clock_samples_.clock_gpu_->push_back(frequency_state.actual);
+                                    if (clock_samples_.clock_frequency_.has_value()) {
+                                        clock_samples_.clock_frequency_->push_back(frequency_state.actual);
                                     }
-                                    if (clock_samples_.throttle_reason_gpu_.has_value()) {
-                                        clock_samples_.throttle_reason_gpu_->push_back(static_cast<decltype(clock_samples_.throttle_reason_gpu_)::value_type::value_type>(frequency_state.throttleReasons));
+                                    if (clock_samples_.throttle_reason_.has_value()) {
+                                        clock_samples_.throttle_reason_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons));
                                     }
                                 }
                                 break;
                             case ZES_FREQ_DOMAIN_MEMORY:
                                 {
-                                    if (clock_samples_.tdp_frequency_limit_mem_.has_value()) {
-                                        clock_samples_.tdp_frequency_limit_mem_->push_back(frequency_state.tdp);
+                                    if (clock_samples_.memory_frequency_limit_tdp_.has_value()) {
+                                        clock_samples_.memory_frequency_limit_tdp_->push_back(frequency_state.tdp);
                                     }
-                                    if (clock_samples_.clock_mem_.has_value()) {
-                                        clock_samples_.clock_mem_->push_back(frequency_state.actual);
+                                    if (clock_samples_.memory_clock_frequency_.has_value()) {
+                                        clock_samples_.memory_clock_frequency_->push_back(frequency_state.actual);
                                     }
-                                    if (clock_samples_.throttle_reason_mem_.has_value()) {
-                                        clock_samples_.throttle_reason_mem_->push_back(static_cast<decltype(clock_samples_.throttle_reason_mem_)::value_type::value_type>(frequency_state.throttleReasons));
+                                    if (clock_samples_.memory_throttle_reason_.has_value()) {
+                                        clock_samples_.memory_throttle_reason_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons));
                                     }
                                 }
                                 break;
@@ -479,9 +556,17 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                     if (power_samples_.power_total_energy_consumption_.has_value()) {
                         // get total power consumption
                         zes_power_energy_counter_t energy_counter{};
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter));
+                        HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter))
 
-                        power_samples_.power_total_energy_consumption_->push_back(static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(energy_counter.energy) / 1000.0 / 1000.0);
+                        const auto power_consumption = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(energy_counter.energy) / 1000.0 / 1000.0 - initial_total_power_consumption;
+
+                        // calculate current power draw as (Energy Difference [J]) / (Time Difference [s]); both energy values are relative to the initial counter reading
+                        const std::size_t last_index = this->sampling_time_points().size() - 1;
+                        const double power_usage = (power_consumption - power_samples_.power_total_energy_consumption_->back()) / (std::chrono::duration<double>(this->sampling_time_points()[last_index] - this->sampling_time_points()[last_index - 1]).count());
+                        power_samples_.power_usage_->push_back(power_usage);
+
+                        // add power consumption last to be able to use the std::vector::back() function
+                        power_samples_.power_total_energy_consumption_->push_back(power_consumption);
                     }
                 }
             }
@@ -490,7 +575,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
             {
                 for (zes_mem_handle_t handle : memory_handles) {
                     zes_mem_properties_t prop{};
-                    HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop));
+                    HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop))
 
                     // get the memory module name
                     const std::string memory_module_name = detail::memory_module_to_name(prop.type);
@@ -498,23 +583,27 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                     if (memory_samples_.memory_free_.has_value()) {
                         // get current memory information
                         zes_mem_state_t mem_state{};
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetState(handle, &mem_state));
+                        HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetState(handle, &mem_state))
 
                         memory_samples_.memory_free_.value()[memory_module_name].push_back(mem_state.free);
+
+                        if (memory_samples_.visible_memory_total_.has_value()) {
+                            memory_samples_.memory_used_.value()[memory_module_name].push_back(memory_samples_.visible_memory_total_.value()[memory_module_name] - mem_state.free);
+                        }
                     }
                 }
 
-                if (memory_samples_.pcie_link_speed_.has_value() || memory_samples_.pcie_link_width_.has_value() || memory_samples_.pcie_link_width_.has_value()) {
+                if (memory_samples_.pcie_link_speed_.has_value() || memory_samples_.num_pcie_lanes_.has_value() || memory_samples_.pcie_link_generation_.has_value()) {
                     // the current PCIe stats
                     zes_pci_state_t pci_state{};
-                    HWS_LEVEL_ZERO_ERROR_CHECK(zesDevicePciGetState(device, &pci_state));
+                    HWS_LEVEL_ZERO_ERROR_CHECK(zesDevicePciGetState(device, &pci_state))
                     if (memory_samples_.pcie_link_speed_.has_value()) {
-                        memory_samples_.pcie_link_speed_->push_back(pci_state.speed.maxBandwidth);
+                        memory_samples_.pcie_link_speed_->push_back(static_cast<decltype(memory_samples_.pcie_link_speed_)::value_type::value_type>(static_cast<double>(pci_state.speed.maxBandwidth) / 1e6));
                     }
-                    if (memory_samples_.pcie_link_width_.has_value()) {
-                        memory_samples_.pcie_link_width_->push_back(pci_state.speed.width);
+                    if (memory_samples_.num_pcie_lanes_.has_value()) {
+                        memory_samples_.num_pcie_lanes_->push_back(pci_state.speed.width);
                     }
-                    if (memory_samples_.pcie_link_width_.has_value()) {
+                    if (memory_samples_.pcie_link_generation_.has_value()) {
                         memory_samples_.pcie_link_generation_->push_back(pci_state.speed.gen);
                     }
                 }
@@ -523,28 +612,48 @@ void gpu_intel_hardware_sampler::sampling_loop() {
             // retrieve temperature related samples
             {
                 if (!psu_handles.empty()) {
-                    if (temperature_samples_.temperature_psu_.has_value()) {
+                    if (temperature_samples_.psu_temperature_.has_value()) {
                         // NOTE: only the first PSU is used here
                         zes_psu_state_t psu_state{};
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesPsuGetState(psu_handles.front(), &psu_state));
-                        temperature_samples_.temperature_psu_->push_back(psu_state.temperature);
+                        HWS_LEVEL_ZERO_ERROR_CHECK(zesPsuGetState(psu_handles.front(), &psu_state))
+                        temperature_samples_.psu_temperature_->push_back(psu_state.temperature);
                     }
                 }
 
                 for (zes_temp_handle_t handle : temperature_handles) {
                     zes_temp_properties_t prop{};
-                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetProperties(handle, &prop));
-
-                    const std::string sensor_name = detail::temperature_sensor_type_to_name(prop.type);
-                    if (sensor_name.empty()) {
-                        // unsupported sensor type
-                        continue;
-                    }
-
-                    if (temperature_samples_.temperature_.has_value() && temperature_samples_.temperature_.value().contains(sensor_name)) {
-                        double temp{};
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp));
-                        temperature_samples_.temperature_.value()[sensor_name].push_back(temp);
+                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetProperties(handle, &prop))
+
+                    switch (prop.type) {
+                        case ZES_TEMP_SENSORS_GLOBAL:
+                            {
+                                if (temperature_samples_.global_temperature_.has_value()) {
+                                    double temp{};
+                                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp))
+                                    temperature_samples_.global_temperature_->push_back(temp);
+                                }
+                            }
+                            break;
+                        case ZES_TEMP_SENSORS_GPU:
+                            {
+                                if (temperature_samples_.temperature_.has_value()) {
+                                    double temp{};
+                                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp))
+                                    temperature_samples_.temperature_->push_back(temp);
+                                }
+                            }
+                            break;
+                        case ZES_TEMP_SENSORS_MEMORY:
+                            {
+                                if (temperature_samples_.memory_temperature_.has_value()) {
+                                    double temp{};
+                                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp))
+                                    temperature_samples_.memory_temperature_->push_back(temp);
+                                }
+                            }
+                            break;
+                        default:
+                            break;
                     }
                 }
             }
@@ -559,8 +668,8 @@ std::string gpu_intel_hardware_sampler::device_identification() const {
     // get the level zero handle from the device
     ze_device_handle_t device = device_.get_impl().device;
     ze_device_properties_t prop{};
-    HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGetProperties(device, &prop));
-    return std::format("gpu_intel_device_{}", prop.deviceId);
+    HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGetProperties(device, &prop))
+    return fmt::format("gpu_intel_device_{}", prop.deviceId);
 }
 
 std::string gpu_intel_hardware_sampler::generate_yaml_string() const {
@@ -569,10 +678,10 @@ std::string gpu_intel_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return std::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
+    return fmt::format("{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
+                       "{}\n\n"
                        "{}",
                        general_samples_.generate_yaml_string(),
                        clock_samples_.generate_yaml_string(),
@@ -586,7 +695,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa
         out.setstate(std::ios_base::failbit);
         return out;
     } else {
-        return out << std::format("sampling interval: {}\n"
+        return out << fmt::format("sampling interval: {}\n"
                                   "time points: [{}]\n\n"
                                   "general samples:\n{}\n\n"
                                   "clock samples:\n{}\n\n"
@@ -594,7 +703,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa
                                   "memory samples:\n{}\n\n"
                                   "temperature samples:\n{}",
                                   sampler.sampling_interval(),
-                                  detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
+                                  fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
                                   sampler.general_samples(),
                                   sampler.clock_samples(),
                                   sampler.power_samples(),
diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
index 971bfb9..5ceffcf 100644
--- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
+++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
@@ -7,9 +7,8 @@
 
 #include "hardware_sampling/gpu_intel/level_zero_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join}
+#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, remove_cvref_t}
 
-#include <format>       // std::format
 #include <ostream>      // std::ostream
 #include <string>       // std::string
 #include <string_view>  // std::string_view
@@ -18,22 +17,22 @@
 
 namespace hws {
 
-namespace detail {
+namespace {
 
 template <typename MapType>
 void append_map_values(std::string &str, const std::string_view entry_name, const MapType &map) {
     if (map.has_value()) {
         for (const auto &[key, value] : map.value()) {
-            if constexpr (is_vector_v<std::remove_cvref_t<decltype(value)>>) {
-                str += std::format("{}_{}: [{}]\n", entry_name, key, detail::join(value, ", "));
+            if constexpr (detail::is_vector_v<detail::remove_cvref_t<decltype(value)>>) {
+                str += fmt::format("{}_{}: [{}]\n", entry_name, key, fmt::join(value, ", "));
             } else {
-                str += std::format("{}_{}: {}\n", entry_name, key, value);
+                str += fmt::format("{}_{}: {}\n", entry_name, key, value);
             }
         }
     }
 }
 
-}  // namespace detail
+}  // namespace
 
 //*************************************************************************************************************************************//
 //                                                           general samples                                                           //
@@ -44,42 +43,49 @@ std::string level_zero_general_samples::generate_yaml_string() const {
 
     // device byte order
     if (this->byte_order_.has_value()) {
-        str += std::format("  byte_order:\n"
+        str += fmt::format("  byte_order:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->byte_order_.value());
     }
     // the vendor specific ID
     if (this->vendor_id_.has_value()) {
-        str += std::format("  vendor_id:\n"
+        str += fmt::format("  vendor_id:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->vendor_id_.value());
     }
     // device name
     if (this->name_.has_value()) {
-        str += std::format("  name:\n"
+        str += fmt::format("  name:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->name_.value());
     }
+    // GPU specific flags
+    if (this->flags_.has_value()) {
+        str += fmt::format("  flags:\n"
+                           "    unit: \"string\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(detail::quote(this->flags_.value()), ", "));
+    }
     // the standby mode
     if (this->standby_mode_.has_value()) {
-        str += std::format("  standby_mode:\n"
+        str += fmt::format("  standby_mode:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->standby_mode_.value());
     }
     // the number of threads per EU unit
     if (this->num_threads_per_eu_.has_value()) {
-        str += std::format("  num_threads_per_eu:\n"
+        str += fmt::format("  num_threads_per_eu:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->num_threads_per_eu_.value());
     }
     // the EU SIMD width
     if (this->eu_simd_width_.has_value()) {
-        str += std::format("  physical_eu_simd_width:\n"
+        str += fmt::format("  eu_simd_width:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->eu_simd_width_.value());
@@ -92,15 +98,17 @@ std::string level_zero_general_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) {
-    return out << std::format("byte_order [string]: {}\n"
+    return out << fmt::format("byte_order [string]: {}\n"
                               "vendor_id [string]: {}\n"
                               "name [string]: {}\n"
+                              "flags [string]: [{}]\n"
                               "standby_mode [string]: {}\n"
                               "num_threads_per_eu [int]: {}\n"
                               "eu_simd_width [int]: {}",
                               detail::value_or_default(samples.get_byte_order()),
                               detail::value_or_default(samples.get_vendor_id()),
                               detail::value_or_default(samples.get_name()),
+                              fmt::join(detail::value_or_default(samples.get_flags()), ", "),
                               detail::value_or_default(samples.get_standby_mode()),
                               detail::value_or_default(samples.get_num_threads_per_eu()),
                               detail::value_or_default(samples.get_eu_simd_width()));
@@ -114,89 +122,89 @@ std::string level_zero_clock_samples::generate_yaml_string() const {
     std::string str{ "clock:\n" };
 
     // minimum GPU core clock
-    if (this->clock_gpu_min_.has_value()) {
-        str += std::format("  clock_gpu_min:\n"
+    if (this->clock_frequency_min_.has_value()) {
+        str += fmt::format("  clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_gpu_min_.value());
+                           this->clock_frequency_min_.value());
     }
     // maximum GPU core clock
-    if (this->clock_gpu_max_.has_value()) {
-        str += std::format("  clock_gpu_max:\n"
+    if (this->clock_frequency_max_.has_value()) {
+        str += fmt::format("  clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_gpu_max_.value());
-    }
-    // all possible GPU core clock frequencies
-    if (this->available_clocks_gpu_.has_value()) {
-        str += std::format("  available_clocks_gpu:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->available_clocks_gpu_.value(), ", "));
+                           this->clock_frequency_max_.value());
     }
     // minimum memory clock
-    if (this->clock_mem_min_.has_value()) {
-        str += std::format("  clock_mem_min:\n"
+    if (this->memory_clock_frequency_min_.has_value()) {
+        str += fmt::format("  memory_clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_mem_min_.value());
+                           this->memory_clock_frequency_min_.value());
     }
     // maximum memory clock
-    if (this->clock_mem_max_.has_value()) {
-        str += std::format("  clock_mem_max:\n"
+    if (this->memory_clock_frequency_max_.has_value()) {
+        str += fmt::format("  memory_clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->clock_mem_max_.value());
+                           this->memory_clock_frequency_max_.value());
+    }
+    // all possible GPU core clock frequencies
+    if (this->available_clock_frequencies_.has_value()) {
+        str += fmt::format("  available_clock_frequencies:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->available_clock_frequencies_.value(), ", "));
     }
     // all possible memory clock frequencies
-    if (this->available_clocks_mem_.has_value()) {
-        str += std::format("  available_clocks_mem:\n"
+    if (this->available_memory_clock_frequencies_.has_value()) {
+        str += fmt::format("  available_memory_clock_frequencies:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->available_clocks_mem_.value(), ", "));
+                           fmt::join(this->available_memory_clock_frequencies_.value(), ", "));
     }
 
-    // the maximum GPU core frequency based on the current TDP limit
-    if (this->tdp_frequency_limit_gpu_.has_value()) {
-        str += std::format("  tdp_frequency_limit_gpu:\n"
+    // the current GPU core clock frequency
+    if (this->clock_frequency_.has_value()) {
+        str += fmt::format("  clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->tdp_frequency_limit_gpu_.value(), ", "));
+                           fmt::join(this->clock_frequency_.value(), ", "));
     }
-    // the current GPU core clock frequency
-    if (this->clock_gpu_.has_value()) {
-        str += std::format("  clock_gpu:\n"
+    // the current memory clock frequency
+    if (this->memory_clock_frequency_.has_value()) {
+        str += fmt::format("  memory_clock_frequency:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_gpu_.value(), ", "));
+                           fmt::join(this->memory_clock_frequency_.value(), ", "));
     }
     // the current GPU core throttle reason
-    if (this->throttle_reason_gpu_.has_value()) {
-        str += std::format("  throttle_reason_gpu:\n"
-                           "    unit: \"bitmask\"\n"
+    if (this->throttle_reason_.has_value()) {
+        str += fmt::format("  throttle_reason:\n"
+                           "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->throttle_reason_gpu_.value(), ", "));
+                           fmt::join(this->throttle_reason_.value(), ", "));
     }
-    // the maximum memory frequency based on the current TDP limit
-    if (this->tdp_frequency_limit_mem_.has_value()) {
-        str += std::format("  tdp_frequency_limit_mem:\n"
-                           "    unit: \"MHz\"\n"
+    // the current memory throttle reason
+    if (this->memory_throttle_reason_.has_value()) {
+        str += fmt::format("  memory_throttle_reason:\n"
+                           "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->tdp_frequency_limit_mem_.value(), ", "));
+                           fmt::join(this->memory_throttle_reason_.value(), ", "));
     }
-    // the current memory clock frequency
-    if (this->clock_mem_.has_value()) {
-        str += std::format("  clock_mem:\n"
+    // the maximum GPU core frequency based on the current TDP limit
+    if (this->frequency_limit_tdp_.has_value()) {
+        str += fmt::format("  frequency_limit_tdp:\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->clock_mem_.value(), ", "));
+                           fmt::join(this->frequency_limit_tdp_.value(), ", "));
     }
-    // the current memory throttle reason
-    if (this->throttle_reason_mem_.has_value()) {
-        str += std::format("  throttle_reason_mem:\n"
-                           "    unit: \"bitmask\"\n"
+    // the maximum memory frequency based on the current TDP limit
+    if (this->memory_frequency_limit_tdp_.has_value()) {
+        str += fmt::format("  memory_frequency_limit_tdp:\n"
+                           "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->throttle_reason_mem_.value(), ", "));
+                           fmt::join(this->memory_frequency_limit_tdp_.value(), ", "));
     }
 
     // remove last newline
@@ -206,30 +214,30 @@ std::string level_zero_clock_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samples) {
-    return out << std::format("clock_gpu_min [MHz]: {}\n"
-                              "clock_gpu_max [MHz]: {}\n"
-                              "available_clocks_gpu [MHz]: [{}]\n"
-                              "clock_mem_min [MHz]: {}\n"
-                              "clock_mem_max [MHz]: {}\n"
-                              "available_clocks_mem [MHz]: [{}]\n"
-                              "tdp_frequency_limit_gpu [MHz]: [{}]\n"
-                              "clock_gpu [MHz]: [{}]\n"
-                              "throttle_reason_gpu [bitmask]: [{}]\n"
-                              "tdp_frequency_limit_mem [MHz]: [{}]\n"
-                              "clock_mem [MHz]: [{}]\n"
-                              "throttle_reason_mem [bitmask]: [{}]",
-                              detail::value_or_default(samples.get_clock_gpu_min()),
-                              detail::value_or_default(samples.get_clock_gpu_max()),
-                              detail::join(detail::value_or_default(samples.get_available_clocks_gpu()), ", "),
-                              detail::value_or_default(samples.get_clock_mem_min()),
-                              detail::value_or_default(samples.get_clock_mem_max()),
-                              detail::join(detail::value_or_default(samples.get_available_clocks_mem()), ", "),
-                              detail::join(detail::value_or_default(samples.get_tdp_frequency_limit_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_throttle_reason_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_tdp_frequency_limit_mem()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_mem()), ", "),
-                              detail::join(detail::value_or_default(samples.get_throttle_reason_mem()), ", "));
+    return out << fmt::format("clock_frequency_min [MHz]: {}\n"
+                              "clock_frequency_max [MHz]: {}\n"
+                              "memory_clock_frequency_min [MHz]: {}\n"
+                              "memory_clock_frequency_max [MHz]: {}\n"
+                              "available_clock_frequencies [MHz]: [{}]\n"
+                              "available_memory_clock_frequencies [MHz]: [{}]\n"
+                              "clock_frequency [MHz]: [{}]\n"
+                              "memory_clock_frequency [MHz]: [{}]\n"
+                              "throttle_reason [string]: [{}]\n"
+                              "memory_throttle_reason [string]: [{}]\n"
+                              "frequency_limit_tdp [MHz]: [{}]\n"
+                              "memory_frequency_limit_tdp [MHz]: [{}]",
+                              detail::value_or_default(samples.get_clock_frequency_min()),
+                              detail::value_or_default(samples.get_clock_frequency_max()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_min()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_max()),
+                              fmt::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_throttle_reason()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_frequency_limit_tdp()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_frequency_limit_tdp()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -241,36 +249,39 @@ std::string level_zero_power_samples::generate_yaml_string() const {
 
     // power enforced limit
     if (this->power_enforced_limit_.has_value()) {
-        str += std::format("  power_enforced_limit:\n"
+        str += fmt::format("  power_enforced_limit:\n"
                            "    unit: \"W\"\n"
                            "    values: {}\n",
                            this->power_enforced_limit_.value());
     }
     // power measurement type
     if (this->power_measurement_type_.has_value()) {
-        str += std::format("  power_measurement_type:\n"
+        str += fmt::format("  power_measurement_type:\n"
                            "    unit: \"string\"\n"
                            "    values: {}\n",
                            this->power_measurement_type_.value());
     }
     // the power management mode
     if (this->power_management_mode_.has_value()) {
-        str += std::format("  power_management_mode:\n"
+        str += fmt::format("  power_management_mode:\n"
                            "    unit: \"bool\"\n"
                            "    values: {}\n",
                            this->power_management_mode_.value());
     }
 
+    // the current power draw
+    if (this->power_usage_.has_value()) {
+        str += fmt::format("  power_usage:\n"
+                           "    unit: \"W\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->power_usage_.value(), ", "));
+    }
     // the total consumed energy
     if (this->power_total_energy_consumption_.has_value()) {
-        decltype(level_zero_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size());
-        for (std::size_t i = 0; i < consumed_energy.size(); ++i) {
-            consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front();
-        }
-        str += std::format("  power_total_energy_consumed:\n"
+        str += fmt::format("  power_total_energy_consumption:\n"
                            "    unit: \"J\"\n"
                            "    values: [{}]\n",
-                           detail::join(consumed_energy, ", "));
+                           fmt::join(this->power_total_energy_consumption_.value(), ", "));
     }
 
     // remove last newline
@@ -280,14 +291,16 @@ std::string level_zero_power_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const level_zero_power_samples &samples) {
-    return out << std::format("power_enforced_limit [W]: {}\n"
+    return out << fmt::format("power_enforced_limit [W]: {}\n"
                               "power_measurement_type [string]: {}\n"
                               "power_management_mode [bool]: {}\n"
+                              "power_usage [W]: [{}]\n"
                               "power_total_energy_consumption [J]: [{}]",
                               detail::value_or_default(samples.get_power_enforced_limit()),
                               detail::value_or_default(samples.get_power_measurement_type()),
                               detail::value_or_default(samples.get_power_management_mode()),
-                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_power_usage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "));
 }
 
 //*************************************************************************************************************************************//
@@ -300,7 +313,7 @@ std::string level_zero_memory_samples::generate_yaml_string() const {
     // the total memory
     if (this->memory_total_.has_value()) {
         for (const auto &[key, value] : this->memory_total_.value()) {
-            str += std::format("  memory_total_{}:\n"
+            str += fmt::format("  {}_memory_total:\n"
                                "    unit: \"B\"\n"
                                "    values: {}\n",
                                key,
@@ -308,40 +321,50 @@ std::string level_zero_memory_samples::generate_yaml_string() const {
         }
     }
     // the total allocatable memory
-    if (this->allocatable_memory_total_.has_value()) {
-        for (const auto &[key, value] : this->allocatable_memory_total_.value()) {
-            str += std::format("  allocatable_memory_total_{}:\n"
+    if (this->visible_memory_total_.has_value()) {
+        for (const auto &[key, value] : this->visible_memory_total_.value()) {
+            str += fmt::format("  {}_visible_memory_total:\n"
                                "    unit: \"B\"\n"
                                "    values: {}\n",
                                key,
                                value);
         }
     }
-    // the pcie max bandwidth
-    if (this->pcie_link_max_speed_.has_value()) {
-        str += std::format("  pcie_max_bandwidth:\n"
-                           "    unit: \"BPS\"\n"
-                           "    values: {}\n",
-                           this->pcie_link_max_speed_.value());
+    // the memory location (system or device)
+    if (this->memory_location_.has_value()) {
+        for (const auto &[key, value] : this->memory_location_.value()) {
+            str += fmt::format("  {}_memory_location:\n"
+                               "    unit: \"string\"\n"
+                               "    values: \"{}\"\n",
+                               key,
+                               value);
+        }
     }
     // the pcie link width
-    if (this->pcie_max_width_.has_value()) {
-        str += std::format("  max_pcie_link_width:\n"
+    if (this->num_pcie_lanes_max_.has_value()) {
+        str += fmt::format("  num_pcie_lanes_max:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
-                           this->pcie_max_width_.value());
+                           this->num_pcie_lanes_max_.value());
     }
     // the pcie generation
-    if (this->max_pcie_link_generation_.has_value()) {
-        str += std::format("  max_pcie_link_generation:\n"
+    if (this->pcie_link_generation_max_.has_value()) {
+        str += fmt::format("  pcie_link_generation_max:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
-                           this->max_pcie_link_generation_.value());
+                           this->pcie_link_generation_max_.value());
+    }
+    // the pcie max bandwidth
+    if (this->pcie_link_speed_max_.has_value()) {
+        str += fmt::format("  pcie_link_speed_max:\n"
+                           "    unit: \"MBPS\"\n"
+                           "    values: {}\n",
+                           this->pcie_link_speed_max_.value());
     }
     // the memory bus width
-    if (this->bus_width_.has_value()) {
-        for (const auto &[key, value] : this->bus_width_.value()) {
-            str += std::format("  memory_bus_width_{}:\n"
+    if (this->memory_bus_width_.has_value()) {
+        for (const auto &[key, value] : this->memory_bus_width_.value()) {
+            str += fmt::format("  {}_memory_bus_width:\n"
                                "    unit: \"Bit\"\n"
                                "    values: {}\n",
                                key,
@@ -349,69 +372,56 @@ std::string level_zero_memory_samples::generate_yaml_string() const {
         }
     }
     // the number of memory channels
-    if (this->num_channels_.has_value()) {
-        for (const auto &[key, value] : this->num_channels_.value()) {
-            str += std::format("  memory_num_channels_{}:\n"
+    if (this->memory_num_channels_.has_value()) {
+        for (const auto &[key, value] : this->memory_num_channels_.value()) {
+            str += fmt::format("  {}_memory_num_channels:\n"
                                "    unit: \"int\"\n"
                                "    values: {}\n",
                                key,
                                value);
         }
     }
-    // the memory location (system or device)
-    if (this->location_.has_value()) {
-        for (const auto &[key, value] : this->location_.value()) {
-            str += std::format("  memory_location_{}:\n"
-                               "    unit: \"string\"\n"
-                               "    values: \"{}\"\n",
-                               key,
-                               value);
-        }
-    }
 
-    // the currently free and used memory
+    // the currently free memory
     if (this->memory_free_.has_value()) {
         for (const auto &[key, value] : this->memory_free_.value()) {
-            str += std::format("  memory_free_{}:\n"
+            str += fmt::format("  {}_memory_free:\n"
                                "    unit: \"string\"\n"
                                "    values: [{}]\n",
                                key,
-                               detail::join(value, ", "));
-
-            // calculate the used memory
-            if (this->allocatable_memory_total_.has_value()) {
-                decltype(level_zero_memory_samples::memory_free_)::value_type::mapped_type memory_used(value.size(), this->allocatable_memory_total_->at(key));
-                for (std::size_t i = 0; i < memory_used.size(); ++i) {
-                    memory_used[i] -= value[i];
-                }
-                str += std::format("  memory_used_{}:\n"
-                                   "    unit: \"string\"\n"
-                                   "    values: [{}]\n",
-                                   key,
-                                   detail::join(memory_used, ", "));
-            }
+                               fmt::join(value, ", "));
         }
     }
-    // PCIe link speed
-    if (this->pcie_link_speed_.has_value()) {
-        str += std::format("  pcie_bandwidth:\n"
-                           "    unit: \"MBPS\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->pcie_link_speed_.value(), ", "));
+    // the currently used memory
+    if (this->memory_used_.has_value()) {
+        for (const auto &[key, value] : this->memory_used_.value()) {
+            str += fmt::format("  {}_memory_used:\n"
+                               "    unit: \"string\"\n"
+                               "    values: [{}]\n",
+                               key,
+                               fmt::join(value, ", "));
+        }
     }
     // PCIe link width
-    if (this->pcie_link_width_.has_value()) {
-        str += std::format("  pcie_link_width:\n"
+    if (this->num_pcie_lanes_.has_value()) {
+        str += fmt::format("  num_pcie_lanes:\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->pcie_link_width_.value(), ", "));
+                           fmt::join(this->num_pcie_lanes_.value(), ", "));
     }
     // PCIe link generation
     if (this->pcie_link_generation_.has_value()) {
-        str += std::format("  pcie_link_generation:\n"
+        str += fmt::format("  pcie_link_generation:\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->pcie_link_generation_.value(), ", "));
+                           fmt::join(this->pcie_link_generation_.value(), ", "));
+    }
+    // PCIe link speed
+    if (this->pcie_link_speed_.has_value()) {
+        str += fmt::format("  pcie_link_speed:\n"
+                           "    unit: \"MBPS\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->pcie_link_speed_.value(), ", "));
     }
 
     // remove last newline
@@ -423,27 +433,26 @@ std::string level_zero_memory_samples::generate_yaml_string() const {
 std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &samples) {
     std::string str{};
 
-    detail::append_map_values(str, "memory_total [B]", samples.get_memory_total());
-    detail::append_map_values(str, "allocatable_memory_total [B]", samples.get_allocatable_memory_total());
-
-    str += std::format("pcie_link_max_speed [BPS]: {}\n"
-                       "pcie_max_width [int]: {}\n"
-                       "max_pcie_link_generation [int]: {}\n",
-                       detail::value_or_default(samples.get_pcie_link_max_speed()),
-                       detail::value_or_default(samples.get_pcie_max_width()),
-                       detail::value_or_default(samples.get_max_pcie_link_generation()));
-
-    detail::append_map_values(str, "bus_width [Bit]", samples.get_bus_width());
-    detail::append_map_values(str, "num_channels [int]", samples.get_num_channels());
-    detail::append_map_values(str, "location [string]", samples.get_location());
-    detail::append_map_values(str, "memory_free [string]", samples.get_memory_free());
-
-    str += std::format("pcie_link_speed [MBPS]: [{}]\n"
-                       "pcie_link_width [int]: [{}]\n"
-                       "pcie_link_generation [int]: [{}]",
-                       detail::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "),
-                       detail::join(detail::value_or_default(samples.get_pcie_link_width()), ", "),
-                       detail::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "));
+    append_map_values(str, "memory_total [B]", samples.get_memory_total());
+    append_map_values(str, "visible_memory_total [B]", samples.get_visible_memory_total());
+    append_map_values(str, "memory_location [string]", samples.get_memory_location());
+    str += fmt::format("num_pcie_lanes_max [int]: {}\n"
+                       "pcie_link_generation_max [int]: {}\n"
+                       "pcie_link_speed_max [MBPS]: {}\n",
+                       detail::value_or_default(samples.get_num_pcie_lanes_max()),
+                       detail::value_or_default(samples.get_pcie_link_generation_max()),
+                       detail::value_or_default(samples.get_pcie_link_speed_max()));
+    append_map_values(str, "memory_bus_width [Bit]", samples.get_memory_bus_width());
+    append_map_values(str, "memory_num_channels [int]", samples.get_memory_num_channels());
+
+    append_map_values(str, "memory_free [string]", samples.get_memory_free());
+    append_map_values(str, "memory_used [string]", samples.get_memory_used());
+    str += fmt::format("num_pcie_lanes [int]: [{}]\n"
+                       "pcie_link_generation [int]: [{}]\n"
+                       "pcie_link_speed [MBPS]: [{}]",
+                       fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "),
+                       fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "),
+                       fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "));
 
     return out << str;
 }
@@ -455,33 +464,76 @@ std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &sam
 std::string level_zero_temperature_samples::generate_yaml_string() const {
     std::string str{ "temperature:\n" };
 
-    // the maximum sensor temperature
+    // the number of fans
+    if (this->num_fans_.has_value()) {
+        str += fmt::format("  num_fans:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->num_fans_.value());
+    }
+    // the maximum fan speed in RPM
+    if (this->fan_speed_max_.has_value()) {
+        str += fmt::format("  fan_speed_max:\n"
+                           "    unit: \"RPM\"\n"
+                           "    values: {}\n",
+                           this->fan_speed_max_.value());
+    }
+    // the maximum GPU temperature
     if (this->temperature_max_.has_value()) {
-        for (const auto &[key, value] : this->temperature_max_.value()) {
-            str += std::format("  temperature_{}_max:\n"
-                               "    unit: \"°C\"\n"
-                               "    values: {}\n",
-                               key,
-                               value);
-        }
+        str += fmt::format("  temperature_max:\n"
+                           "    unit: \"°C\"\n"
+                           "    values: {}\n",
+                           this->temperature_max_.value());
     }
-
-    // the current PSU temperatures
-    if (this->temperature_psu_.has_value()) {
-        str += std::format("  temperature_psu:\n"
+    // the maximum memory temperature
+    if (this->memory_temperature_max_.has_value()) {
+        str += fmt::format("  memory_temperature_max:\n"
+                           "    unit: \"°C\"\n"
+                           "    values: {}\n",
+                           this->memory_temperature_max_.value());
+    }
+    // the maximum global temperature
+    if (this->global_temperature_max_.has_value()) {
+        str += fmt::format("  global_temperature_max:\n"
                            "    unit: \"°C\"\n"
+                           "    values: {}\n",
+                           this->global_temperature_max_.value());
+    }
+
+    // the current fan speed in percent
+    if (this->fan_speed_percentage_.has_value()) {
+        str += fmt::format("  fan_speed_percentage:\n"
+                           "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->temperature_psu_.value(), ", "));
+                           fmt::join(this->fan_speed_percentage_.value(), ", "));
     }
-    // the current sensor temperatures
+    // the current GPU temperature
     if (this->temperature_.has_value()) {
-        for (const auto &[key, value] : this->temperature_.value()) {
-            str += std::format("  temperature_{}:\n"
-                               "    unit: \"°C\"\n"
-                               "    values: [{}]\n",
-                               key,
-                               detail::join(value, ", "));
-        }
+        str += fmt::format("  temperature:\n"
+                           "    unit: \"°C\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->temperature_.value(), ", "));
+    }
+    // the current memory temperature
+    if (this->memory_temperature_.has_value()) {
+        str += fmt::format("  memory_temperature:\n"
+                           "    unit: \"°C\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_temperature_.value(), ", "));
+    }
+    // the current global temperature
+    if (this->global_temperature_.has_value()) {
+        str += fmt::format("  global_temperature:\n"
+                           "    unit: \"°C\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->global_temperature_.value(), ", "));
+    }
+    // the current PSU temperature
+    if (this->psu_temperature_.has_value()) {
+        str += fmt::format("  psu_temperature:\n"
+                           "    unit: \"°C\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->psu_temperature_.value(), ", "));
     }
 
     // remove last newline
@@ -491,19 +543,26 @@ std::string level_zero_temperature_samples::generate_yaml_string() const {
 }
 
 std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples &samples) {
-    std::string str{};
-
-    detail::append_map_values(str, "temperature_max [°C]", samples.get_temperature_max());
-
-    str += std::format("temperature_psu [°C]: [{}]\n",
-                       detail::join(detail::value_or_default(samples.get_temperature_psu()), ", "));
-
-    detail::append_map_values(str, "temperature [°C]", samples.get_temperature());
-
-    // remove last newline
-    str.pop_back();
-
-    return out << str;
+    return out << fmt::format("num_fans [int]: {}\n"
+                              "fan_speed_max [RPM]: {}\n"
+                              "temperature_max [°C]: {}\n"
+                              "memory_temperature_max [°C]: {}\n"
+                              "global_temperature_max [°C]: {}\n"
+                              "fan_speed_percentage [%]: [{}]\n"
+                              "temperature [°C]: [{}]\n"
+                              "memory_temperature [°C]: [{}]\n"
+                              "global_temperature [°C]: [{}]\n"
+                              "psu_temperature [°C]: [{}]",
+                              detail::value_or_default(samples.get_num_fans()),
+                              detail::value_or_default(samples.get_fan_speed_max()),
+                              detail::value_or_default(samples.get_temperature_max()),
+                              detail::value_or_default(samples.get_memory_temperature_max()),
+                              detail::value_or_default(samples.get_global_temperature_max()),
+                              fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_global_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_psu_temperature()), ", "));
 }
 
 }  // namespace hws
diff --git a/src/hardware_sampling/gpu_intel/utility.cpp b/src/hardware_sampling/gpu_intel/utility.cpp
index a0cefe9..635b5c1 100644
--- a/src/hardware_sampling/gpu_intel/utility.cpp
+++ b/src/hardware_sampling/gpu_intel/utility.cpp
@@ -7,14 +7,66 @@
 
 #include "hardware_sampling/gpu_intel/utility.hpp"
 
+#include "fmt/format.h"          // fmt::format
+#include "fmt/ranges.h"          // fmt::join
 #include "level_zero/ze_api.h"   // Level Zero runtime functions
 #include "level_zero/zes_api.h"  // Level Zero runtime functions
 
 #include <string>       // std::string
 #include <string_view>  // std::string_view
+#include <vector>       // std::vector
 
 namespace hws::detail {
 
+std::vector<std::string> property_flags_to_vector(const ze_device_property_flags_t flags) {
+    std::vector<std::string> string_flags{};
+
+    if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) != 0) {
+        string_flags.emplace_back("integrated_gpu");
+    }
+    if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) != 0) {
+        string_flags.emplace_back("sub-device");
+    }
+    if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_ECC) != 0) {
+        string_flags.emplace_back("ecc");
+    }
+    if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING) != 0) {
+        string_flags.emplace_back("on-demand_page-faulting");
+    }
+
+    return string_flags;
+}
+
+std::string throttle_reason_to_string(const zes_freq_throttle_reason_flags_t reasons) {
+    if (reasons == 0) {
+        return "None";
+    } else {
+        std::vector<std::string> string_reasons{};
+        if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_AVE_PWR_CAP) != 0) {
+            string_reasons.emplace_back("average_power");
+        }
+        if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_BURST_PWR_CAP) != 0) {
+            string_reasons.emplace_back("burst_power");
+        }
+        if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_CURRENT_LIMIT) != 0) {
+            string_reasons.emplace_back("current_limit");
+        }
+        if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_THERMAL_LIMIT) != 0) {
+            string_reasons.emplace_back("thermal_limit");
+        }
+        if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_PSU_ALERT) != 0) {
+            string_reasons.emplace_back("psu_assertion");
+        }
+        if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_SW_RANGE) != 0) {
+            string_reasons.emplace_back("software_frequency_range");
+        }
+        if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_HW_RANGE) != 0) {
+            string_reasons.emplace_back("hardware_frequency_range");
+        }
+        return fmt::format("{}", fmt::join(string_reasons, "|"));
+    }
+}
+
 std::string_view to_result_string(const ze_result_t errc) {
     switch (errc) {
         case ZE_RESULT_SUCCESS:
@@ -175,17 +227,4 @@ std::string memory_location_to_name(const zes_mem_loc_t mem_loc) {
     }
 }
 
-std::string temperature_sensor_type_to_name(const zes_temp_sensors_t sensor_type) {
-    switch (sensor_type) {
-        case ZES_TEMP_SENSORS_GLOBAL:
-            return "global";
-        case ZES_TEMP_SENSORS_GPU:
-            return "gpu";
-        case ZES_TEMP_SENSORS_MEMORY:
-            return "memory";
-        default:
-            return "";
-    }
-}
-
 }  // namespace hws::detail

From 81fe9cd966384cc4c81edddd5ae08c9b1cdf7cfe Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 13:47:29 +0200
Subject: [PATCH 46/69] Add function to check whether a sample category has any
 sample. Output category to YAML file only if at least one sample is
 available.

---
 bindings/cpu_hardware_sampler.cpp             |  7 ++
 bindings/gpu_amd_hardware_sampler.cpp         |  5 +
 bindings/gpu_intel_hardware_sampler.cpp       |  5 +
 bindings/gpu_nvidia_hardware_sampler.cpp      |  5 +
 include/hardware_sampling/cpu/cpu_samples.hpp | 42 +++++++++
 .../gpu_amd/rocm_smi_samples.hpp              | 30 ++++++
 .../gpu_intel/level_zero_samples.hpp          | 30 ++++++
 .../gpu_nvidia/nvml_samples.hpp               | 29 ++++++
 src/hardware_sampling/cpu/cpu_samples.cpp     | 94 ++++++++++++++-----
 .../cpu/hardware_sampler.cpp                  | 12 +--
 .../gpu_amd/hardware_sampler.cpp              |  8 +-
 .../gpu_amd/rocm_smi_samples.cpp              | 76 ++++++++++++---
 .../gpu_intel/hardware_sampler.cpp            |  8 +-
 .../gpu_intel/level_zero_samples.cpp          | 71 +++++++++++---
 .../gpu_nvidia/hardware_sampler.cpp           |  8 +-
 .../gpu_nvidia/nvml_samples.cpp               | 72 +++++++++++---
 src/hardware_sampling/hardware_sampler.cpp    |  6 +-
 17 files changed, 421 insertions(+), 87 deletions(-)

diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp
index 354c585..ba31dfe 100644
--- a/bindings/cpu_hardware_sampler.cpp
+++ b/bindings/cpu_hardware_sampler.cpp
@@ -21,6 +21,7 @@ namespace py = pybind11;
 void init_cpu_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::cpu_general_samples>(m, "CpuGeneralSamples")
+        .def("has_samples", &hws::cpu_general_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_architecture", &hws::cpu_general_samples::get_architecture, "the CPU architecture (e.g., x86_64)")
         .def("get_byte_order", &hws::cpu_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
         .def("get_num_cores", &hws::cpu_general_samples::get_num_cores, "the total number of cores of the CPU(s)")
@@ -44,6 +45,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
 
     // bind the clock samples
     py::class_<hws::cpu_clock_samples>(m, "CpuClockSamples")
+        .def("has_samples", &hws::cpu_clock_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_auto_boosted_clock_enabled", &hws::cpu_clock_samples::get_auto_boosted_clock_enabled, "true if frequency boosting is enabled")
         .def("get_clock_frequency_min", &hws::cpu_clock_samples::get_clock_frequency_min, "the minimum possible CPU frequency in MHz")
         .def("get_clock_frequency_max", &hws::cpu_clock_samples::get_clock_frequency_max, "the maximum possible CPU frequency in MHz")
@@ -56,6 +58,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
 
     // bind the power samples
     py::class_<hws::cpu_power_samples>(m, "CpuPowerSamples")
+        .def("has_samples", &hws::cpu_power_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_power_measurement_type", &hws::cpu_power_samples::get_power_measurement_type, "the type of the power readings: always \"instant/current\"")
         .def("get_power_usage", &hws::cpu_power_samples::get_power_usage, "the currently consumed power of the package of the CPU in W")
         .def("get_power_total_energy_consumed", &hws::cpu_power_samples::get_power_total_energy_consumption, "the total power consumption in J")
@@ -69,6 +72,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
 
     // bind the memory samples
     py::class_<hws::cpu_memory_samples>(m, "CpuMemorySamples")
+        .def("has_samples", &hws::cpu_memory_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_cache_size_L1d", &hws::cpu_memory_samples::get_cache_size_L1d, "the size of the L1 data cache")
         .def("get_cache_size_L1i", &hws::cpu_memory_samples::get_cache_size_L1i, "the size of the L1 instruction cache")
         .def("get_cache_size_L2", &hws::cpu_memory_samples::get_cache_size_L2, "the size of the L2 cache")
@@ -85,6 +89,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
 
     // bind the temperature samples
     py::class_<hws::cpu_temperature_samples>(m, "CpuTemperatureSamples")
+        .def("has_samples", &hws::cpu_temperature_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_temperature", &hws::cpu_temperature_samples::get_temperature, "the current temperature of the whole package in °C")
         .def("get_core_temperature", &hws::cpu_temperature_samples::get_core_temperature, "the current temperature of the core part of the CPU in °C")
         .def("get_core_throttle_percent", &hws::cpu_temperature_samples::get_core_throttle_percent, "the percent of time the CPU has throttled")
@@ -94,6 +99,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
 
     // bind the gfx samples
     py::class_<hws::cpu_gfx_samples>(m, "CpuGfxSamples")
+        .def("has_samples", &hws::cpu_gfx_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_gfx_render_state_percent", &hws::cpu_gfx_samples::get_gfx_render_state_percent, "the percent of time the iGPU was in the render state")
         .def("get_gfx_frequency", &hws::cpu_gfx_samples::get_gfx_frequency, "the current iGPU power consumption in W")
         .def("get_average_gfx_frequency", &hws::cpu_gfx_samples::get_average_gfx_frequency, "the average iGPU frequency in MHz")
@@ -106,6 +112,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
 
     // bind the idle state samples
     py::class_<hws::cpu_idle_states_samples>(m, "CpuIdleStateSamples")
+        .def("has_samples", &hws::cpu_idle_states_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_idle_states", &hws::cpu_idle_states_samples::get_idle_states, "the map of additional CPU idle states")
         .def("get_all_cpus_state_c0_percent", &hws::cpu_idle_states_samples::get_all_cpus_state_c0_percent, "the percent of time all CPUs were in idle state c0")
         .def("get_any_cpu_state_c0_percent", &hws::cpu_idle_states_samples::get_any_cpu_state_c0_percent, "the percent of time any CPU was in the idle state c0")
diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp
index 9ffd042..c738340 100644
--- a/bindings/gpu_amd_hardware_sampler.cpp
+++ b/bindings/gpu_amd_hardware_sampler.cpp
@@ -22,6 +22,7 @@ namespace py = pybind11;
 void init_gpu_amd_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::rocm_smi_general_samples>(m, "RocmSmiGeneralSamples")
+        .def("has_samples", &hws::rocm_smi_general_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_architecture", &hws::rocm_smi_general_samples::get_name, "the architecture name of the device")
         .def("get_byte_order", &hws::rocm_smi_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
         .def("get_vendor_id", &hws::rocm_smi_general_samples::get_vendor_id, "the vendor ID")
@@ -35,6 +36,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) {
 
     // bind the clock samples
     py::class_<hws::rocm_smi_clock_samples>(m, "RocmSmiClockSamples")
+        .def("has_samples", &hws::rocm_smi_clock_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_clock_frequency_min", &hws::rocm_smi_clock_samples::get_clock_frequency_min, "the minimum possible system clock frequency in MHz")
         .def("get_clock_frequency_max", &hws::rocm_smi_clock_samples::get_clock_frequency_max, "the maximum possible system clock frequency in MHz")
         .def("get_memory_clock_frequency_min", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz")
@@ -54,6 +56,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) {
 
     // bind the power samples
     py::class_<hws::rocm_smi_power_samples>(m, "RocmSmiPowerSamples")
+        .def("has_samples", &hws::rocm_smi_power_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_power_management_limit", &hws::rocm_smi_power_samples::get_power_management_limit, "the default power cap (W), may be different from power cap")
         .def("get_power_enforced_limit", &hws::rocm_smi_power_samples::get_power_enforced_limit, "if the GPU draws more power (W) than the power cap, the GPU may throttle")
         .def("get_power_measurement_type", &hws::rocm_smi_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw")
@@ -67,6 +70,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) {
 
     // bind the memory samples
     py::class_<hws::rocm_smi_memory_samples>(m, "RocmSmiMemorySamples")
+        .def("has_samples", &hws::rocm_smi_memory_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_memory_total", &hws::rocm_smi_memory_samples::get_memory_total, "the total available memory in Byte")
         .def("get_visible_memory_total", &hws::rocm_smi_memory_samples::get_visible_memory_total, "the total visible available memory in Byte, may be smaller than the total memory")
         .def("get_num_pcie_lanes_min", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_min, "the minimum number of used PCIe lanes")
@@ -83,6 +87,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) {
 
     // bind the temperature samples
     py::class_<hws::rocm_smi_temperature_samples>(m, "RocmSmiTemperatureSamples")
+        .def("has_samples", &hws::rocm_smi_temperature_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_num_fans", &hws::rocm_smi_temperature_samples::get_num_fans, "the number of fans (if any)")
         .def("get_fan_speed_max", &hws::rocm_smi_temperature_samples::get_fan_speed_max, "the maximum fan speed in RPM")
         .def("get_temperature_min", &hws::rocm_smi_temperature_samples::get_temperature_min, "the minimum temperature on the GPU's edge temperature sensor in °C")
diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp
index 8cf6f83..77b67fc 100644
--- a/bindings/gpu_intel_hardware_sampler.cpp
+++ b/bindings/gpu_intel_hardware_sampler.cpp
@@ -22,6 +22,7 @@ namespace py = pybind11;
 void init_gpu_intel_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::level_zero_general_samples>(m, "LevelZeroGeneralSamples")
+        .def("has_samples", &hws::level_zero_general_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_byte_order", &hws::level_zero_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
         .def("get_vendor_id", &hws::level_zero_general_samples::get_vendor_id, "the vendor ID")
         .def("get_name", &hws::level_zero_general_samples::get_name, "the model name of the device")
@@ -35,6 +36,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
 
     // bind the clock samples
     py::class_<hws::level_zero_clock_samples>(m, "LevelZeroClockSamples")
+        .def("has_samples", &hws::level_zero_clock_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_clock_frequency_min", &hws::level_zero_clock_samples::get_clock_frequency_min, "the minimum possible GPU clock frequency in MHz")
         .def("get_clock_frequency_max", &hws::level_zero_clock_samples::get_clock_frequency_max, "the maximum possible GPU clock frequency in MHz")
         .def("get_memory_clock_frequency_min", &hws::level_zero_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz")
@@ -53,6 +55,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
 
     // bind the power samples
     py::class_<hws::level_zero_power_samples>(m, "LevelZeroPowerSamples")
+        .def("has_samples", &hws::level_zero_power_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_power_enforced_limit", &hws::level_zero_power_samples::get_power_enforced_limit, "the actually enforced power limit (W), may be different from power management limit if external limiters are set")
         .def("get_power_measurement_type", &hws::level_zero_power_samples::get_power_measurement_type, "the type of the power readings")
         .def("get_power_management_mode", &hws::level_zero_power_samples::get_power_management_mode, "true if power management limits are enabled")
@@ -64,6 +67,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
 
     // bind the memory samples
     py::class_<hws::level_zero_memory_samples>(m, "LevelZeroMemorySamples")
+        .def("has_samples", &hws::level_zero_memory_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_memory_total", &hws::level_zero_memory_samples::get_memory_total, "the total memory size of the different memory modules in Bytes")
         .def("get_visible_memory_total", &hws::level_zero_memory_samples::get_visible_memory_total, "the total allocatable memory size of the different memory modules in Bytes")
         .def("get_memory_location", &hws::level_zero_memory_samples::get_memory_location, "the location of the different memory modules (system or device)")
@@ -83,6 +87,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
 
     // bind the temperature samples
     py::class_<hws::level_zero_temperature_samples>(m, "LevelZeroTemperatureSamples")
+        .def("has_samples", &hws::level_zero_temperature_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_num_fans", &hws::level_zero_temperature_samples::get_num_fans, "the number of fans")
         .def("get_fan_speed_max", &hws::level_zero_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in RPM")
         .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum GPU temperature in °C")
diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp
index 1550c07..f9b9261 100644
--- a/bindings/gpu_nvidia_hardware_sampler.cpp
+++ b/bindings/gpu_nvidia_hardware_sampler.cpp
@@ -22,6 +22,7 @@ namespace py = pybind11;
 void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::nvml_general_samples>(m, "NvmlGeneralSamples")
+        .def("has_samples", &hws::nvml_general_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_architecture", &hws::nvml_general_samples::get_architecture, "the architecture name of the device")
         .def("get_byte_order", &hws::nvml_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
         .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores")
@@ -37,6 +38,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
 
     // bind the clock samples
     py::class_<hws::nvml_clock_samples>(m, "NvmlClockSamples")
+        .def("has_samples", &hws::nvml_clock_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_auto_boosted_clock_enabled", &hws::nvml_clock_samples::get_auto_boosted_clock_enabled, "true if clock boosting is currently enabled")
         .def("get_clock_frequency_min", &hws::nvml_clock_samples::get_clock_frequency_min, "the minimum possible graphics clock frequency in MHz")
         .def("get_clock_frequency_max", &hws::nvml_clock_samples::get_clock_frequency_max, "the maximum possible graphics clock frequency in MHz")
@@ -56,6 +58,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
 
     // bind the power samples
     py::class_<hws::nvml_power_samples>(m, "NvmlPowerSamples")
+        .def("has_samples", &hws::nvml_power_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_power_management_limit", &hws::nvml_power_samples::get_power_management_limit, "if the GPU draws more power (mW) than the power management limit, the GPU may throttle")
         .def("get_power_enforced_limit", &hws::nvml_power_samples::get_power_enforced_limit, "the actually enforced power limit, may be different from power management limit if external limiters are set")
         .def("get_power_measurement_type", &hws::nvml_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw")
@@ -70,6 +73,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
 
     // bind the memory samples
     py::class_<hws::nvml_memory_samples>(m, "NvmlMemorySamples")
+        .def("has_samples", &hws::nvml_memory_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_memory_total", &hws::nvml_memory_samples::get_memory_total, "the total available memory in Byte")
         .def("get_num_pcie_lanes_max", &hws::nvml_memory_samples::get_num_pcie_lanes_max, "the maximum number of PCIe lanes")
         .def("get_pcie_link_generation_max", &hws::nvml_memory_samples::get_pcie_link_generation_max, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)")
@@ -86,6 +90,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
 
     // bind the temperature samples
     py::class_<hws::nvml_temperature_samples>(m, "NvmlTemperatureSamples")
+        .def("has_samples", &hws::nvml_temperature_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_num_fans", &hws::nvml_temperature_samples::get_num_fans, "the number of fans (if any)")
         .def("get_fan_speed_min", &hws::nvml_temperature_samples::get_fan_speed_min, "the minimum fan speed the user can set in %")
         .def("get_fan_speed_max", &hws::nvml_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in %")
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index f92ba0d..3bd8a49 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -36,9 +36,15 @@ class cpu_general_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any general hardware sample is present.
+     * @return `true` if any general hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -84,9 +90,15 @@ class cpu_clock_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any clock related hardware sample is present.
+     * @return `true` if any clock related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -121,9 +133,15 @@ class cpu_power_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any power related hardware sample is present.
+     * @return `true` if any power related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -159,9 +177,15 @@ class cpu_memory_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any memory related hardware sample is present.
+     * @return `true` if any memory related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -200,9 +224,15 @@ class cpu_temperature_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any temperature related hardware sample is present.
+     * @return `true` if any temperature related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -233,9 +263,15 @@ class cpu_gfx_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any gfx related hardware sample is present.
+     * @return `true` if any gfx related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -271,9 +307,15 @@ class cpu_idle_states_samples {
     using map_type = std::unordered_map<std::string, std::vector<double>>;
 
   public:
+    /**
+     * @brief Checks whether any idle state related hardware sample is present.
+     * @return `true` if any idle state related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index af5228b..727e683 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -36,9 +36,15 @@ class rocm_smi_general_samples {
     friend class gpu_amd_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any general hardware sample is present.
+     * @return `true` if any general hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -74,9 +80,15 @@ class rocm_smi_clock_samples {
     friend class gpu_amd_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any clock related hardware sample is present.
+     * @return `true` if any clock related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -118,9 +130,15 @@ class rocm_smi_power_samples {
     friend class gpu_amd_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any power related hardware sample is present.
+     * @return `true` if any power related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -156,9 +174,15 @@ class rocm_smi_memory_samples {
     friend class gpu_amd_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any memory related hardware sample is present.
+     * @return `true` if any memory related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -197,9 +221,15 @@ class rocm_smi_temperature_samples {
     friend class gpu_amd_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any temperature related hardware sample is present.
+     * @return `true` if any temperature related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
index 9900f95..a1c56c2 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
@@ -37,9 +37,15 @@ class level_zero_general_samples {
     friend class gpu_intel_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any general hardware sample is present.
+     * @return `true` if any general hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -75,9 +81,15 @@ class level_zero_clock_samples {
     friend class gpu_intel_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any clock related hardware sample is present.
+     * @return `true` if any clock related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -118,9 +130,15 @@ class level_zero_power_samples {
     friend class gpu_intel_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any power related hardware sample is present.
+     * @return `true` if any power related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -161,9 +179,15 @@ class level_zero_memory_samples {
     using map_type = std::unordered_map<std::string, T>;
 
   public:
+    /**
+     * @brief Checks whether any memory related hardware sample is present.
+     * @return `true` if any memory related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -205,9 +229,15 @@ class level_zero_temperature_samples {
     friend class gpu_intel_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any temperature related hardware sample is present.
+     * @return `true` if any temperature related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index c6e7ad9..31acebb 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -36,9 +36,15 @@ class nvml_general_samples {
     friend class gpu_nvidia_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any general hardware sample is present.
+     * @return `true` if any general hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -78,9 +84,15 @@ class nvml_clock_samples {
     using map_type = std::map<double, std::vector<double>>;
 
   public:
+    /**
+     * @brief Checks whether any clock related hardware sample is present.
+     * @return `true` if any clock related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -122,9 +134,15 @@ class nvml_power_samples {
     friend class gpu_nvidia_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any power related hardware sample is present.
+     * @return `true` if any power related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -161,9 +179,15 @@ class nvml_memory_samples {
     friend class gpu_nvidia_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any memory related hardware sample is present.
+     * @return `true` if any memory related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -202,6 +226,11 @@ class nvml_temperature_samples {
     friend class gpu_nvidia_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any temperature related hardware sample is present.
+     * @return `true` if any temperature related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hardware_sampling/cpu/cpu_samples.cpp
index cc99d76..3ef3ad7 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hardware_sampling/cpu/cpu_samples.cpp
@@ -26,7 +26,19 @@ namespace hws {
 //                                                           general samples                                                           //
 //*************************************************************************************************************************************//
 
+bool cpu_general_samples::has_samples() const {  // true if at least one of the general sample members listed below holds a value
+    return this->architecture_.has_value() || this->byte_order_.has_value() || this->num_cores_.has_value() || this->num_threads_.has_value()
+           || this->threads_per_core_.has_value() || this->cores_per_socket_.has_value() || this->num_sockets_.has_value() || this->numa_nodes_.has_value()
+           || this->vendor_id_.has_value() || this->name_.has_value() || this->flags_.has_value() || this->compute_utilization_.has_value()
+           || this->ipc_.has_value() || this->irq_.has_value() || this->smi_.has_value() || this->poll_.has_value() || this->poll_percent_.has_value();
+}
+
 std::string cpu_general_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "general:\n" };
 
     // architecture
@@ -156,9 +168,6 @@ std::string cpu_general_samples::generate_yaml_string() const {
                            fmt::join(this->poll_percent_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -208,7 +217,17 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples)
 //                                                            clock samples                                                            //
 //*************************************************************************************************************************************//
 
+bool cpu_clock_samples::has_samples() const {  // true if at least one of the clock sample members listed below holds a value
+    return this->auto_boosted_clock_enabled_.has_value() || this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value()
+           || this->clock_frequency_.has_value() || this->average_non_idle_clock_frequency_.has_value() || this->time_stamp_counter_.has_value();
+}
+
 std::string cpu_clock_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "clock:\n" };
 
     // true if frequency boost is enabled
@@ -258,9 +277,6 @@ std::string cpu_clock_samples::generate_yaml_string() const {
                            fmt::join(this->time_stamp_counter_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -283,7 +299,18 @@ std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) {
 //                                                            power samples                                                            //
 //*************************************************************************************************************************************//
 
+bool cpu_power_samples::has_samples() const {  // true if at least one of the power sample members listed below holds a value
+    return this->power_measurement_type_.has_value() || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value()
+           || this->core_watt_.has_value() || this->ram_watt_.has_value() || this->package_rapl_throttle_percent_.has_value()
+           || this->dram_rapl_throttle_percent_.has_value();
+}
+
 std::string cpu_power_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "power:\n" };
 
     // power measurement type
@@ -343,9 +370,6 @@ std::string cpu_power_samples::generate_yaml_string() const {
                            fmt::join(this->dram_rapl_throttle_percent_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -370,7 +394,18 @@ std::ostream &operator<<(std::ostream &out, const cpu_power_samples &samples) {
 //                                                            memory samples                                                           //
 //*************************************************************************************************************************************//
 
+bool cpu_memory_samples::has_samples() const {  // true if at least one of the memory sample members listed below holds a value
+    return this->cache_size_L1d_.has_value() || this->cache_size_L1i_.has_value() || this->cache_size_L2_.has_value() || this->cache_size_L3_.has_value()
+           || this->memory_total_.has_value() || this->swap_memory_total_.has_value() || this->memory_used_.has_value() || this->memory_free_.has_value()
+           || this->swap_memory_used_.has_value() || this->swap_memory_free_.has_value();
+}
+
 std::string cpu_memory_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "memory:\n" };
 
     // the size of the L1 data cache
@@ -446,9 +481,6 @@ std::string cpu_memory_samples::generate_yaml_string() const {
                            fmt::join(this->swap_memory_free_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -479,7 +511,16 @@ std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) {
 //                                                         temperature samples                                                         //
 //*************************************************************************************************************************************//
 
+bool cpu_temperature_samples::has_samples() const {  // true if at least one of the temperature sample members listed below holds a value
+    return this->temperature_.has_value() || this->core_temperature_.has_value() || this->core_throttle_percent_.has_value();
+}
+
 std::string cpu_temperature_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "temperature:\n" };
 
     // the temperature of the whole package
@@ -507,9 +548,6 @@ std::string cpu_temperature_samples::generate_yaml_string() const {
                            fmt::join(this->core_throttle_percent_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -526,7 +564,17 @@ std::ostream &operator<<(std::ostream &out, const cpu_temperature_samples &sampl
 //                                                          gfx (iGPU) samples                                                         //
 //*************************************************************************************************************************************//
 
+bool cpu_gfx_samples::has_samples() const {  // true if at least one of the gfx (iGPU) sample members listed below holds a value
+    return this->gfx_render_state_percent_.has_value() || this->gfx_frequency_.has_value() || this->average_gfx_frequency_.has_value()
+           || this->gfx_state_c0_percent_.has_value() || this->cpu_works_for_gpu_percent_.has_value() || this->gfx_watt_.has_value();
+}
+
 std::string cpu_gfx_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "integrated_gpu:\n" };
 
     // the percentage of time the iGPU was in the render state
@@ -578,9 +626,6 @@ std::string cpu_gfx_samples::generate_yaml_string() const {
                            fmt::join(this->gfx_watt_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -603,7 +648,17 @@ std::ostream &operator<<(std::ostream &out, const cpu_gfx_samples &samples) {
 //                                                          idle state samples                                                         //
 //*************************************************************************************************************************************//
 
+bool cpu_idle_states_samples::has_samples() const {  // true if at least one of the idle state sample members listed below holds a value
+    return this->all_cpus_state_c0_percent_.has_value() || this->any_cpu_state_c0_percent_.has_value() || this->low_power_idle_state_percent_.has_value()
+           || this->system_low_power_idle_state_percent_.has_value() || this->package_low_power_idle_state_percent_.has_value() || this->idle_states_.has_value();
+}
+
 std::string cpu_idle_states_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "idle_states:\n" };
 
     // the percentage of time all CPUs were in the c0 state
@@ -687,9 +742,6 @@ std::string cpu_idle_states_samples::generate_yaml_string() const {
         }
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index d4a6754..7e89eca 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -428,12 +428,12 @@ std::string cpu_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
+    return fmt::format("{}\n"
+                       "{}\n"
+                       "{}\n"
+                       "{}\n"
+                       "{}\n"
+                       "{}\n"
                        "{}",
                        general_samples_.generate_yaml_string(),
                        clock_samples_.generate_yaml_string(),
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index dbd2971..61a8456 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -687,10 +687,10 @@ std::string gpu_amd_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
+    return fmt::format("{}\n"
+                       "{}\n"
+                       "{}\n"
+                       "{}\n"
                        "{}",
                        general_samples_.generate_yaml_string(),
                        clock_samples_.generate_yaml_string(),
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
index 641ca29..e93c36b 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
@@ -21,7 +21,17 @@ namespace hws {
 //                                                           general samples                                                           //
 //*************************************************************************************************************************************//
 
+bool rocm_smi_general_samples::has_samples() const {  // true if at least one of the general sample members listed below holds a value
+    return this->architecture_.has_value() || this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value()
+           || this->compute_utilization_.has_value() || this->memory_utilization_.has_value() || this->performance_level_.has_value();
+}
+
 std::string rocm_smi_general_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "general:\n" };
 
     // device architecture
@@ -75,9 +85,6 @@ std::string rocm_smi_general_samples::generate_yaml_string() const {
                            fmt::join(detail::quote(this->performance_level_.value()), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -102,7 +109,20 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samp
 //                                                            clock samples                                                            //
 //*************************************************************************************************************************************//
 
+bool rocm_smi_clock_samples::has_samples() const {  // true if at least one of the clock sample members listed below holds a value
+    return this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() || this->memory_clock_frequency_min_.has_value()
+           || this->memory_clock_frequency_max_.has_value() || this->socket_clock_frequency_min_.has_value() || this->socket_clock_frequency_max_.has_value()
+           || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value()
+           || this->memory_clock_frequency_.has_value() || this->socket_clock_frequency_.has_value() || this->overdrive_level_.has_value()
+           || this->memory_overdrive_level_.has_value();
+}
+
 std::string rocm_smi_clock_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "clock:\n" };
 
     // system clock min frequencies
@@ -198,9 +218,6 @@ std::string rocm_smi_clock_samples::generate_yaml_string() const {
                            fmt::join(this->memory_overdrive_level_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -237,7 +254,18 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &sample
 //                                                            power samples                                                            //
 //*************************************************************************************************************************************//
 
+bool rocm_smi_power_samples::has_samples() const {  // true if at least one of the power sample members listed below holds a value
+    return this->power_management_limit_.has_value() || this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value()
+           || this->available_power_profiles_.has_value() || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value()
+           || this->power_profile_.has_value();
+}
+
 std::string rocm_smi_power_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "power:\n" };
 
     // power management limit
@@ -291,9 +319,6 @@ std::string rocm_smi_power_samples::generate_yaml_string() const {
                            fmt::join(detail::quote(this->power_profile_.value()), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -318,7 +343,18 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &sample
 //                                                            memory samples                                                           //
 //*************************************************************************************************************************************//
 
+bool rocm_smi_memory_samples::has_samples() const {
+    return this->memory_total_.has_value() || this->visible_memory_total_.has_value() || this->num_pcie_lanes_min_.has_value()
+           || this->num_pcie_lanes_max_.has_value() || this->pcie_link_transfer_rate_min_.has_value() || this->pcie_link_transfer_rate_max_.has_value()
+           || this->memory_used_.has_value() || this->memory_free_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_transfer_rate_.has_value();
+}
+
 std::string rocm_smi_memory_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "memory:\n" };
 
     // total memory
@@ -394,9 +430,6 @@ std::string rocm_smi_memory_samples::generate_yaml_string() const {
                            fmt::join(this->pcie_link_transfer_rate_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -427,7 +460,23 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &sampl
 //                                                         temperature samples                                                         //
 //*************************************************************************************************************************************//
 
+bool rocm_smi_temperature_samples::has_samples() const {
+    return this->num_fans_.has_value() || this->fan_speed_max_.has_value() || this->temperature_min_.has_value() || this->temperature_max_.has_value()
+           || this->memory_temperature_min_.has_value() || this->memory_temperature_max_.has_value() || this->hotspot_temperature_min_.has_value()
+           || this->hotspot_temperature_max_.has_value() || this->hbm_0_temperature_min_.has_value() || this->hbm_0_temperature_max_.has_value()
+           || this->hbm_1_temperature_min_.has_value() || this->hbm_1_temperature_max_.has_value() || this->hbm_2_temperature_min_.has_value()
+           || this->hbm_2_temperature_max_.has_value() || this->hbm_3_temperature_min_.has_value() || this->hbm_3_temperature_max_.has_value()
+           || this->fan_speed_percentage_.has_value() || this->temperature_.has_value() || this->memory_temperature_.has_value()
+           || this->hotspot_temperature_.has_value() || this->hbm_0_temperature_.has_value() || this->hbm_1_temperature_.has_value()
+           || this->hbm_2_temperature_.has_value() || this->hbm_3_temperature_.has_value();
+}
+
 std::string rocm_smi_temperature_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "temperature:\n" };
 
     // number of fans (emulated)
@@ -600,9 +649,6 @@ std::string rocm_smi_temperature_samples::generate_yaml_string() const {
                            fmt::join(this->hbm_3_temperature_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
index 774ab6c..c2e8eec 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
@@ -678,10 +678,10 @@ std::string gpu_intel_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
+    return fmt::format("{}\n"
+                       "{}\n"
+                       "{}\n"
+                       "{}\n"
                        "{}",
                        general_samples_.generate_yaml_string(),
                        clock_samples_.generate_yaml_string(),
diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
index 5ceffcf..a10a358 100644
--- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
+++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
@@ -38,7 +38,17 @@ void append_map_values(std::string &str, const std::string_view entry_name, cons
 //                                                           general samples                                                           //
 //*************************************************************************************************************************************//
 
+bool level_zero_general_samples::has_samples() const {
+    return this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value() || this->flags_.has_value() || this->standby_mode_.has_value()
+           || this->num_threads_per_eu_.has_value() || this->eu_simd_width_.has_value();
+}
+
 std::string level_zero_general_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "general:\n" };
 
     // device byte order
@@ -91,9 +101,6 @@ std::string level_zero_general_samples::generate_yaml_string() const {
                            this->eu_simd_width_.value());
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -118,7 +125,19 @@ std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &sa
 //                                                            clock samples                                                            //
 //*************************************************************************************************************************************//
 
+bool level_zero_clock_samples::has_samples() const {
+    return this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() || this->memory_clock_frequency_min_.has_value()
+           || this->memory_clock_frequency_max_.has_value() || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value()
+           || this->clock_frequency_.has_value() || this->memory_clock_frequency_.has_value() || this->throttle_reason_.has_value()
+           || this->memory_throttle_reason_.has_value() || this->frequency_limit_tdp_.has_value() || this->memory_frequency_limit_tdp_.has_value();
+}
+
 std::string level_zero_clock_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "clock:\n" };
 
     // minimum GPU core clock
@@ -207,9 +226,6 @@ std::string level_zero_clock_samples::generate_yaml_string() const {
                            fmt::join(this->memory_frequency_limit_tdp_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -244,7 +260,17 @@ std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samp
 //                                                            power samples                                                            //
 //*************************************************************************************************************************************//
 
+bool level_zero_power_samples::has_samples() const {
+    return this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value() || this->power_management_mode_.has_value()
+           || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value();
+}
+
 std::string level_zero_power_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "power:\n" };
 
     // power enforced limit
@@ -284,9 +310,6 @@ std::string level_zero_power_samples::generate_yaml_string() const {
                            fmt::join(this->power_total_energy_consumption_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -307,7 +330,20 @@ std::ostream &operator<<(std::ostream &out, const level_zero_power_samples &samp
 //                                                            memory samples                                                           //
 //*************************************************************************************************************************************//
 
+bool level_zero_memory_samples::has_samples() const {
+    return this->memory_total_.has_value() || this->visible_memory_total_.has_value() || this->memory_location_.has_value()
+           || this->num_pcie_lanes_max_.has_value() || this->pcie_link_generation_max_.has_value() || this->pcie_link_speed_max_.has_value()
+           || this->memory_bus_width_.has_value() || this->memory_num_channels_.has_value() || this->memory_free_.has_value()
+           || this->memory_used_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_generation_.has_value()
+           || this->pcie_link_speed_.has_value();
+}
+
 std::string level_zero_memory_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "memory:\n" };
 
     // the total memory
@@ -424,9 +460,6 @@ std::string level_zero_memory_samples::generate_yaml_string() const {
                            fmt::join(this->pcie_link_speed_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -461,7 +494,18 @@ std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &sam
 //                                                         temperature samples                                                         //
 //*************************************************************************************************************************************//
 
+bool level_zero_temperature_samples::has_samples() const {
+    return this->num_fans_.has_value() || this->fan_speed_max_.has_value() || this->temperature_max_.has_value() || this->memory_temperature_max_.has_value()
+           || this->global_temperature_max_.has_value() || this->fan_speed_percentage_.has_value() || this->temperature_.has_value()
+           || this->memory_temperature_.has_value() || this->global_temperature_.has_value() || this->psu_temperature_.has_value();
+}
+
 std::string level_zero_temperature_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "temperature:\n" };
 
     // the number of fans
@@ -536,9 +580,6 @@ std::string level_zero_temperature_samples::generate_yaml_string() const {
                            fmt::join(this->psu_temperature_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 769f0a6..1536237 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -551,10 +551,10 @@ std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
-                       "{}\n\n"
+    return fmt::format("{}\n"
+                       "{}\n"
+                       "{}\n"
+                       "{}\n"
                        "{}",
                        general_samples_.generate_yaml_string(),
                        clock_samples_.generate_yaml_string(),
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 94bf97b..0412ef2 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -21,7 +21,18 @@ namespace hws {
 //                                                           general samples                                                           //
 //*************************************************************************************************************************************//
 
+bool nvml_general_samples::has_samples() const {
+    return this->architecture_.has_value() || this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value()
+           || this->persistence_mode_.has_value() || this->num_cores_.has_value() || this->compute_utilization_.has_value()
+           || this->memory_utilization_.has_value() || this->performance_level_.has_value();
+}
+
 std::string nvml_general_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "general:\n" };
 
     // device architecture
@@ -90,9 +101,6 @@ std::string nvml_general_samples::generate_yaml_string() const {
                            fmt::join(this->performance_level_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -121,7 +129,20 @@ std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples)
 //                                                            clock samples                                                            //
 //*************************************************************************************************************************************//
 
+bool nvml_clock_samples::has_samples() const {
+    return this->auto_boosted_clock_enabled_.has_value() || this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value()
+           || this->memory_clock_frequency_min_.has_value() || this->memory_clock_frequency_max_.has_value() || this->sm_clock_frequency_max_.has_value()
+           || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value()
+           || this->memory_clock_frequency_.has_value() || this->sm_clock_frequency_.has_value() || this->throttle_reason_.has_value()
+           || this->auto_boosted_clock_.has_value();
+}
+
 std::string nvml_clock_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "clock:\n" };
 
     // adaptive clock status
@@ -219,9 +240,6 @@ std::string nvml_clock_samples::generate_yaml_string() const {
                            fmt::join(this->auto_boosted_clock_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -258,7 +276,18 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) {
 //                                                            power samples                                                            //
 //*************************************************************************************************************************************//
 
+bool nvml_power_samples::has_samples() const {
+    return this->power_management_limit_.has_value() || this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value()
+           || this->power_management_mode_.has_value() || this->available_power_profiles_.has_value() || this->power_usage_.has_value()
+           || this->power_total_energy_consumption_.has_value() || this->power_profile_.has_value();
+}
+
 std::string nvml_power_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "power:\n" };
 
     // power management limit
@@ -319,9 +348,6 @@ std::string nvml_power_samples::generate_yaml_string() const {
                            fmt::join(this->power_profile_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -348,7 +374,19 @@ std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) {
 //                                                            memory samples                                                           //
 //*************************************************************************************************************************************//
 
+bool nvml_memory_samples::has_samples() const {
+    return this->memory_total_.has_value() || this->pcie_link_speed_max_.has_value() || this->pcie_link_generation_max_.has_value()
+           || this->num_pcie_lanes_max_.has_value() || this->memory_bus_width_.has_value() || this->memory_used_.has_value()
+           || this->memory_free_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_generation_.has_value()
+           || this->pcie_link_speed_.has_value();
+}
+
 std::string nvml_memory_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "memory:\n" };
 
     // total memory size
@@ -423,9 +461,6 @@ std::string nvml_memory_samples::generate_yaml_string() const {
                            fmt::join(this->pcie_link_speed_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
@@ -456,7 +491,17 @@ std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples)
 //                                                         temperature samples                                                         //
 //*************************************************************************************************************************************//
 
+bool nvml_temperature_samples::has_samples() const {
+    return this->num_fans_.has_value() || this->fan_speed_min_.has_value() || this->fan_speed_max_.has_value() || this->temperature_max_.has_value()
+           || this->memory_temperature_max_.has_value() || this->fan_speed_percentage_.has_value() || this->temperature_.has_value();
+}
+
 std::string nvml_temperature_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "temperature:\n" };
 
     // number of fans
@@ -510,9 +555,6 @@ std::string nvml_temperature_samples::generate_yaml_string() const {
                            fmt::join(this->temperature_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index 5d27972..903db8a 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -152,11 +152,11 @@ void hardware_sampler::dump_yaml(const char *filename) const {
     // output the sampling information
     file << fmt::format("sampling_interval:\n"
                         "  unit: \"ms\"\n"
-                        "  values: {}\n"
+                        "  values: {}\n\n"
                         "time_points:\n"
                         "  unit: \"s\"\n"
-                        "  values: [{}]\n"
-                        "{}\n\n",
+                        "  values: [{}]\n\n"
+                        "{}\n",
                         this->sampling_interval().count(),
                         fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),
                         this->generate_yaml_string());

From e3f7f3b0f79a916f3127adcc900eee1d9c3b0194 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 15:59:36 +0200
Subject: [PATCH 47/69] Only add newlines if the sample category isn't empty.

---
 src/hardware_sampling/cpu/hardware_sampler.cpp | 18 ++++++++++++------
 .../gpu_amd/hardware_sampler.cpp               | 12 ++++++++----
 .../gpu_intel/hardware_sampler.cpp             | 12 ++++++++----
 .../gpu_nvidia/hardware_sampler.cpp            | 12 ++++++++----
 4 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 7e89eca..824ea7b 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -428,19 +428,25 @@ std::string cpu_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
+    return fmt::format("{}{}"
+                       "{}{}"
+                       "{}{}"
+                       "{}{}"
+                       "{}{}"
+                       "{}{}"
                        "{}",
                        general_samples_.generate_yaml_string(),
+                       general_samples_.has_samples() ? "\n" : "",
                        clock_samples_.generate_yaml_string(),
+                       clock_samples_.has_samples() ? "\n" : "",
                        power_samples_.generate_yaml_string(),
+                       power_samples_.has_samples() ? "\n" : "",
                        memory_samples_.generate_yaml_string(),
+                       memory_samples_.has_samples() ? "\n" : "",
                        temperature_samples_.generate_yaml_string(),
+                       temperature_samples_.has_samples() ? "\n" : "",
                        gfx_samples_.generate_yaml_string(),
+                       gfx_samples_.has_samples() ? "\n" : "",
                        idle_state_samples_.generate_yaml_string());
 }
 
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 61a8456..d92e594 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -687,15 +687,19 @@ std::string gpu_amd_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
+    return fmt::format("{}{}"
+                       "{}{}"
+                       "{}{}"
+                       "{}{}"
                        "{}",
                        general_samples_.generate_yaml_string(),
+                       general_samples_.has_samples() ? "\n" : "",
                        clock_samples_.generate_yaml_string(),
+                       clock_samples_.has_samples() ? "\n" : "",
                        power_samples_.generate_yaml_string(),
+                       power_samples_.has_samples() ? "\n" : "",
                        memory_samples_.generate_yaml_string(),
+                       memory_samples_.has_samples() ? "\n" : "",
                        temperature_samples_.generate_yaml_string());
 }
 
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
index c2e8eec..48e459b 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
@@ -678,15 +678,19 @@ std::string gpu_intel_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
+    return fmt::format("{}{}"
+                       "{}{}"
+                       "{}{}"
+                       "{}{}"
                        "{}",
                        general_samples_.generate_yaml_string(),
+                       general_samples_.has_samples() ? "\n" : "",
                        clock_samples_.generate_yaml_string(),
+                       clock_samples_.has_samples() ? "\n" : "",
                        power_samples_.generate_yaml_string(),
+                       power_samples_.has_samples() ? "\n" : "",
                        memory_samples_.generate_yaml_string(),
+                       memory_samples_.has_samples() ? "\n" : "",
                        temperature_samples_.generate_yaml_string());
 }
 
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 1536237..f9a064b 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -551,15 +551,19 @@ std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return fmt::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
+    return fmt::format("{}{}"
+                       "{}{}"
+                       "{}{}"
+                       "{}{}"
                        "{}",
                        general_samples_.generate_yaml_string(),
+                       general_samples_.has_samples() ? "\n" : "",
                        clock_samples_.generate_yaml_string(),
+                       clock_samples_.has_samples() ? "\n" : "",
                        power_samples_.generate_yaml_string(),
+                       power_samples_.has_samples() ? "\n" : "",
                        memory_samples_.generate_yaml_string(),
+                       memory_samples_.has_samples() ? "\n" : "",
                        temperature_samples_.generate_yaml_string());
 }
 

From 66ba78bf1f26ae5730a911ecc08f5d379007eb26 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 16:55:24 +0200
Subject: [PATCH 48/69] Add the possibility to disable sampling categories.

---
 bindings/CMakeLists.txt                       |   1 +
 bindings/cpu_hardware_sampler.cpp             |   3 +
 bindings/gpu_amd_hardware_sampler.cpp         |   5 +
 bindings/gpu_intel_hardware_sampler.cpp       |   5 +
 bindings/gpu_nvidia_hardware_sampler.cpp      |   5 +
 bindings/main.cpp                             |   2 +
 bindings/sample_category.cpp                  |  30 +
 bindings/system_hardware_sampler.cpp          |   7 +-
 include/hardware_sampling/core.hpp            |   1 +
 .../cpu/hardware_sampler.hpp                  |   7 +-
 .../gpu_amd/hardware_sampler.hpp              |  13 +-
 .../gpu_intel/hardware_sampler.hpp            |  13 +-
 .../gpu_intel/level_zero_samples.hpp          |   1 -
 .../gpu_nvidia/hardware_sampler.hpp           |  13 +-
 .../hardware_sampling/hardware_sampler.hpp    |  18 +-
 include/hardware_sampling/sample_category.hpp | 117 ++++
 .../system_hardware_sampler.hpp               |   7 +-
 .../cpu/hardware_sampler.cpp                  | 515 ++++++++++--------
 .../gpu_amd/hardware_sampler.cpp              |  91 ++--
 .../gpu_intel/hardware_sampler.cpp            |  35 +-
 .../gpu_nvidia/hardware_sampler.cpp           |  37 +-
 src/hardware_sampling/hardware_sampler.cpp    |  15 +-
 .../system_hardware_sampler.cpp               |  15 +-
 23 files changed, 608 insertions(+), 348 deletions(-)
 create mode 100644 bindings/sample_category.cpp
 create mode 100644 include/hardware_sampling/sample_category.hpp

diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt
index fb00d29..ffa7984 100644
--- a/bindings/CMakeLists.txt
+++ b/bindings/CMakeLists.txt
@@ -34,6 +34,7 @@ set(HWS_PYTHON_BINDINGS_SOURCES
         ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/relative_event.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/sample_category.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/system_hardware_sampler.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
 )
diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp
index ba31dfe..6d18fe1 100644
--- a/bindings/cpu_hardware_sampler.cpp
+++ b/bindings/cpu_hardware_sampler.cpp
@@ -8,6 +8,7 @@
 #include "hardware_sampling/cpu/cpu_samples.hpp"       // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
 #include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
 #include "hardware_sampling/hardware_sampler.hpp"      // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"       // hws::sample_category
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
@@ -126,7 +127,9 @@ void init_cpu_hardware_sampler(py::module_ &m) {
     // bind the CPU hardware sampler class
     py::class_<hws::cpu_hardware_sampler, hws::hardware_sampler>(m, "CpuHardwareSampler")
         .def(py::init<>(), "construct a new CPU hardware sampler")
+        .def(py::init<hws::sample_category>(), "construct a new CPU hardware sampler sampling only the provided sample_category samples")
         .def(py::init<std::chrono::milliseconds>(), "construct a new CPU hardware sampler specifying the used sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new CPU hardware sampler specifying the used sampling interval sampling only the provided sample_category samples")
         .def("general_samples", &hws::cpu_hardware_sampler::general_samples, "get all general samples")
         .def("clock_samples", &hws::cpu_hardware_sampler::clock_samples, "get all clock related samples")
         .def("power_samples", &hws::cpu_hardware_sampler::power_samples, "get all power related samples")
diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp
index c738340..55fbc75 100644
--- a/bindings/gpu_amd_hardware_sampler.cpp
+++ b/bindings/gpu_amd_hardware_sampler.cpp
@@ -8,6 +8,7 @@
 #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
 #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"           // hws::sample_category
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
@@ -119,9 +120,13 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) {
     // bind the GPU AMD hardware sampler class
     py::class_<hws::gpu_amd_hardware_sampler, hws::hardware_sampler>(m, "GpuAmdHardwareSampler")
         .def(py::init<>(), "construct a new AMD GPU hardware sampler for the default device with the default sampling interval")
+        .def(py::init<hws::sample_category>(), "construct a new AMD GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t>(), "construct a new AMD GPU hardware sampler for the specified device with the default sampling interval")
+        .def(py::init<std::size_t, hws::sample_category>(), "construct a new AMD GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::chrono::milliseconds>(), "construct a new AMD GPU hardware sampler for the default device with the specified sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new AMD GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t, std::chrono::milliseconds>(), "construct a new AMD GPU hardware sampler for the specified device and sampling interval")
+        .def(py::init<std::size_t, std::chrono::milliseconds, hws::sample_category>(), "construct a new AMD GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples")
         .def("general_samples", &hws::gpu_amd_hardware_sampler::general_samples, "get all general samples")
         .def("clock_samples", &hws::gpu_amd_hardware_sampler::clock_samples, "get all clock related samples")
         .def("power_samples", &hws::gpu_amd_hardware_sampler::power_samples, "get all power related samples")
diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp
index 77b67fc..546d295 100644
--- a/bindings/gpu_intel_hardware_sampler.cpp
+++ b/bindings/gpu_intel_hardware_sampler.cpp
@@ -8,6 +8,7 @@
 #include "hardware_sampling/gpu_intel/hardware_sampler.hpp"    // hws::gpu_intel_hardware_sampler
 #include "hardware_sampling/gpu_intel/level_zero_samples.hpp"  // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"              // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"               // hws::sample_category
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
@@ -105,9 +106,13 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
     // bind the GPU Intel hardware sampler class
     py::class_<hws::gpu_intel_hardware_sampler, hws::hardware_sampler>(m, "GpuIntelHardwareSampler")
         .def(py::init<>(), "construct a new Intel GPU hardware sampler for the default device with the default sampling interval")
+        .def(py::init<hws::sample_category>(), "construct a new Intel GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t>(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval")
+        .def(py::init<std::size_t, hws::sample_category>(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::chrono::milliseconds>(), "construct a new Intel GPU hardware sampler for the default device with the specified sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new Intel GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t, std::chrono::milliseconds>(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval")
+        .def(py::init<std::size_t, std::chrono::milliseconds, hws::sample_category>(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples")
         .def("general_samples", &hws::gpu_intel_hardware_sampler::general_samples, "get all general samples")
         .def("clock_samples", &hws::gpu_intel_hardware_sampler::clock_samples, "get all clock related samples")
         .def("power_samples", &hws::gpu_intel_hardware_sampler::power_samples, "get all power related samples")
diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp
index f9b9261..10a04b3 100644
--- a/bindings/gpu_nvidia_hardware_sampler.cpp
+++ b/bindings/gpu_nvidia_hardware_sampler.cpp
@@ -8,6 +8,7 @@
 #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
 #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"      // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"             // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"              // hws::sample_category
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
@@ -105,9 +106,13 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
     // bind the GPU NVIDIA hardware sampler class
     py::class_<hws::gpu_nvidia_hardware_sampler, hws::hardware_sampler>(m, "GpuNvidiaHardwareSampler")
         .def(py::init<>(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval")
+        .def(py::init<hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t>(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval")
+        .def(py::init<std::size_t, hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::chrono::milliseconds>(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t, std::chrono::milliseconds>(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval")
+        .def(py::init<std::size_t, std::chrono::milliseconds, hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples")
         .def("general_samples", &hws::gpu_nvidia_hardware_sampler::general_samples, "get all general samples")
         .def("clock_samples", &hws::gpu_nvidia_hardware_sampler::clock_samples, "get all clock related samples")
         .def("power_samples", &hws::gpu_nvidia_hardware_sampler::power_samples, "get all power related samples")
diff --git a/bindings/main.cpp b/bindings/main.cpp
index 11dbf33..39eb521 100644
--- a/bindings/main.cpp
+++ b/bindings/main.cpp
@@ -16,6 +16,7 @@ namespace py = pybind11;
 
 // forward declare binding functions
 void init_event(py::module_ &);
+void init_sample_category(py::module_ &);
 void init_relative_event(py::module_ &);
 void init_hardware_sampler(py::module_ &);
 void init_system_hardware_sampler(py::module_ &);
@@ -28,6 +29,7 @@ PYBIND11_MODULE(HardwareSampling, m) {
     m.doc() = "Hardware Sampling for CPUs and GPUs";
 
     init_event(m);
+    init_sample_category(m);
     init_relative_event(m);
     init_hardware_sampler(m);
     init_system_hardware_sampler(m);
diff --git a/bindings/sample_category.cpp b/bindings/sample_category.cpp
new file mode 100644
index 0000000..2db6563
--- /dev/null
+++ b/bindings/sample_category.cpp
@@ -0,0 +1,30 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hardware_sampling/sample_category.hpp"  // hws::sample_category
+
+#include "pybind11/operators.h"  // operator overloading
+#include "pybind11/pybind11.h"   // py::module_, py::overload_cast
+
+namespace py = pybind11;
+
+void init_sample_category(py::module_ &m) {
+    // bind the sample_category enum as "SampleCategory" together with its bitwise operators; the operators are tagged with py::is_operator() such that unsupported operand types result in NotImplemented instead of a pybind11 overload resolution TypeError
+    py::enum_<hws::sample_category>(m, "SampleCategory")
+        .value("GENERAL", hws::sample_category::general, "General hardware samples like architecture, names, or utilization.")
+        .value("CLOCK", hws::sample_category::clock, "Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons.")
+        .value("POWER", hws::sample_category::power, "Power-related hardware samples like current power draw or total energy consumption.")
+        .value("MEMORY", hws::sample_category::memory, "Memory-related hardware samples like memory usage or PCIe information.")
+        .value("TEMPERATURE", hws::sample_category::temperature, "Temperature-related hardware samples like maximum and current temperatures.")
+        .value("GFX", hws::sample_category::gfx, "Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler.")
+        .value("IDLE_STATE", hws::sample_category::idle_state, "Idle-state-related hardware samples. Only used in the cpu_hardware_sampler.")
+        .value("ALL", hws::sample_category::all, "Shortcut to enable all available hardware samples (default).")
+        .def("__invert__", py::overload_cast<hws::sample_category>(&hws::operator~), py::is_operator())
+        .def("__and__", py::overload_cast<hws::sample_category, hws::sample_category>(&hws::operator&), py::is_operator())
+        .def("__or__", py::overload_cast<hws::sample_category, hws::sample_category>(&hws::operator|), py::is_operator())
+        .def("__xor__", py::overload_cast<hws::sample_category, hws::sample_category>(&hws::operator^), py::is_operator());
+}
diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp
index 3c24ad3..f26a4b6 100644
--- a/bindings/system_hardware_sampler.cpp
+++ b/bindings/system_hardware_sampler.cpp
@@ -7,8 +7,9 @@
 
 #include "hardware_sampling/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 
-#include "hardware_sampling/event.hpp"    // hws::event
-#include "hardware_sampling/utility.hpp"  // hws::detail::durations_from_reference_time
+#include "hardware_sampling/event.hpp"            // hws::event
+#include "hardware_sampling/sample_category.hpp"  // hws::sample_category
+#include "hardware_sampling/utility.hpp"          // hws::detail::durations_from_reference_time
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // bind std::chrono types
@@ -24,7 +25,9 @@ void init_system_hardware_sampler(py::module_ &m) {
     // bind the pure virtual hardware sampler base class
     py::class_<hws::system_hardware_sampler>(m, "SystemHardwareSampler")
         .def(py::init<>(), "construct a new system hardware sampler with the default sampling interval")
+        .def(py::init<hws::sample_category>(), "construct a new system hardware sampler with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::chrono::milliseconds>(), "construct a new system hardware sampler for with the specified sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new system hardware sampler for with the specified sampling interval sampling only the provided sample_category samples")
         .def("start", &hws::system_hardware_sampler::start_sampling, "start hardware sampling for all available hardware samplers")
         .def("stop", &hws::system_hardware_sampler::stop_sampling, "stop hardware sampling for all available hardware samplers")
         .def("pause", &hws::system_hardware_sampler::pause_sampling, "pause hardware sampling for all available hardware samplers")
diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp
index 15d65df..7b259f4 100644
--- a/include/hardware_sampling/core.hpp
+++ b/include/hardware_sampling/core.hpp
@@ -14,6 +14,7 @@
 
 #include "hardware_sampling/event.hpp"
 #include "hardware_sampling/hardware_sampler.hpp"
+#include "hardware_sampling/sample_category.hpp"
 #include "hardware_sampling/system_hardware_sampler.hpp"
 
 #if defined(HWS_FOR_CPUS_ENABLED)
diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp
index 4e65338..8105fd4 100644
--- a/include/hardware_sampling/cpu/hardware_sampler.hpp
+++ b/include/hardware_sampling/cpu/hardware_sampler.hpp
@@ -14,6 +14,7 @@
 
 #include "hardware_sampling/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
 #include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"   // hws::sample_category
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -32,13 +33,15 @@ class cpu_hardware_sampler : public hardware_sampler {
   public:
     /**
      * @brief Construct a new CPU hardware sampler with the default sampling interval.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    cpu_hardware_sampler();
+    explicit cpu_hardware_sampler(sample_category category = sample_category::all);
     /**
      * @brief Construct a new CPU hardware sampler with the @p sampling_interval.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit cpu_hardware_sampler(std::chrono::milliseconds sampling_interval);
+    explicit cpu_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member).
diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
index 65e6ca3..a44dec7 100644
--- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
@@ -14,6 +14,7 @@
 
 #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"           // hws::sample_category
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -36,27 +37,31 @@ class gpu_amd_hardware_sampler : public hardware_sampler {
     /**
      * @brief Construct a new AMD GPU hardware sampler for the default device with the default sampling interval.
      * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_amd_hardware_sampler();
+    explicit gpu_amd_hardware_sampler(sample_category category = sample_category::all);
     /**
      * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the default sampling interval.
      * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment.
      * @param[in] device_id the ID of the device to sample
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_amd_hardware_sampler(std::size_t device_id);
+    explicit gpu_amd_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all);
     /**
      * @brief Construct a new AMD GPU hardware sampler for the default device with the @p sampling_interval.
      * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_amd_hardware_sampler(std::chrono::milliseconds sampling_interval);
+    explicit gpu_amd_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
     /**
      * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the @p sampling_interval.
      * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment.
      * @param[in] device_id the ID of the device to sample
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_amd_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval);
+    gpu_amd_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member).
diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
index d47bd32..bea3103 100644
--- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
@@ -15,6 +15,7 @@
 #include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
 #include "hardware_sampling/gpu_intel/level_zero_samples.hpp"        // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"                    // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"                     // hws::sample_category
 
 #include "fmt/format.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -37,27 +38,31 @@ class gpu_intel_hardware_sampler : public hardware_sampler {
     /**
      * @brief Construct a new Intel GPU hardware sampler for the default device with the default sampling interval.
      * @details If this is the first Intel GPU sampler, initializes the Level Zero environment.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_intel_hardware_sampler();
+    explicit gpu_intel_hardware_sampler(sample_category category = sample_category::all);
     /**
      * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the default sampling interval.
      * @details If this is the first Intel GPU sampler, initializes the Level Zero environment.
      * @param[in] device_id the ID of the device to sample
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_intel_hardware_sampler(std::size_t device_id);
+    explicit gpu_intel_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all);
     /**
      * @brief Construct a new Intel GPU hardware sampler for the default device with the @p sampling_interval.
      * @details If this is the first Intel GPU sampler, initializes the Level Zero environment.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_intel_hardware_sampler(std::chrono::milliseconds sampling_interval);
+    explicit gpu_intel_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
     /**
      * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the @p sampling_interval.
      * @details If this is the first Intel GPU sampler, initializes the Level Zero environment.
      * @param[in] device_id the ID of the device to sample
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_intel_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval);
+    gpu_intel_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member).
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
index a1c56c2..f178f7f 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
@@ -54,7 +54,6 @@ class level_zero_general_samples {
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)           // the vendor ID
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                // the model name of the device
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, flags)  // potential GPU flags (e.g. integrated device)
-
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode)          // the enabled standby mode (power saving or never)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu)  // the number of threads per EU unit
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, eu_simd_width)       // the physical EU unit SIMD width
diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
index 562348a..d73cd07 100644
--- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
@@ -15,6 +15,7 @@
 #include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp"  // hws::nvml_device_handle
 #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"        // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
 #include "hardware_sampling/hardware_sampler.hpp"               // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"                // hws::sample_category
 
 #include "fmt/format.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -37,27 +38,31 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler {
     /**
      * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval.
      * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_nvidia_hardware_sampler();
+    explicit gpu_nvidia_hardware_sampler(sample_category category = sample_category::all);
     /**
      * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the default sampling interval.
      * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment.
      * @param[in] device_id the ID of the device to sample
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_nvidia_hardware_sampler(std::size_t device_id);
+    explicit gpu_nvidia_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all);
     /**
      * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the @p sampling_interval.
      * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_nvidia_hardware_sampler(std::chrono::milliseconds sampling_interval);
+    explicit gpu_nvidia_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
     /**
      * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the @p sampling_interval.
      * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment.
      * @param[in] device_id the ID of the device to sample
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_nvidia_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval);
+    gpu_nvidia_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member).
diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp
index 64eb833..e534d19 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hardware_sampling/hardware_sampler.hpp
@@ -12,7 +12,8 @@
 #define HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/event.hpp"  // hws::event
+#include "hardware_sampling/event.hpp"            // hws::event
+#include "hardware_sampling/sample_category.hpp"  // hws::sample_category
 
 #include <atomic>      // std::atomic
 #include <chrono>      // std::chrono::{system_clock::time_point, steady_clock::time_point, milliseconds}
@@ -32,8 +33,9 @@ class hardware_sampler {
     /**
      * @brief Construct a new hardware sampler with the provided @p sampling_interval.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling
      */
-    explicit hardware_sampler(std::chrono::milliseconds sampling_interval);
+    hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the std::atomic member).
@@ -182,7 +184,14 @@ class hardware_sampler {
      * @brief Add a new time point to this hardware sampler. Called during the sampling loop.
      * @param time_point the new time point to add
      */
-    void add_time_point(const std::chrono::steady_clock::time_point time_point) { time_points_.push_back(time_point); }
+    void add_time_point(std::chrono::steady_clock::time_point time_point);
+
+    /**
+     * @brief Check whether the @p category is currently enabled for hardware sampling or not.
+     * @param[in] category the sample_category to check
+     * @return Returns `true` if @p category is enabled for sampling, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool sample_category_enabled(sample_category category) const noexcept;
 
   private:
     /// A boolean flag indicating whether the sampling has already started.
@@ -206,6 +215,9 @@ class hardware_sampler {
 
     /// The sampling interval of this hardware sampler.
     const std::chrono::milliseconds sampling_interval_{};
+
+    /// The bitmask of sample categories to use.
+    const sample_category sample_category_{};
 };
 
 }  // namespace hws
diff --git a/include/hardware_sampling/sample_category.hpp b/include/hardware_sampling/sample_category.hpp
new file mode 100644
index 0000000..e740544
--- /dev/null
+++ b/include/hardware_sampling/sample_category.hpp
@@ -0,0 +1,117 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines an enum class with all sample categories to be able to only selectively enable some samples.
+ */
+
+#ifndef HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_
+#define HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_
+#pragma once
+
+namespace hws {
+
+/**
+ * @brief Enum class used as a bitmask containing the possible sample categories; every individual category occupies exactly one bit.
+ * @details The sample categories "gfx" and "idle_state" are only used in the cpu_hardware_sampler.
+ *          Additionally, the "all" sample_category, which has the bits of all seven categories set, is available to easily enable all hardware samples (default).
+ */
+enum class sample_category : int {
+    // clang-format off
+    /// General hardware samples like architecture, names, or utilization.
+    general     = 0b00000001,
+    /// Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons.
+    clock       = 0b00000010,
+    /// Power-related hardware samples like current power draw or total energy consumption.
+    power       = 0b00000100,
+    /// Memory-related hardware samples like memory usage or PCIe information.
+    memory      = 0b00001000,
+    /// Temperature-related hardware samples like maximum and current temperatures.
+    temperature = 0b00010000,
+    /// Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler.
+    gfx         = 0b00100000,
+    /// Idle-state-related hardware samples. Only used in the cpu_hardware_sampler.
+    idle_state  = 0b01000000,
+    /// Shortcut to enable all available hardware samples (default).
+    all         = 0b01111111
+    // clang-format on
+};
+
+/**
+ * @brief Compute the bitwise not of @p sc by inverting every bit of its underlying `int` value.
+ * @param[in] sc the sample_category to apply the bitwise not to
+ * @return the bitwise not result; no masking is applied, i.e., the bits outside of `sample_category::all` are inverted as well (`[[nodiscard]]`)
+ */
+[[nodiscard]] constexpr sample_category operator~(const sample_category sc) noexcept {
+    return static_cast<sample_category>(~static_cast<int>(sc));
+}
+
+/**
+ * @brief Compute the bitwise and of the underlying `int` values of @p lhs and @p rhs and return it as a new sample_category.
+ * @param[in] lhs the first sample_category
+ * @param[in] rhs the second sample_category
+ * @return the bitwise and result, i.e., only the bits that are set in both @p lhs and @p rhs (`[[nodiscard]]`)
+ */
+[[nodiscard]] constexpr sample_category operator&(const sample_category lhs, const sample_category rhs) noexcept {
+    return static_cast<sample_category>(static_cast<int>(lhs) & static_cast<int>(rhs));
+}
+
+/**
+ * @brief Compute the bitwise or of the underlying `int` values of @p lhs and @p rhs and return it as a new sample_category.
+ * @param[in] lhs the first sample_category
+ * @param[in] rhs the second sample_category
+ * @return the bitwise or result, i.e., all bits that are set in @p lhs or in @p rhs (`[[nodiscard]]`)
+ */
+[[nodiscard]] constexpr sample_category operator|(const sample_category lhs, const sample_category rhs) noexcept {
+    return static_cast<sample_category>(static_cast<int>(lhs) | static_cast<int>(rhs));
+}
+
+/**
+ * @brief Compute the bitwise xor of the underlying `int` values of @p lhs and @p rhs and return it as a new sample_category.
+ * @param[in] lhs the first sample_category
+ * @param[in] rhs the second sample_category
+ * @return the bitwise xor result, i.e., the bits that are set in exactly one of @p lhs and @p rhs (`[[nodiscard]]`)
+ */
+[[nodiscard]] constexpr sample_category operator^(const sample_category lhs, const sample_category rhs) noexcept {
+    return static_cast<sample_category>(static_cast<int>(lhs) ^ static_cast<int>(rhs));
+}
+
+/**
+ * @brief Compute the bitwise compound and: assigns the result of `lhs & rhs` to @p lhs.
+ * @param[in,out] lhs the first sample_category, overwritten with the result
+ * @param[in] rhs the second sample_category
+ * @return a reference to @p lhs containing the bitwise and result
+ */
+constexpr sample_category &operator&=(sample_category &lhs, const sample_category rhs) noexcept {
+    lhs = lhs & rhs;
+    return lhs;
+}
+
+/**
+ * @brief Compute the bitwise compound or: assigns the result of `lhs | rhs` to @p lhs.
+ * @param[in,out] lhs the first sample_category, overwritten with the result
+ * @param[in] rhs the second sample_category
+ * @return a reference to @p lhs containing the bitwise or result
+ */
+constexpr sample_category &operator|=(sample_category &lhs, const sample_category rhs) noexcept {
+    lhs = lhs | rhs;
+    return lhs;
+}
+
+/**
+ * @brief Compute the bitwise compound xor: assigns the result of `lhs ^ rhs` to @p lhs.
+ * @param[in,out] lhs the first sample_category, overwritten with the result
+ * @param[in] rhs the second sample_category
+ * @return a reference to @p lhs containing the bitwise xor result
+ */
+constexpr sample_category &operator^=(sample_category &lhs, const sample_category rhs) noexcept {
+    lhs = lhs ^ rhs;
+    return lhs;
+}
+
+}  // namespace hws
+
+#endif  // HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_
diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp
index 394a0c6..d8c56fd 100644
--- a/include/hardware_sampling/system_hardware_sampler.hpp
+++ b/include/hardware_sampling/system_hardware_sampler.hpp
@@ -13,6 +13,7 @@
 
 #include "hardware_sampling/event.hpp"             // hws::event
 #include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"   // hws::sample_category
 
 #include <chrono>      // std::chrono::{milliseconds, steady_clock::time_point}
 #include <cstddef>     // std::size_t
@@ -31,13 +32,15 @@ class system_hardware_sampler {
   public:
     /**
      * @brief Construct hardware samplers with the default sampling interval.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    system_hardware_sampler();
+    explicit system_hardware_sampler(sample_category category = sample_category::all);
     /**
      * @brief Construct hardware samplers with the provided @p sampling_interval.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit system_hardware_sampler(std::chrono::milliseconds sampling_interval);
+    explicit system_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
 
     /**
      * @brief Delete the copy-constructor.
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 824ea7b..4a51c10 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -10,6 +10,7 @@
 #include "hardware_sampling/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
 #include "hardware_sampling/cpu/utility.hpp"       // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess
 #include "hardware_sampling/hardware_sampler.hpp"  // hws::tracking::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"   // hws::sample_category
 #include "hardware_sampling/utility.hpp"           // hws::detail::{split, split_as, trim, convert_to, starts_with}
 
 #include "fmt/format.h"  // fmt::format
@@ -33,11 +34,11 @@
 
 namespace hws {
 
-cpu_hardware_sampler::cpu_hardware_sampler() :
-    cpu_hardware_sampler{ HWS_SAMPLING_INTERVAL } { }
+cpu_hardware_sampler::cpu_hardware_sampler(const sample_category category) :
+    cpu_hardware_sampler{ HWS_SAMPLING_INTERVAL, category } { }  // delegate to the (sampling_interval, category) constructor, passing HWS_SAMPLING_INTERVAL as the interval
 
-cpu_hardware_sampler::cpu_hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    hardware_sampler{ sampling_interval } { }
+cpu_hardware_sampler::cpu_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    hardware_sampler{ sampling_interval, category } { }  // forward both arguments unchanged to hardware_sampler; no additional work is done in the body
 
 cpu_hardware_sampler::~cpu_hardware_sampler() {
     try {
@@ -71,53 +72,63 @@ void cpu_hardware_sampler::sampling_loop() {
             value = detail::trim(value);
 
             // check the lines if the start with an entry that we want to sample
-            if (detail::starts_with(line, "Architecture")) {
-                general_samples_.architecture_ = detail::convert_to<decltype(general_samples_.architecture_)::value_type>(value);
-            } else if (detail::starts_with(line, "Byte Order")) {
-                general_samples_.byte_order_ = detail::convert_to<decltype(general_samples_.byte_order_)::value_type>(value);
-            } else if (detail::starts_with(line, "CPU(s)")) {
-                general_samples_.num_threads_ = detail::convert_to<decltype(general_samples_.num_threads_)::value_type>(value);
-            } else if (detail::starts_with(line, "Thread(s) per core")) {
-                general_samples_.threads_per_core_ = detail::convert_to<decltype(general_samples_.threads_per_core_)::value_type>(value);
-            } else if (detail::starts_with(line, "Core(s) per socket")) {
-                general_samples_.cores_per_socket_ = detail::convert_to<decltype(general_samples_.cores_per_socket_)::value_type>(value);
-            } else if (detail::starts_with(line, "Socket(s)")) {
-                general_samples_.num_sockets_ = detail::convert_to<decltype(general_samples_.num_sockets_)::value_type>(value);
-            } else if (detail::starts_with(line, "NUMA node(s)")) {
-                general_samples_.numa_nodes_ = detail::convert_to<decltype(general_samples_.numa_nodes_)::value_type>(value);
-            } else if (detail::starts_with(line, "Vendor ID")) {
-                general_samples_.vendor_id_ = detail::convert_to<decltype(general_samples_.vendor_id_)::value_type>(value);
-            } else if (detail::starts_with(line, "Model name")) {
-                general_samples_.name_ = detail::convert_to<decltype(general_samples_.name_)::value_type>(value);
-            } else if (detail::starts_with(line, "Flags")) {
-                general_samples_.flags_ = detail::split_as<decltype(general_samples_.flags_)::value_type::value_type>(value, ' ');
-            } else if (detail::starts_with(line, "Frequency boost")) {
-                clock_samples_.auto_boosted_clock_enabled_ = value == "enabled";
-            } else if (detail::starts_with(line, "CPU max MHz")) {
-                clock_samples_.clock_frequency_max_ = detail::convert_to<decltype(clock_samples_.clock_frequency_max_)::value_type>(value);
-            } else if (detail::starts_with(line, "CPU min MHz")) {
-                clock_samples_.clock_frequency_min_ = detail::convert_to<decltype(clock_samples_.clock_frequency_min_)::value_type>(value);
-            } else if (detail::starts_with(line, "L1d cache")) {
-                memory_samples_.cache_size_L1d_ = detail::convert_to<decltype(memory_samples_.cache_size_L1d_)::value_type>(value);
-            } else if (detail::starts_with(line, "L1i cache")) {
-                memory_samples_.cache_size_L1i_ = detail::convert_to<decltype(memory_samples_.cache_size_L1i_)::value_type>(value);
-            } else if (detail::starts_with(line, "L2 cache")) {
-                memory_samples_.cache_size_L2_ = detail::convert_to<decltype(memory_samples_.cache_size_L2_)::value_type>(value);
-            } else if (detail::starts_with(line, "L3 cache")) {
-                memory_samples_.cache_size_L3_ = detail::convert_to<decltype(memory_samples_.cache_size_L3_)::value_type>(value);
+            if (this->sample_category_enabled(sample_category::general)) {
+                if (detail::starts_with(line, "Architecture")) {
+                    general_samples_.architecture_ = detail::convert_to<decltype(general_samples_.architecture_)::value_type>(value);
+                } else if (detail::starts_with(line, "Byte Order")) {
+                    general_samples_.byte_order_ = detail::convert_to<decltype(general_samples_.byte_order_)::value_type>(value);
+                } else if (detail::starts_with(line, "CPU(s)")) {
+                    general_samples_.num_threads_ = detail::convert_to<decltype(general_samples_.num_threads_)::value_type>(value);
+                } else if (detail::starts_with(line, "Thread(s) per core")) {
+                    general_samples_.threads_per_core_ = detail::convert_to<decltype(general_samples_.threads_per_core_)::value_type>(value);
+                } else if (detail::starts_with(line, "Core(s) per socket")) {
+                    general_samples_.cores_per_socket_ = detail::convert_to<decltype(general_samples_.cores_per_socket_)::value_type>(value);
+                } else if (detail::starts_with(line, "Socket(s)")) {
+                    general_samples_.num_sockets_ = detail::convert_to<decltype(general_samples_.num_sockets_)::value_type>(value);
+                } else if (detail::starts_with(line, "NUMA node(s)")) {
+                    general_samples_.numa_nodes_ = detail::convert_to<decltype(general_samples_.numa_nodes_)::value_type>(value);
+                } else if (detail::starts_with(line, "Vendor ID")) {
+                    general_samples_.vendor_id_ = detail::convert_to<decltype(general_samples_.vendor_id_)::value_type>(value);
+                } else if (detail::starts_with(line, "Model name")) {
+                    general_samples_.name_ = detail::convert_to<decltype(general_samples_.name_)::value_type>(value);
+                } else if (detail::starts_with(line, "Flags")) {
+                    general_samples_.flags_ = detail::split_as<decltype(general_samples_.flags_)::value_type::value_type>(value, ' ');
+                }
+            }
+            if (this->sample_category_enabled(sample_category::clock)) {
+                if (detail::starts_with(line, "Frequency boost")) {
+                    clock_samples_.auto_boosted_clock_enabled_ = value == "enabled";
+                } else if (detail::starts_with(line, "CPU max MHz")) {
+                    clock_samples_.clock_frequency_max_ = detail::convert_to<decltype(clock_samples_.clock_frequency_max_)::value_type>(value);
+                } else if (detail::starts_with(line, "CPU min MHz")) {
+                    clock_samples_.clock_frequency_min_ = detail::convert_to<decltype(clock_samples_.clock_frequency_min_)::value_type>(value);
+                }
+            }
+            if (this->sample_category_enabled(sample_category::memory)) {
+                if (detail::starts_with(line, "L1d cache")) {
+                    memory_samples_.cache_size_L1d_ = detail::convert_to<decltype(memory_samples_.cache_size_L1d_)::value_type>(value);
+                } else if (detail::starts_with(line, "L1i cache")) {
+                    memory_samples_.cache_size_L1i_ = detail::convert_to<decltype(memory_samples_.cache_size_L1i_)::value_type>(value);
+                } else if (detail::starts_with(line, "L2 cache")) {
+                    memory_samples_.cache_size_L2_ = detail::convert_to<decltype(memory_samples_.cache_size_L2_)::value_type>(value);
+                } else if (detail::starts_with(line, "L3 cache")) {
+                    memory_samples_.cache_size_L3_ = detail::convert_to<decltype(memory_samples_.cache_size_L3_)::value_type>(value);
+                }
             }
         }
 
-        // check if the number of cores can be derived from the otherwise found values
-        if (general_samples_.num_threads_.has_value() && general_samples_.threads_per_core_.has_value()) {
-            general_samples_.num_cores_ = general_samples_.num_threads_.value() / general_samples_.threads_per_core_.value();
+        if (this->sample_category_enabled(sample_category::general)) {
+            // check if the number of cores can be derived from the otherwise found values
+            if (general_samples_.num_threads_.has_value() && general_samples_.threads_per_core_.has_value()) {
+                general_samples_.num_cores_ = general_samples_.num_threads_.value() / general_samples_.threads_per_core_.value();
+            }
         }
     }
 #endif
 
 #if defined(HWS_VIA_FREE_ENABLED)
     const std::regex whitespace_replace_reg{ "[ ]+", std::regex::extended };
-    {
+    if (this->sample_category_enabled(sample_category::memory)) {
         std::string free_output = detail::run_subprocess("free -b");
         free_output = std::regex_replace(free_output, whitespace_replace_reg, " ");
         const std::vector<std::string_view> free_lines = detail::split(detail::trim(free_output), '\n');
@@ -164,104 +175,121 @@ void cpu_hardware_sampler::sampling_loop() {
         const std::vector<std::string_view> values = detail::split(data[1], '\t');
 
         for (std::size_t i = 0; i < header.size(); ++i) {
-            if (header[i] == "Avg_MHz") {
-                using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
-                clock_samples_.clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Busy%") {
-                using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
-                general_samples_.compute_utilization_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Bzy_MHz") {
-                using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
-                clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "TSC_MHz") {
-                using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
-                clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "IPC") {
-                using vector_type = decltype(general_samples_.ipc_)::value_type;
-                general_samples_.ipc_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "IRQ") {
-                using vector_type = decltype(general_samples_.irq_)::value_type;
-                general_samples_.irq_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "SMI") {
-                using vector_type = decltype(general_samples_.smi_)::value_type;
-                general_samples_.smi_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "POLL") {
-                using vector_type = decltype(general_samples_.poll_)::value_type;
-                general_samples_.poll_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "POLL%") {
-                using vector_type = decltype(general_samples_.poll_percent_)::value_type;
-                general_samples_.poll_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "CoreTmp") {
-                using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
-                temperature_samples_.core_temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "CoreThr") {
-                using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
-                temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "PkgTmp") {
-                using vector_type = decltype(temperature_samples_.temperature_)::value_type;
-                temperature_samples_.temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFX%rc6") {
-                using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
-                gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFXMHz") {
-                using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
-                gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFXAMHz") {
-                using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
-                gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Totl%C0") {
-                using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
-                idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Any%C0") {
-                using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
-                idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFX%C0") {
-                using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
-                gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "CPUGFX%") {
-                using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
-                gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "CPU%LPI") {
-                using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
-                idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "SYS%LPI") {
-                using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
-                idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Pkg%LPI") {
-                using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
-                idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "PkgWatt") {
-                using vector_type = decltype(power_samples_.power_usage_)::value_type;
-                power_samples_.power_usage_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                power_samples_.power_measurement_type_ = "current/instant";
-                power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 };
-            } else if (header[i] == "CorWatt") {
-                using vector_type = decltype(power_samples_.core_watt_)::value_type;
-                power_samples_.core_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFXWatt") {
-                using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
-                gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "RAMWatt") {
-                using vector_type = decltype(power_samples_.ram_watt_)::value_type;
-                power_samples_.ram_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "PKG_%") {
-                using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
-                power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "RAM_%") {
-                using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
-                power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else {
-                // test against regex
-                const std::string header_str{ header[i] };
-                const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended };
-                if (std::regex_match(header_str, reg)) {
-                    // first time this branch is reached -> create optional value
-                    if (!idle_state_samples_.idle_states_.has_value()) {
-                        idle_state_samples_.idle_states_ = std::make_optional<typename cpu_idle_states_samples::map_type>();
-                    }
+            if (this->sample_category_enabled(sample_category::general)) {
+                if (header[i] == "Busy%") {
+                    using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
+                    general_samples_.compute_utilization_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "IPC") {
+                    using vector_type = decltype(general_samples_.ipc_)::value_type;
+                    general_samples_.ipc_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "IRQ") {
+                    using vector_type = decltype(general_samples_.irq_)::value_type;
+                    general_samples_.irq_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "SMI") {
+                    using vector_type = decltype(general_samples_.smi_)::value_type;
+                    general_samples_.smi_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "POLL") {
+                    using vector_type = decltype(general_samples_.poll_)::value_type;
+                    general_samples_.poll_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "POLL%") {
+                    using vector_type = decltype(general_samples_.poll_percent_)::value_type;
+                    general_samples_.poll_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+            }
+            if (this->sample_category_enabled(sample_category::clock)) {
+                if (header[i] == "Avg_MHz") {
+                    using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
+                    clock_samples_.clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "Bzy_MHz") {
+                    using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
+                    clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "TSC_MHz") {
+                    using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
+                    clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+            }
+            if (this->sample_category_enabled(sample_category::power)) {
+                if (header[i] == "PkgWatt") {
+                    using vector_type = decltype(power_samples_.power_usage_)::value_type;
+                    power_samples_.power_usage_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                    power_samples_.power_measurement_type_ = "current/instant";
+                    power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 };
+                } else if (header[i] == "CorWatt") {
+                    using vector_type = decltype(power_samples_.core_watt_)::value_type;
+                    power_samples_.core_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "RAMWatt") {
+                    using vector_type = decltype(power_samples_.ram_watt_)::value_type;
+                    power_samples_.ram_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "PKG_%") {
+                    using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
+                    power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "RAM_%") {
+                    using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
+                    power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+            }
+            if (this->sample_category_enabled(sample_category::temperature)) {
+                if (header[i] == "CoreTmp") {
+                    using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
+                    temperature_samples_.core_temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "CoreThr") {
+                    using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
+                    temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "PkgTmp") {
+                    using vector_type = decltype(temperature_samples_.temperature_)::value_type;
+                    temperature_samples_.temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+            }
+            if (this->sample_category_enabled(sample_category::gfx)) {
+                if (header[i] == "GFX%rc6") {
+                    using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
+                    gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "GFXMHz") {
+                    using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
+                    gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "GFXAMHz") {
+                    using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
+                    gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "GFX%C0") {
+                    using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
+                    gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "CPUGFX%") {
+                    using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
+                    gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "GFXWatt") {
+                    using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
+                    gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+            }
+            if (this->sample_category_enabled(sample_category::idle_state)) {
+                if (header[i] == "Totl%C0") {
+                    using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
+                    idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "Any%C0") {
+                    using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
+                    idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "CPU%LPI") {
+                    using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
+                    idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "SYS%LPI") {
+                    using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
+                    idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else if (header[i] == "Pkg%LPI") {
+                    using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
+                    idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                } else {
+                    // test against regex
+                    const std::string header_str{ header[i] };
+                    const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended };
+                    if (std::regex_match(header_str, reg)) {
+                        // first time this branch is reached -> create optional value
+                        if (!idle_state_samples_.idle_states_.has_value()) {
+                            idle_state_samples_.idle_states_ = std::make_optional<typename cpu_idle_states_samples::map_type>();
+                        }
 
-                    using vector_type = cpu_idle_states_samples::map_type::mapped_type;
-                    idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                        using vector_type = cpu_idle_states_samples::map_type::mapped_type;
+                        idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                    }
                 }
             }
         }
@@ -279,7 +307,7 @@ void cpu_hardware_sampler::sampling_loop() {
             this->add_time_point(std::chrono::steady_clock::now());
 
 #if defined(HWS_VIA_FREE_ENABLED)
-            {
+            if (this->sample_category_enabled(sample_category::memory)) {
                 // run free
                 std::string free_output = detail::run_subprocess("free -b");
                 free_output = std::regex_replace(free_output, whitespace_replace_reg, " ");
@@ -311,101 +339,118 @@ void cpu_hardware_sampler::sampling_loop() {
 
                 // add values to the respective sample entries
                 for (std::size_t i = 0; i < header.size(); ++i) {
-                    if (header[i] == "Avg_MHz") {
-                        using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
-                        clock_samples_.clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Busy%") {
-                        using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
-                        general_samples_.compute_utilization_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Bzy_MHz") {
-                        using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
-                        clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "TSC_MHz") {
-                        using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
-                        clock_samples_.time_stamp_counter_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "IPC") {
-                        using vector_type = decltype(general_samples_.ipc_)::value_type;
-                        general_samples_.ipc_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "IRQ") {
-                        using vector_type = decltype(general_samples_.irq_)::value_type;
-                        general_samples_.irq_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "SMI") {
-                        using vector_type = decltype(general_samples_.smi_)::value_type;
-                        general_samples_.smi_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "POLL") {
-                        using vector_type = decltype(general_samples_.poll_)::value_type;
-                        general_samples_.poll_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "POLL%") {
-                        using vector_type = decltype(general_samples_.poll_percent_)::value_type;
-                        general_samples_.poll_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "CoreTmp") {
-                        using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
-                        temperature_samples_.core_temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "CoreThr") {
-                        using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
-                        temperature_samples_.core_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "PkgTmp") {
-                        using vector_type = decltype(temperature_samples_.temperature_)::value_type;
-                        temperature_samples_.temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFX%rc6") {
-                        using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
-                        gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFXMHz") {
-                        using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
-                        gfx_samples_.gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFXAMHz") {
-                        using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
-                        gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Totl%C0") {
-                        using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
-                        idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Any%C0") {
-                        using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
-                        idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFX%C0") {
-                        using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
-                        gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "CPUGFX%") {
-                        using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
-                        gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "CPU%LPI") {
-                        using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
-                        idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "SYS%LPI") {
-                        using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
-                        idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Pkg%LPI") {
-                        using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
-                        idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "PkgWatt") {
-                        using vector_type = decltype(power_samples_.power_usage_)::value_type;
-                        power_samples_.power_usage_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        // calculate total energy consumption
-                        using value_type = decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type;
-                        const std::size_t num_time_points = this->sampling_time_points().size();
-                        const value_type time_difference = std::chrono::duration<value_type>(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count();
-                        const auto current = power_samples_.power_usage_->back() * time_difference;
-                        power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current);
-                    } else if (header[i] == "CorWatt") {
-                        using vector_type = decltype(power_samples_.core_watt_)::value_type;
-                        power_samples_.core_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFXWatt") {
-                        using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
-                        gfx_samples_.gfx_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "RAMWatt") {
-                        using vector_type = decltype(power_samples_.ram_watt_)::value_type;
-                        power_samples_.ram_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "PKG_%") {
-                        using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
-                        power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "RAM_%") {
-                        using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
-                        power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else {
-                        const std::string header_str{ header[i] };
-                        if (idle_state_samples_.idle_states_.value().count(header_str) > decltype(idle_state_samples_)::map_type::size_type{ 0 }) {
-                            using vector_type = cpu_idle_states_samples::map_type::mapped_type;
-                            idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                    if (this->sample_category_enabled(sample_category::general)) {
+                        if (header[i] == "Busy%") {
+                            using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
+                            general_samples_.compute_utilization_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "IPC") {
+                            using vector_type = decltype(general_samples_.ipc_)::value_type;
+                            general_samples_.ipc_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "IRQ") {
+                            using vector_type = decltype(general_samples_.irq_)::value_type;
+                            general_samples_.irq_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "SMI") {
+                            using vector_type = decltype(general_samples_.smi_)::value_type;
+                            general_samples_.smi_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "POLL") {
+                            using vector_type = decltype(general_samples_.poll_)::value_type;
+                            general_samples_.poll_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "POLL%") {
+                            using vector_type = decltype(general_samples_.poll_percent_)::value_type;
+                            general_samples_.poll_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                    }
+                    if (this->sample_category_enabled(sample_category::clock)) {
+                        if (header[i] == "Avg_MHz") {
+                            using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
+                            clock_samples_.clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "Bzy_MHz") {
+                            using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
+                            clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "TSC_MHz") {
+                            using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
+                            clock_samples_.time_stamp_counter_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                    }
+                    if (this->sample_category_enabled(sample_category::power)) {
+                        if (header[i] == "PkgWatt") {
+                            using vector_type = decltype(power_samples_.power_usage_)::value_type;
+                            power_samples_.power_usage_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                            // calculate total energy consumption
+                            using value_type = decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type;
+                            const std::size_t num_time_points = this->sampling_time_points().size();
+                            const value_type time_difference = std::chrono::duration<value_type>(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count();
+                            const auto current = power_samples_.power_usage_->back() * time_difference;
+                            power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current);
+                        } else if (header[i] == "CorWatt") {
+                            using vector_type = decltype(power_samples_.core_watt_)::value_type;
+                            power_samples_.core_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "RAMWatt") {
+                            using vector_type = decltype(power_samples_.ram_watt_)::value_type;
+                            power_samples_.ram_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "PKG_%") {
+                            using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
+                            power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "RAM_%") {
+                            using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
+                            power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                    }
+                    if (this->sample_category_enabled(sample_category::temperature)) {
+                        if (header[i] == "CoreTmp") {
+                            using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
+                            temperature_samples_.core_temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "CoreThr") {
+                            using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
+                            temperature_samples_.core_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "PkgTmp") {
+                            using vector_type = decltype(temperature_samples_.temperature_)::value_type;
+                            temperature_samples_.temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                    }
+                    if (this->sample_category_enabled(sample_category::gfx)) {
+                        if (header[i] == "GFX%rc6") {
+                            using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
+                            gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "GFXMHz") {
+                            using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
+                            gfx_samples_.gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "GFXAMHz") {
+                            using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
+                            gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "GFX%C0") {
+                            using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
+                            gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "CPUGFX%") {
+                            using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
+                            gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "GFXWatt") {
+                            using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
+                            gfx_samples_.gfx_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                    }
+                    if (this->sample_category_enabled(sample_category::idle_state)) {  // append the current turbostat column value to the matching idle state sample
+                        if (header[i] == "Totl%C0") {
+                            using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
+                            idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "Any%C0") {
+                            using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
+                            idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "CPU%LPI") {
+                            using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
+                            idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "SYS%LPI") {
+                            using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
+                            idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (header[i] == "Pkg%LPI") {
+                            using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
+                            idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        } else if (idle_state_samples_.idle_states_.has_value()) {
+                            // every other column reaches this branch: guard the optional since .value() throws std::bad_optional_access if no idle state map was created
+                            using vector_type = cpu_idle_states_samples::map_type::mapped_type;
+                            if (const auto it = idle_state_samples_.idle_states_->find(std::string{ header[i] }); it != idle_state_samples_.idle_states_->end()) {
+                                it->second.push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                            }
                         }
                     }
                 }
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index d92e594..7369fa3 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -10,6 +10,7 @@
 #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
 #include "hardware_sampling/gpu_amd/utility.hpp"           // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK
 #include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"           // hws::sample_category
 #include "hardware_sampling/utility.hpp"                   // hws::detail::time_points_to_epoch
 
 #include "fmt/format.h"           // fmt::format
@@ -33,17 +34,17 @@
 
 namespace hws {
 
-gpu_amd_hardware_sampler::gpu_amd_hardware_sampler() :
-    gpu_amd_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { }
+gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const sample_category category) :
+    gpu_amd_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { }
 
-gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id) :
-    gpu_amd_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { }
+gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const sample_category category) :
+    gpu_amd_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { }
 
-gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    gpu_amd_hardware_sampler{ 0, sampling_interval } { }
+gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    gpu_amd_hardware_sampler{ 0, sampling_interval, category } { }
 
-gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) :
-    hardware_sampler{ sampling_interval },
+gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    hardware_sampler{ sampling_interval, category },
     device_id_{ static_cast<std::uint32_t>(device_id) } {
     // make sure that rsmi_init is only called once for all instances
     if (instances_++ == 0) {
@@ -86,7 +87,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
     double initial_total_power_consumption{};  // initial total power consumption in J
 
     // retrieve initial general information
-    {
+    if (this->sample_category_enabled(sample_category::general)) {
         // fixed information -> only retrieved once
         // the byte order is given by AMD directly
         general_samples_.byte_order_ = "Little Endian";
@@ -125,51 +126,51 @@ void gpu_amd_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial clock related information
-    {
+    if (this->sample_category_enabled(sample_category::clock)) {
         rsmi_frequencies_t frequency_info{};
         if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.clock_frequency_min_ = static_cast<decltype(clock_samples_.clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000.0 / 1000.0;
-            clock_samples_.clock_frequency_max_ = static_cast<decltype(clock_samples_.clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0;
+            clock_samples_.clock_frequency_min_ = static_cast<decltype(clock_samples_.clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000'000.0;
+            clock_samples_.clock_frequency_max_ = static_cast<decltype(clock_samples_.clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0;
             decltype(clock_samples_.available_clock_frequencies_)::value_type frequencies{};
             for (std::size_t i = 0; i < frequency_info.num_supported; ++i) {
-                frequencies.push_back(static_cast<decltype(frequencies)::value_type>(frequency_info.frequency[i]) / 1000.0 / 1000.0);
+                frequencies.push_back(static_cast<decltype(frequencies)::value_type>(frequency_info.frequency[i]) / 1000'000.0);
             }
             clock_samples_.available_clock_frequencies_ = frequencies;
 
             // queried samples -> retrieved every iteration if available
             clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{};
             if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
+                clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
             } else {
                 clock_samples_.clock_frequency_->push_back(0);
             }
         }
 
         if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.socket_clock_frequency_min_ = static_cast<decltype(clock_samples_.socket_clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000.0 / 1000.0;
-            clock_samples_.socket_clock_frequency_max_ = static_cast<decltype(clock_samples_.socket_clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0;
+            clock_samples_.socket_clock_frequency_min_ = static_cast<decltype(clock_samples_.socket_clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000'000.0;
+            clock_samples_.socket_clock_frequency_max_ = static_cast<decltype(clock_samples_.socket_clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0;
             // queried samples -> retrieved every iteration if available
             clock_samples_.socket_clock_frequency_ = decltype(clock_samples_.socket_clock_frequency_)::value_type{};
             if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                clock_samples_.socket_clock_frequency_->push_back(static_cast<decltype(clock_samples_.socket_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
+                clock_samples_.socket_clock_frequency_->push_back(static_cast<decltype(clock_samples_.socket_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
             } else {
                 clock_samples_.socket_clock_frequency_->push_back(0);
             }
         }
 
         if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.memory_clock_frequency_min_ = static_cast<decltype(clock_samples_.memory_clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000.0 / 1000.0;
-            clock_samples_.memory_clock_frequency_max_ = static_cast<decltype(clock_samples_.memory_clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000.0 / 1000.0;
+            clock_samples_.memory_clock_frequency_min_ = static_cast<decltype(clock_samples_.memory_clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000'000.0;
+            clock_samples_.memory_clock_frequency_max_ = static_cast<decltype(clock_samples_.memory_clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0;
             decltype(clock_samples_.available_memory_clock_frequencies_)::value_type frequencies{};
             for (std::size_t i = 0; i < frequency_info.num_supported; ++i) {
-                frequencies.push_back(static_cast<decltype(frequencies)::value_type>(frequency_info.frequency[i]) / 1000.0 / 1000.0);
+                frequencies.push_back(static_cast<decltype(frequencies)::value_type>(frequency_info.frequency[i]) / 1000'000.0);
             }
             clock_samples_.available_memory_clock_frequencies_ = frequencies;
 
             // queried samples -> retrieved every iteration if available
             clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{};
             if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
+                clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
             } else {
                 clock_samples_.memory_clock_frequency_->push_back(0);
             }
@@ -188,15 +189,15 @@ void gpu_amd_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial power related information
-    {
+    if (this->sample_category_enabled(sample_category::power)) {
         std::uint64_t power_default_cap{};
         if (rsmi_dev_power_cap_default_get(device_id_, &power_default_cap) == RSMI_STATUS_SUCCESS) {
-            power_samples_.power_management_limit_ = static_cast<decltype(power_samples_.power_management_limit_)::value_type>(power_default_cap) / 1000.0 / 1000.0;
+            power_samples_.power_management_limit_ = static_cast<decltype(power_samples_.power_management_limit_)::value_type>(power_default_cap) / 1000'000.0;
         }
 
         std::uint64_t power_cap{};
         if (rsmi_dev_power_cap_get(device_id_, std::uint32_t{ 0 }, &power_cap) == RSMI_STATUS_SUCCESS) {
-            power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(power_cap) / 1000.0 / 1000.0;
+            power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(power_cap) / 1000'000.0;
         }
 
         {
@@ -215,7 +216,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                         break;
                 }
                 // report power usage since the first sample
-                power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(power_usage) / 1000.0 / 1000.0 };
+                power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(power_usage) / 1000'000.0 };
             }
         }
 
@@ -281,7 +282,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         std::uint64_t power_total_energy_consumption{};
         if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {
             const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
-            initial_total_power_consumption = scaled_value / 1000.0 / 1000.0;
+            initial_total_power_consumption = scaled_value / 1000'000.0;
             power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
         } else if (power_samples_.power_usage_.has_value()) {
             // if the total energy consumption cannot be retrieved, but the current power draw, approximate it
@@ -290,7 +291,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial memory related information
-    {
+    if (this->sample_category_enabled(sample_category::memory)) {
         decltype(memory_samples_.memory_total_)::value_type memory_total{};
         if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_total) == RSMI_STATUS_SUCCESS) {
             memory_samples_.memory_total_ = memory_total;
@@ -305,14 +306,14 @@ void gpu_amd_hardware_sampler::sampling_loop() {
         if (rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info) == RSMI_STATUS_SUCCESS) {
             memory_samples_.num_pcie_lanes_min_ = bandwidth_info.lanes[0];
             memory_samples_.num_pcie_lanes_max_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1];
-            memory_samples_.pcie_link_transfer_rate_min_ = bandwidth_info.transfer_rate.frequency[0] / 1000000;
-            memory_samples_.pcie_link_transfer_rate_max_ = bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.num_supported - 1] / 1000000;
+            memory_samples_.pcie_link_transfer_rate_min_ = bandwidth_info.transfer_rate.frequency[0] / 1'000'000;
+            memory_samples_.pcie_link_transfer_rate_max_ = bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.num_supported - 1] / 1'000'000;
 
             // queried samples -> retrieved every iteration if available
             memory_samples_.pcie_link_transfer_rate_ = decltype(memory_samples_.pcie_link_transfer_rate_)::value_type{};
             memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{};
             if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) {
-                memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000);
+                memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1'000'000);
                 memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]);
             } else {
                 // the current index is (somehow) wrong
@@ -332,14 +333,13 @@ void gpu_amd_hardware_sampler::sampling_loop() {
     }
 
     // retrieve fixed temperature related information
-    {
+    if (this->sample_category_enabled(sample_category::temperature)) {
         std::uint32_t fan_id{ 0 };
         std::int64_t fan_speed{};
         while (rsmi_dev_fan_speed_get(device_id_, fan_id, &fan_speed) == RSMI_STATUS_SUCCESS) {
             if (fan_id == 0) {
                 // queried samples -> retrieved every iteration if available
-                const auto percentage = static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(fan_speed) /
-                                        static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(RSMI_MAX_FAN_SPEED);
+                const auto percentage = static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(fan_speed) / static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(RSMI_MAX_FAN_SPEED);
                 temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ percentage };
             }
             ++fan_id;
@@ -469,7 +469,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             this->add_time_point(std::chrono::steady_clock::now());
 
             // retrieve general samples
-            {
+            if (this->sample_category_enabled(sample_category::general)) {
                 if (general_samples_.performance_level_.has_value()) {
                     rsmi_dev_perf_level_t pstate{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate))
@@ -490,12 +490,12 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             }
 
             // retrieve clock related samples
-            {
+            if (this->sample_category_enabled(sample_category::clock)) {
                 if (clock_samples_.clock_frequency_.has_value()) {
                     rsmi_frequencies_t frequency_info{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info))
                     if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
+                        clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
                     } else {
                         // the current index is (somehow) wrong
                         clock_samples_.clock_frequency_->push_back(0);
@@ -506,7 +506,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     rsmi_frequencies_t frequency_info{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info))
                     if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        clock_samples_.socket_clock_frequency_->push_back(static_cast<decltype(clock_samples_.socket_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
+                        clock_samples_.socket_clock_frequency_->push_back(static_cast<decltype(clock_samples_.socket_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
                     } else {
                         // the current index is (somehow) wrong
                         clock_samples_.socket_clock_frequency_->push_back(0);
@@ -517,7 +517,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     rsmi_frequencies_t frequency_info{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info))
                     if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000.0 / 1000.0);
+                        clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
                     } else {
                         // the current index is (somehow) wrong
                         clock_samples_.memory_clock_frequency_->push_back(0);
@@ -538,12 +538,12 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             }
 
             // retrieve power related samples
-            {
+            if (this->sample_category_enabled(sample_category::power)) {
                 if (power_samples_.power_usage_.has_value()) {
                     [[maybe_unused]] RSMI_POWER_TYPE power_type{};
                     std::uint64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type))
-                    power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value) / 1000.0 / 1000.0);
+                    power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value) / 1000'000.0);
                 }
 
                 if (power_samples_.power_total_energy_consumption_.has_value()) {
@@ -552,7 +552,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     std::uint64_t value{};
                     if (rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {
                         const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
-                        power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000.0 / 1000.0) - initial_total_power_consumption);
+                        power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000'000.0) - initial_total_power_consumption);
                     } else if (power_samples_.power_usage_.has_value()) {
                         // if the total energy consumption cannot be retrieved, but the current power draw, approximate it
                         const std::size_t num_time_points = this->sampling_time_points().size();
@@ -595,7 +595,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             }
 
             // retrieve memory related samples
-            {
+            if (this->sample_category_enabled(sample_category::memory)) {
                 if (memory_samples_.memory_used_.has_value()) {
                     decltype(memory_samples_.memory_used_)::value_type::value_type value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value))
@@ -609,7 +609,7 @@ void gpu_amd_hardware_sampler::sampling_loop() {
                     rsmi_pcie_bandwidth_t bandwidth_info{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info))
                     if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1000000);
+                        memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1'000'000);
                         memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]);
                     } else {
                         // the current index is (somehow) wrong
@@ -620,12 +620,11 @@ void gpu_amd_hardware_sampler::sampling_loop() {
             }
 
             // retrieve temperature related samples
-            {
+            if (this->sample_category_enabled(sample_category::temperature)) {
                 if (temperature_samples_.fan_speed_percentage_.has_value()) {
                     std::int64_t value{};
                     HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value))
-                    temperature_samples_.fan_speed_percentage_->push_back(static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(value) /
-                                                                          static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(RSMI_MAX_FAN_SPEED));
+                    temperature_samples_.fan_speed_percentage_->push_back(static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(value) / static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(RSMI_MAX_FAN_SPEED));
                 }
 
                 if (temperature_samples_.temperature_.has_value()) {
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
index 48e459b..f96a695 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
@@ -11,6 +11,7 @@
 #include "hardware_sampling/gpu_intel/level_zero_samples.hpp"             // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
 #include "hardware_sampling/gpu_intel/utility.hpp"                        // HWS_LEVEL_ZERO_ERROR_CHECK
 #include "hardware_sampling/hardware_sampler.hpp"                         // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"                          // hws::sample_category
 #include "hardware_sampling/utility.hpp"                                  // hws::{durations_from_reference_time, join}
 
 #include "fmt/format.h"          // fmt::format
@@ -31,17 +32,17 @@
 
 namespace hws {
 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler() :
-    gpu_intel_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { }
+gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const sample_category category) :
+    gpu_intel_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { }
 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id) :
-    gpu_intel_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { }
+gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const sample_category category) :
+    gpu_intel_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { }
 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    gpu_intel_hardware_sampler{ 0, sampling_interval } { }
+gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    gpu_intel_hardware_sampler{ 0, sampling_interval, category } { }
 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) :
-    hardware_sampler{ sampling_interval } {
+gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    hardware_sampler{ sampling_interval, category } {
     // make sure that zeInit is only called once for all instances
     if (instances_++ == 0) {
         HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY))
@@ -90,7 +91,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     double initial_total_power_consumption{};  // initial total power consumption in J
 
     // retrieve initial general information
-    {
+    if (this->sample_category_enabled(sample_category::general)) {
         // the byte order is given by Intel directly
         general_samples_.byte_order_ = "Little Endian";
 
@@ -137,7 +138,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial clock related information
-    {
+    if (this->sample_category_enabled(sample_category::clock)) {
         std::uint32_t num_frequency_domains{ 0 };
         if (zesDeviceEnumFrequencyDomains(device, &num_frequency_domains, nullptr) == ZE_RESULT_SUCCESS) {
             frequency_handles.resize(num_frequency_domains);
@@ -226,7 +227,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial power related information
-    {
+    if (this->sample_category_enabled(sample_category::power)) {
         std::uint32_t num_power_domains{ 0 };
         if (zesDeviceEnumPowerDomains(device, &num_power_domains, nullptr) == ZE_RESULT_SUCCESS) {
             power_handles.resize(num_power_domains);
@@ -281,7 +282,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial memory related information
-    {
+    if (this->sample_category_enabled(sample_category::memory)) {
         std::uint32_t num_memory_modules{ 0 };
         if (zesDeviceEnumMemoryModules(device, &num_memory_modules, nullptr) == ZE_RESULT_SUCCESS) {
             memory_handles.resize(num_memory_modules);
@@ -378,7 +379,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial temperature related information
-    {
+    if (this->sample_category_enabled(sample_category::temperature)) {
         std::uint32_t num_fans{ 0 };
         if (zesDeviceEnumFans(device, &num_fans, nullptr) == ZE_RESULT_SUCCESS) {
             temperature_samples_.num_fans_ = num_fans;
@@ -503,7 +504,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
             this->add_time_point(std::chrono::steady_clock::now());
 
             // retrieve clock related samples
-            {
+            if (this->sample_category_enabled(sample_category::clock)) {
                 for (zes_freq_handle_t handle : frequency_handles) {
                     // get frequency properties
                     zes_freq_properties_t prop{};
@@ -550,7 +551,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
             }
 
             // retrieve power related samples
-            {
+            if (this->sample_category_enabled(sample_category::power)) {
                 if (!power_handles.empty()) {
                     // NOTE: only the first power domain is used here
                     if (power_samples_.power_total_energy_consumption_.has_value()) {
@@ -572,7 +573,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
             }
 
             // retrieve memory related samples
-            {
+            if (this->sample_category_enabled(sample_category::memory)) {
                 for (zes_mem_handle_t handle : memory_handles) {
                     zes_mem_properties_t prop{};
                     HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop))
@@ -610,7 +611,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
             }
 
             // retrieve temperature related samples
-            {
+            if (this->sample_category_enabled(sample_category::temperature)) {
                 if (!psu_handles.empty()) {
                     if (temperature_samples_.psu_temperature_.has_value()) {
                         // NOTE: only the first PSU is used here
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index f9a064b..7d16a0a 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -11,6 +11,7 @@
 #include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"             // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
 #include "hardware_sampling/gpu_nvidia/utility.hpp"                  // HWS_NVML_ERROR_CHECK
 #include "hardware_sampling/hardware_sampler.hpp"                    // hws::hardware_sampler
+#include "hardware_sampling/sample_category.hpp"                     // hws::sample_category
 #include "hardware_sampling/utility.hpp"                             // hws::detail::time_points_to_epoch
 
 #include "fmt/format.h"  // fmt::format
@@ -33,17 +34,17 @@
 
 namespace hws {
 
-gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler() :
-    gpu_nvidia_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { }
+gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const sample_category category) :
+    gpu_nvidia_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { }
 
-gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id) :
-    gpu_nvidia_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { }
+gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const sample_category category) :
+    gpu_nvidia_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { }
 
-gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    gpu_nvidia_hardware_sampler{ 0, sampling_interval } { }
+gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    gpu_nvidia_hardware_sampler{ 0, sampling_interval, category } { }
 
-gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) :
-    hardware_sampler{ sampling_interval } {
+gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    hardware_sampler{ sampling_interval, category } {
     // make sure that nvmlInit is only called once for all instances
     if (instances_++ == 0) {
         HWS_NVML_ERROR_CHECK(nvmlInit())
@@ -91,7 +92,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
     double initial_total_power_consumption{};  // initial total power consumption in J
 
     // retrieve initial general information
-    {
+    if (this->sample_category_enabled(sample_category::general)) {
         // fixed information -> only retrieved once
         nvmlDeviceArchitecture_t device_arch{};
         if (nvmlDeviceGetArchitecture(device, &device_arch) == NVML_SUCCESS) {
@@ -186,7 +187,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial clock related information
-    {
+    if (this->sample_category_enabled(sample_category::clock)) {
         // fixed information -> only retrieved once
         unsigned int adaptive_clock_status{};
         if (nvmlDeviceGetAdaptiveClockInfoStatus(device, &adaptive_clock_status) == NVML_SUCCESS) {
@@ -278,7 +279,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial power related information
-    {
+    if (this->sample_category_enabled(sample_category::power)) {
         // fixed information -> only retrieved once
         nvmlEnableState_t mode{};
         if (nvmlDeviceGetPowerManagementMode(device, &mode) == NVML_SUCCESS) {
@@ -334,7 +335,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial memory related information
-    {
+    if (this->sample_category_enabled(sample_category::memory)) {
         // fixed information -> only retrieved once
         nvmlMemory_t memory_info{};
         if (nvmlDeviceGetMemoryInfo(device, &memory_info) == NVML_SUCCESS) {
@@ -377,7 +378,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial temperature related information
-    {
+    if (this->sample_category_enabled(sample_category::temperature)) {
         // fixed information -> only retrieved once
         decltype(temperature_samples_.num_fans_)::value_type num_fans{};
         if (nvmlDeviceGetNumFans(device, &num_fans) == NVML_SUCCESS) {
@@ -426,7 +427,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             this->add_time_point(std::chrono::steady_clock::now());
 
             // retrieve general samples
-            {
+            if (this->sample_category_enabled(sample_category::general)) {
                 if (general_samples_.performance_level_.has_value()) {
                     nvmlPstates_t pstate{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate))
@@ -442,7 +443,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             }
 
             // retrieve clock related samples
-            {
+            if (this->sample_category_enabled(sample_category::clock)) {
                 if (clock_samples_.clock_frequency_.has_value()) {
                     unsigned int value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value))
@@ -476,7 +477,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             }
 
             // retrieve power related information
-            {
+            if (this->sample_category_enabled(sample_category::power)) {
                 if (power_samples_.power_profile_.has_value()) {
                     nvmlPstates_t pstate{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate))
@@ -497,7 +498,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             }
 
             // retrieve memory related information
-            {
+            if (this->sample_category_enabled(sample_category::memory)) {
                 if (memory_samples_.memory_free_.has_value() && memory_samples_.memory_used_.has_value()) {
                     nvmlMemory_t memory_info{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info))
@@ -519,7 +520,7 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             }
 
             // retrieve temperature related information
-            {
+            if (this->sample_category_enabled(sample_category::temperature)) {
                 if (temperature_samples_.fan_speed_percentage_.has_value()) {
                     unsigned int value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value))
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index 903db8a..db0a4f5 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -10,9 +10,9 @@
 #include "hardware_sampling/event.hpp"    // hws::event
 #include "hardware_sampling/utility.hpp"  // hws::detail::durations_from_reference_time
 
+#include "fmt/chrono.h"  // direct formatting of std::chrono types
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
-#include "fmt/chrono.h"  // direct formatting of std::chrono types
 
 #include <chrono>     // std::chrono::{system_clock, steady_clock, duration_cast, milliseconds}
 #include <cstddef>    // std::size_t
@@ -25,8 +25,9 @@
 
 namespace hws {
 
-hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    sampling_interval_{ sampling_interval } { }
+hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    sampling_interval_{ sampling_interval },
+    sample_category_{ category } { }
 
 hardware_sampler::~hardware_sampler() = default;
 
@@ -170,4 +171,12 @@ void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const {
     this->dump_yaml(filename.string().c_str());
 }
 
+void hardware_sampler::add_time_point(const std::chrono::steady_clock::time_point time_point) {
+    time_points_.push_back(time_point);
+}
+
+bool hardware_sampler::sample_category_enabled(const sample_category category) const noexcept {
+    return static_cast<int>(this->sample_category_ & category) != 0;
+}
+
 }  // namespace hws
diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp
index d2d63a9..dc5d34e 100644
--- a/src/hardware_sampling/system_hardware_sampler.cpp
+++ b/src/hardware_sampling/system_hardware_sampler.cpp
@@ -7,7 +7,8 @@
 
 #include "hardware_sampling/system_hardware_sampler.hpp"
 
-#include "hardware_sampling/event.hpp"  // hws::event
+#include "hardware_sampling/event.hpp"            // hws::event
+#include "hardware_sampling/sample_category.hpp"  // hws::sample_category
 
 #if defined(HWS_FOR_CPUS_ENABLED)
     #include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
@@ -37,14 +38,14 @@
 
 namespace hws {
 
-system_hardware_sampler::system_hardware_sampler() :
-    system_hardware_sampler{ HWS_SAMPLING_INTERVAL } { }
+system_hardware_sampler::system_hardware_sampler(const sample_category category) :
+    system_hardware_sampler{ HWS_SAMPLING_INTERVAL, category } { }
 
-system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds sampling_interval) {
+system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds sampling_interval, sample_category category) {
     // create the hardware samplers based on the available hardware
 #if defined(HWS_FOR_CPUS_ENABLED)
     {
-        samplers_.push_back(std::make_unique<cpu_hardware_sampler>(sampling_interval));
+        samplers_.push_back(std::make_unique<cpu_hardware_sampler>(sampling_interval, category));
     }
 #endif
 #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
@@ -52,7 +53,7 @@ system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds
         int device_count{};
         HWS_CUDA_ERROR_CHECK(cudaGetDeviceCount(&device_count));
         for (int device = 0; device < device_count; ++device) {
-            samplers_.push_back(std::make_unique<gpu_nvidia_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval));
+            samplers_.push_back(std::make_unique<gpu_nvidia_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval, category));
         }
     }
 #endif
@@ -61,7 +62,7 @@ system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds
         int device_count{};
         HWS_HIP_ERROR_CHECK(hipGetDeviceCount(&device_count));
         for (int device = 0; device < device_count; ++device) {
-            samplers_.push_back(std::make_unique<gpu_amd_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval));
+            samplers_.push_back(std::make_unique<gpu_amd_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval, category));
         }
     }
 #endif

From d5e33bfb890c8dc90360ec50c1551aa56fa72500 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 17:21:26 +0200
Subject: [PATCH 49/69] Output throttle reasons as string and as bitmask.

---
 README.md                                     | 52 ++++++++++---------
 bindings/gpu_intel_hardware_sampler.cpp       |  6 ++-
 bindings/gpu_nvidia_hardware_sampler.cpp      |  3 +-
 .../gpu_intel/level_zero_samples.hpp          | 24 +++++----
 .../gpu_nvidia/nvml_samples.hpp               | 11 ++--
 .../gpu_intel/hardware_sampler.cpp            | 32 +++++++++---
 .../gpu_intel/level_zero_samples.cpp          | 33 +++++++++---
 .../gpu_nvidia/hardware_sampler.cpp           | 12 +++--
 .../gpu_nvidia/nvml_samples.cpp               | 17 ++++--
 9 files changed, 123 insertions(+), 67 deletions(-)

diff --git a/README.md b/README.md
index 3207e22..77869b4 100644
--- a/README.md
+++ b/README.md
@@ -109,31 +109,33 @@ current clock frequencies, temperatures, or memory consumption.
 
 ### clock-related samples
 
-| sample                             | sample type | CPUs |   NVIDIA GPUs    |  AMD GPUs   |    Intel GPUs    |
-|:-----------------------------------|:-----------:|:----:|:----------------:|:-----------:|:----------------:|
-| auto_boosted_clock_enabled         |    fixed    | bool |       bool       |      -      |        -         |
-| clock_frequency_min                |    fixed    | MHz  |       MHz        |     MHz     |       MHz        |
-| clock_frequency_max                |    fixed    | MHz  |       MHz        |     MHz     |       MHz        |
-| memory_clock_frequency_min         |    fixed    |  -   |       MHz        |     MHz     |       MHz        |
-| memory_clock_frequency_max         |    fixed    |  -   |       MHz        |     MHz     |       MHz        |
-| socket_clock_frequency_min         |    fixed    |  -   |        -         |     MHz     |        -         |
-| socket_clock_frequency_min         |    fixed    |  -   |        -         |     MHz     |        -         |
-| sm_clock_frequency_max             |    fixed    |  -   |       MHz        |      -      |        -         |
-| available_clock_frequencies        |    fixed    |  -   |    map of MHz    | list of MHz |   list of MHz    |
-| available_memory_clock_frequencies |    fixed    |  -   |   list of MHz    | list of MHz |   list of MHz    |
-| clock_frequency                    |   sampled   | MHz  |       MHz        |     MHz     |       MHz        |
-| average_non_idle_clock_frequency   |   sampled   | MHz  |        -         |      -      |        -         |
-| time_stamp_counter                 |   sampled   | MHz  |        -         |      -      |        -         |
-| memory_clock_frequency             |   sampled   |  -   |       MHz        |     MHz     |       MHz        |
-| socket_clock_frequency             |   sampled   |  -   |        -         |     MHz     |        -         |
-| sm_clock_frequency                 |   sampled   |  -   |       MHz        |      -      |        -         |
-| overdrive_level                    |   sampled   |  -   |        -         |      %      |        -         |
-| memory_overdrive_level             |   sampled   |  -   |        -         |      %      |        -         |
-| throttle_reason                    |   sampled   |  -   | string (bitmask) |      -      | string (bitmask) |
-| memory_throttle_reason             |   sampled   |  -   |        -         |      -      | string (bitmask) |
-| auto_boosted_clock                 |   sampled   |  -   |       bool       |      -      |        -         |
-| frequency_limit_tdp                |   sampled   |  -   |        -         |      -      |       MHz        |
-| memory_frequency_limit_tdp         |   sampled   |  -   |        -         |      -      |       MHz        |
+| sample                             | sample type | CPUs | NVIDIA GPUs |  AMD GPUs   | Intel GPUs  |
+|:-----------------------------------|:-----------:|:----:|:-----------:|:-----------:|:-----------:|
+| auto_boosted_clock_enabled         |    fixed    | bool |    bool     |      -      |      -      |
+| clock_frequency_min                |    fixed    | MHz  |     MHz     |     MHz     |     MHz     |
+| clock_frequency_max                |    fixed    | MHz  |     MHz     |     MHz     |     MHz     |
+| memory_clock_frequency_min         |    fixed    |  -   |     MHz     |     MHz     |     MHz     |
+| memory_clock_frequency_max         |    fixed    |  -   |     MHz     |     MHz     |     MHz     |
+| socket_clock_frequency_min         |    fixed    |  -   |      -      |     MHz     |      -      |
+| socket_clock_frequency_max         |    fixed    |  -   |      -      |     MHz     |      -      |
+| sm_clock_frequency_max             |    fixed    |  -   |     MHz     |      -      |      -      |
+| available_clock_frequencies        |    fixed    |  -   | map of MHz  | list of MHz | list of MHz |
+| available_memory_clock_frequencies |    fixed    |  -   | list of MHz | list of MHz | list of MHz |
+| clock_frequency                    |   sampled   | MHz  |     MHz     |     MHz     |     MHz     |
+| average_non_idle_clock_frequency   |   sampled   | MHz  |      -      |      -      |      -      |
+| time_stamp_counter                 |   sampled   | MHz  |      -      |      -      |      -      |
+| memory_clock_frequency             |   sampled   |  -   |     MHz     |     MHz     |     MHz     |
+| socket_clock_frequency             |   sampled   |  -   |      -      |     MHz     |      -      |
+| sm_clock_frequency                 |   sampled   |  -   |     MHz     |      -      |      -      |
+| overdrive_level                    |   sampled   |  -   |      -      |      %      |      -      |
+| memory_overdrive_level             |   sampled   |  -   |      -      |      %      |      -      |
+| throttle_reason                    |   sampled   |  -   |   bitmask   |      -      |   bitmask   |
+| throttle_reason_string             |   sampled   |  -   |     str     |      -      |     str     |
+| memory_throttle_reason             |   sampled   |  -   |      -      |      -      |   bitmask   |
+| memory_throttle_reason_string      |   sampled   |  -   |      -      |      -      |     str     |
+| auto_boosted_clock                 |   sampled   |  -   |    bool     |      -      |      -      |
+| frequency_limit_tdp                |   sampled   |  -   |      -      |      -      |     MHz     |
+| memory_frequency_limit_tdp         |   sampled   |  -   |      -      |      -      |     MHz     |
 
 ### power-related samples
 
diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp
index 546d295..4485701 100644
--- a/bindings/gpu_intel_hardware_sampler.cpp
+++ b/bindings/gpu_intel_hardware_sampler.cpp
@@ -46,8 +46,10 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
         .def("get_available_memory_clock_frequencies", &hws::level_zero_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)")
         .def("get_clock_frequency", &hws::level_zero_clock_samples::get_clock_frequency, "the current GPU frequency in MHz")
         .def("get_memory_clock_frequency", &hws::level_zero_clock_samples::get_memory_clock_frequency, "the current memory frequency in MHz")
-        .def("get_throttle_reason", &hws::level_zero_clock_samples::get_throttle_reason, "the current GPU frequency throttle reason")
-        .def("get_memory_throttle_reason", &hws::level_zero_clock_samples::get_memory_throttle_reason, "the current memory frequency throttle reason")
+        .def("get_throttle_reason", &hws::level_zero_clock_samples::get_throttle_reason, "the current GPU frequency throttle reason (as bitmask)")
+        .def("get_throttle_reason_string", &hws::level_zero_clock_samples::get_throttle_reason_string, "the current GPU frequency throttle reason (as string)")
+        .def("get_memory_throttle_reason", &hws::level_zero_clock_samples::get_memory_throttle_reason, "the current memory frequency throttle reason (as bitmask)")
+        .def("get_memory_throttle_reason_string", &hws::level_zero_clock_samples::get_memory_throttle_reason_string, "the current memory frequency throttle reason (as string)")
         .def("get_frequency_limit_tdp", &hws::level_zero_clock_samples::get_frequency_limit_tdp, "the current maximum allowed GPU frequency based on the TDP limit in MHz")
         .def("get_memory_frequency_limit_tdp", &hws::level_zero_clock_samples::get_memory_frequency_limit_tdp, "the current maximum allowed memory frequency based on the TDP limit in MHz")
         .def("__repr__", [](const hws::level_zero_clock_samples &self) {
diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp
index 10a04b3..23a0549 100644
--- a/bindings/gpu_nvidia_hardware_sampler.cpp
+++ b/bindings/gpu_nvidia_hardware_sampler.cpp
@@ -51,7 +51,8 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
         .def("get_available_memory_clock_frequencies", &hws::nvml_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)")
         .def("get_memory_clock_frequency", &hws::nvml_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz")
         .def("get_sm_clock_frequency", &hws::nvml_clock_samples::get_sm_clock_frequency, "the current SM clock frequency in Mhz")
-        .def("get_throttle_reason", &hws::nvml_clock_samples::get_throttle_reason, "the reason the GPU clock throttled")
+        .def("get_throttle_reason", &hws::nvml_clock_samples::get_throttle_reason, "the reason the GPU clock throttled (as bitmask)")
+        .def("get_throttle_reason_string", &hws::nvml_clock_samples::get_throttle_reason_string, "the reason the GPU clock throttled (as string)")
         .def("get_auto_boosted_clock", &hws::nvml_clock_samples::get_auto_boosted_clock, "true if the clocks are currently auto boosted")
         .def("__repr__", [](const hws::nvml_clock_samples &self) {
             return fmt::format("<HardwareSampling.NvmlClockSamples with\n{}\n>", self);
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
index f178f7f..f53422e 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
@@ -16,7 +16,7 @@
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
-#include <cstdint>        // std::uint64_t, std::int32_t
+#include <cstdint>        // std::uint64_t, std::int64_t, std::int32_t
 #include <iosfwd>         // std::ostream forward declaration
 #include <optional>       // std::optional
 #include <string>         // std::string
@@ -50,10 +50,10 @@ class level_zero_general_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)          // the byte order (e.g., little/big endian)
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)           // the vendor ID
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                // the model name of the device
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, flags)  // potential GPU flags (e.g. integrated device)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)            // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)             // the vendor ID
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                  // the model name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, flags)    // potential GPU flags (e.g. integrated device)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode)          // the enabled standby mode (power saving or never)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu)  // the number of threads per EU unit
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, eu_simd_width)       // the physical EU unit SIMD width
@@ -100,12 +100,14 @@ class level_zero_clock_samples {
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_clock_frequencies)         // the available GPU clock frequencies in MHz (slowest to fastest)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_memory_clock_frequencies)  // the available memory clock frequencies in MHz (slowest to fastest)
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)              // the current GPU frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)       // the current memory frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason)         // the current GPU frequency throttle reason
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, memory_throttle_reason)  // the current memory frequency throttle reason
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, frequency_limit_tdp)          // the current maximum allowed GPU frequency based on the TDP limit in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_frequency_limit_tdp)   // the current maximum allowed memory frequency based on the TDP limit in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)                     // the current GPU frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)              // the current memory frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, throttle_reason)               // the current GPU frequency throttle reason as bitmask
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason_string)         // the current GPU frequency throttle reason as string
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, memory_throttle_reason)        // the current memory frequency throttle reason as bitmask
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, memory_throttle_reason_string)  // the current memory frequency throttle reason as string
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, frequency_limit_tdp)                 // the current maximum allowed GPU frequency based on the TDP limit in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_frequency_limit_tdp)          // the current maximum allowed memory frequency based on the TDP limit in MHz
 };
 
 /**
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index 31acebb..91f4e6b 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -106,11 +106,12 @@ class nvml_clock_samples {
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, available_clock_frequencies)                    // the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_memory_clock_frequencies)  // the available memory clock frequencies in MHz (slowest to fastest)
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)         // the current graphics clock frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)  // the current memory clock frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, sm_clock_frequency)      // the current SM clock frequency in Mhz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason)    // the reason the GPU clock throttled
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clock)        // true if the clocks are currently auto boosted
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)              // the current graphics clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)       // the current memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, sm_clock_frequency)           // the current SM clock frequency in Mhz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, throttle_reason)  // the reason the GPU clock throttled (as bitmask)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason_string)  // the reason the GPU clock throttled (as string)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clock)             // true if the clocks are currently auto boosted
 };
 
 /**
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
index f96a695..67abc5d 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
@@ -20,7 +20,7 @@
 
 #include <chrono>     // std::chrono::{steady_clock, duration_cast, milliseconds}
 #include <cstddef>    // std::size_t
-#include <cstdint>    // std::int32_t
+#include <cstdint>    // std::int32_t, std::int64_t
 #include <exception>  // std::exception, std::terminate
 #include <ios>        // std::ios_base
 #include <iostream>   // std::cerr, std::endl
@@ -196,8 +196,14 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                                             clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ frequency_state.actual };
                                         }
                                         if (frequency_state.throttleReasons >= 0.0) {
-                                            using vector_type = decltype(clock_samples_.throttle_reason_)::value_type;
-                                            clock_samples_.throttle_reason_ = vector_type{ static_cast<vector_type::value_type>(detail::throttle_reason_to_string(frequency_state.throttleReasons)) };
+                                            {
+                                                using vector_type = decltype(clock_samples_.throttle_reason_)::value_type;
+                                                clock_samples_.throttle_reason_ = vector_type{ static_cast<vector_type::value_type>(static_cast<std::int64_t>(frequency_state.throttleReasons)) };
+                                            }
+                                            {
+                                                using vector_type = decltype(clock_samples_.throttle_reason_string_)::value_type;
+                                                clock_samples_.throttle_reason_string_ = vector_type{ static_cast<vector_type::value_type>(detail::throttle_reason_to_string(frequency_state.throttleReasons)) };
+                                            }
                                         }
                                     }
                                     break;
@@ -210,8 +216,14 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                                             clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ frequency_state.actual };
                                         }
                                         if (frequency_state.throttleReasons >= 0.0) {
-                                            using vector_type = decltype(clock_samples_.memory_throttle_reason_)::value_type;
-                                            clock_samples_.memory_throttle_reason_ = vector_type{ static_cast<vector_type::value_type>(detail::throttle_reason_to_string(frequency_state.throttleReasons)) };
+                                            {
+                                                using vector_type = decltype(clock_samples_.memory_throttle_reason_)::value_type;
+                                                clock_samples_.memory_throttle_reason_ = vector_type{ static_cast<vector_type::value_type>(static_cast<std::int64_t>(frequency_state.throttleReasons)) };
+                                            }
+                                            {
+                                                using vector_type = decltype(clock_samples_.memory_throttle_reason_string_)::value_type;
+                                                clock_samples_.memory_throttle_reason_string_ = vector_type{ static_cast<vector_type::value_type>(detail::throttle_reason_to_string(frequency_state.throttleReasons)) };
+                                            }
                                         }
                                     }
                                     break;
@@ -525,7 +537,10 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                                         clock_samples_.clock_frequency_->push_back(frequency_state.actual);
                                     }
                                     if (clock_samples_.throttle_reason_.has_value()) {
-                                        clock_samples_.throttle_reason_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons));
+                                        clock_samples_.throttle_reason_->push_back(static_cast<std::int64_t>(frequency_state.throttleReasons));
+                                    }
+                                    if (clock_samples_.throttle_reason_string_.has_value()) {
+                                        clock_samples_.throttle_reason_string_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons));
                                     }
                                 }
                                 break;
@@ -538,7 +553,10 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                                         clock_samples_.memory_clock_frequency_->push_back(frequency_state.actual);
                                     }
                                     if (clock_samples_.memory_throttle_reason_.has_value()) {
-                                        clock_samples_.memory_throttle_reason_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons));
+                                        clock_samples_.memory_throttle_reason_->push_back(static_cast<std::int64_t>(frequency_state.throttleReasons));
+                                    }
+                                    if (clock_samples_.memory_throttle_reason_string_.has_value()) {
+                                        clock_samples_.memory_throttle_reason_string_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons));
                                     }
                                 }
                                 break;
diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
index a10a358..ab749fb 100644
--- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
+++ b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
@@ -129,7 +129,8 @@ bool level_zero_clock_samples::has_samples() const {
     return this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() || this->memory_clock_frequency_min_.has_value()
            || this->memory_clock_frequency_max_.has_value() || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value()
            || this->clock_frequency_.has_value() || this->memory_clock_frequency_.has_value() || this->throttle_reason_.has_value()
-           || this->memory_throttle_reason_.has_value() || this->frequency_limit_tdp_.has_value() || this->memory_frequency_limit_tdp_.has_value();
+           || this->throttle_reason_string_.has_value() || this->memory_throttle_reason_.has_value() || this->memory_throttle_reason_string_.has_value()
+           || this->frequency_limit_tdp_.has_value() || this->memory_frequency_limit_tdp_.has_value();
 }
 
 std::string level_zero_clock_samples::generate_yaml_string() const {
@@ -197,20 +198,34 @@ std::string level_zero_clock_samples::generate_yaml_string() const {
                            "    values: [{}]\n",
                            fmt::join(this->memory_clock_frequency_.value(), ", "));
     }
-    // the current GPU core throttle reason
+    // the current GPU core throttle reason as bitmask
     if (this->throttle_reason_.has_value()) {
         str += fmt::format("  throttle_reason:\n"
-                           "    unit: \"string\"\n"
+                           "    unit: \"bitmask\"\n"
                            "    values: [{}]\n",
                            fmt::join(this->throttle_reason_.value(), ", "));
     }
-    // the current memory throttle reason
+    // the current GPU core throttle reason as string
+    if (this->throttle_reason_string_.has_value()) {
+        str += fmt::format("  throttle_reason_string:\n"
+                           "    unit: \"string\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->throttle_reason_string_.value(), ", "));
+    }
+    // the current memory throttle reason as bitmask
     if (this->memory_throttle_reason_.has_value()) {
         str += fmt::format("  memory_throttle_reason:\n"
-                           "    unit: \"string\"\n"
+                           "    unit: \"bitmask\"\n"
                            "    values: [{}]\n",
                            fmt::join(this->memory_throttle_reason_.value(), ", "));
     }
+    // the current memory throttle reason as string
+    if (this->memory_throttle_reason_string_.has_value()) {
+        str += fmt::format("  memory_throttle_reason_string:\n"
+                           "    unit: \"string\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_throttle_reason_string_.value(), ", "));
+    }
     // the maximum GPU core frequency based on the current TDP limit
     if (this->frequency_limit_tdp_.has_value()) {
         str += fmt::format("  frequency_limit_tdp:\n"
@@ -238,8 +253,10 @@ std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samp
                               "available_memory_clock_frequencies [MHz]: [{}]\n"
                               "clock_frequency [MHz]: [{}]\n"
                               "memory_clock_frequency [MHz]: [{}]\n"
-                              "throttle_reason [string]: [{}]\n"
-                              "memory_throttle_reason [string]: [{}]\n"
+                              "throttle_reason [bitmask]: [{}]\n"
+                              "throttle_reason_string [string]: [{}]\n"
+                              "memory_throttle_reason [bitmask]: [{}]\n"
+                              "memory_throttle_reason_string [string]: [{}]\n"
                               "frequency_limit_tdp [MHz]: [{}]\n"
                               "memory_frequency_limit_tdp [MHz]: [{}]",
                               detail::value_or_default(samples.get_clock_frequency_min()),
@@ -251,7 +268,9 @@ std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samp
                               fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
                               fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
                               fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_throttle_reason_string()), ", "),
                               fmt::join(detail::value_or_default(samples.get_memory_throttle_reason()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_throttle_reason_string()), ", "),
                               fmt::join(detail::value_or_default(samples.get_frequency_limit_tdp()), ", "),
                               fmt::join(detail::value_or_default(samples.get_memory_frequency_limit_tdp()), ", "));
 }
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 7d16a0a..14d6a9f 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -266,9 +266,10 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
             clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(clock_mem) };
         }
 
-        unsigned long long clock_throttle_reason{};
+        decltype(clock_samples_.throttle_reason_)::value_type::value_type clock_throttle_reason{};
         if (nvmlDeviceGetCurrentClocksEventReasons(device, &clock_throttle_reason) == NVML_SUCCESS) {
-            clock_samples_.throttle_reason_ = decltype(clock_samples_.throttle_reason_)::value_type{ detail::throttle_event_reason_to_string(clock_throttle_reason) };
+            clock_samples_.throttle_reason_ = decltype(clock_samples_.throttle_reason_)::value_type{ clock_throttle_reason };
+            clock_samples_.throttle_reason_string_ = decltype(clock_samples_.throttle_reason_string_)::value_type{ detail::throttle_event_reason_to_string(clock_throttle_reason) };
         }
 
         nvmlEnableState_t mode{};
@@ -462,10 +463,11 @@ void gpu_nvidia_hardware_sampler::sampling_loop() {
                     clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(value));
                 }
 
-                if (clock_samples_.throttle_reason_.has_value()) {
-                    unsigned long long value{};
+                if (clock_samples_.throttle_reason_string_.has_value()) {
+                    decltype(clock_samples_.throttle_reason_)::value_type::value_type value{};
                     HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value))
-                    clock_samples_.throttle_reason_->push_back(detail::throttle_event_reason_to_string(value));
+                    clock_samples_.throttle_reason_->push_back(value);
+                    clock_samples_.throttle_reason_string_->push_back(detail::throttle_event_reason_to_string(value));
                 }
 
                 if (clock_samples_.auto_boosted_clock_.has_value()) {
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
index 0412ef2..b07c7d2 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
@@ -134,7 +134,7 @@ bool nvml_clock_samples::has_samples() const {
            || this->memory_clock_frequency_min_.has_value() || this->memory_clock_frequency_max_.has_value() || this->sm_clock_frequency_max_.has_value()
            || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value()
            || this->memory_clock_frequency_.has_value() || this->sm_clock_frequency_.has_value() || this->throttle_reason_.has_value()
-           || this->auto_boosted_clock_.has_value();
+           || this->throttle_reason_string_.has_value() || this->auto_boosted_clock_.has_value();
 }
 
 std::string nvml_clock_samples::generate_yaml_string() const {
@@ -225,13 +225,20 @@ std::string nvml_clock_samples::generate_yaml_string() const {
                            "    values: [{}]\n",
                            fmt::join(this->sm_clock_frequency_.value(), ", "));
     }
-    // clock throttle reason
+    // clock throttle reason as bitmask
     if (this->throttle_reason_.has_value()) {
         str += fmt::format("  throttle_reason:\n"
-                           "    unit: \"string\"\n"
+                           "    unit: \"bitmask\"\n"
                            "    values: [{}]\n",
                            fmt::join(detail::quote(this->throttle_reason_.value()), ", "));
     }
+    // clock throttle reason as string
+    if (this->throttle_reason_string_.has_value()) {
+        str += fmt::format("  throttle_reason_string:\n"
+                           "    unit: \"string\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(detail::quote(this->throttle_reason_string_.value()), ", "));
+    }
     // clock is auto-boosted
     if (this->auto_boosted_clock_.has_value()) {
         str += fmt::format("  auto_boosted_clock:\n"
@@ -255,7 +262,8 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) {
                               "clock_frequency [MHz]: [{}]\n"
                               "memory_clock_frequency [MHz]: [{}]\n"
                               "sm_clock_frequency [MHz]: [{}]\n"
-                              "throttle_reason [string]: [{}]\n"
+                              "throttle_reason [bitmask]: [{}]\n"
+                              "throttle_reason_string [string]: [{}]\n"
                               "auto_boosted_clock [bool]: [{}]",
                               detail::value_or_default(samples.get_auto_boosted_clock_enabled()),
                               detail::value_or_default(samples.get_clock_frequency_min()),
@@ -269,6 +277,7 @@ std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) {
                               fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
                               fmt::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "),
                               fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_throttle_reason_string()), ", "),
                               fmt::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", "));
 }
 

From 4cceea6bcd5ad5a7fa780fa4ab09aae1a6602c88 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 17:31:06 +0200
Subject: [PATCH 50/69] Implement Intel GPU system_hardware_sampler device
 discovery.

---
 .../system_hardware_sampler.cpp               | 26 +++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp
index dc5d34e..4f2c15c 100644
--- a/src/hardware_sampling/system_hardware_sampler.cpp
+++ b/src/hardware_sampling/system_hardware_sampler.cpp
@@ -15,7 +15,9 @@
 #endif
 #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
     #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
-    #include "hardware_sampling/gpu_nvidia/utility.hpp"           // HWS_CUDA_ERROR_CHECK, hws::detail::
+    #include "hardware_sampling/gpu_nvidia/utility.hpp"           // HWS_CUDA_ERROR_CHECK
+
+    #include "cuda_runtime.h"  // cudaGetDeviceCount
 #endif
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
     #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
@@ -32,6 +34,8 @@
 
 #include <algorithm>  // std::for_each, std::all_of
 #include <chrono>     // std::chrono::milliseconds
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::uint32_t
 #include <memory>     // std::unique_ptr, std::make_unique
 #include <stdexcept>  // std::out_of_range
 #include <vector>     // std::vector
@@ -68,7 +72,25 @@ system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds
 #endif
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
     {
-        // TODO: implement
+        // discover the number of drivers
+        std::uint32_t driver_count{ 0 };
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr))
+
+        // check if only the single GPU driver has been found
+        if (driver_count > 1) {
+            throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) };
+        }
+
+        // get the GPU driver
+        ze_driver_handle_t driver{};
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver))
+
+        // get all GPUs for the current driver
+        std::uint32_t device_count{ 0 };
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr))
+        for (std::uint32_t device = 0; device < device_count; ++device) {
+            samplers_.push_back(std::make_unique<gpu_intel_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval, category));
+        }
     }
 #endif
 }

From ab809d9ec5c8d3b9a0955e147a341ef7ff358bfd Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 24 Sep 2024 18:13:55 +0200
Subject: [PATCH 51/69] Add a function to return the hardware samples as a YAML
 string instead of only being able to write them directly to a file.

---
 bindings/hardware_sampler.cpp                 |  1 +
 bindings/system_hardware_sampler.cpp          |  1 +
 .../hardware_sampling/hardware_sampler.hpp    |  6 ++
 .../system_hardware_sampler.hpp               |  6 ++
 src/hardware_sampling/hardware_sampler.cpp    | 72 ++++++++++---------
 .../system_hardware_sampler.cpp               |  5 ++
 6 files changed, 59 insertions(+), 32 deletions(-)

diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index 5f86f96..e547478 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -62,6 +62,7 @@ void init_hardware_sampler(py::module_ &m) {
         .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
         .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)")
         .def("dump_yaml", py::overload_cast<const std::string &>(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file")
+        .def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::hardware_sampler &self) {
 #if defined(HWS_FOR_CPUS_ENABLED)
             if (dynamic_cast<const hws::cpu_hardware_sampler *>(&self)) {
diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp
index f26a4b6..2e25e79 100644
--- a/bindings/system_hardware_sampler.cpp
+++ b/bindings/system_hardware_sampler.cpp
@@ -66,5 +66,6 @@ void init_system_hardware_sampler(py::module_ &m) {
             return out; }, "get the hardware samplers available for the whole system")
         .def("sampler", [](hws::system_hardware_sampler &self, const std::size_t idx) { return self.sampler(idx).get(); }, "get the i-th hardware sampler available for the whole system")
         .def("dump_yaml", py::overload_cast<const std::string &>(&hws::system_hardware_sampler::dump_yaml, py::const_), "dump all hardware samples for all hardware samplers to the given YAML file")
+        .def("as_yaml_string", &hws::system_hardware_sampler::as_yaml_string, "return all hardware samples for all hardware samplers as YAML string")
         .def("__repr__", [](const hws::system_hardware_sampler &self) { return fmt::format("<hws.SystemHardwareSampler with {} samples>", self.num_samplers()); });
 }
diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp
index e534d19..ab7bd3f 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hardware_sampling/hardware_sampler.hpp
@@ -160,6 +160,12 @@ class hardware_sampler {
      */
     void dump_yaml(const std::filesystem::path &filename) const;
 
+    /**
+     * @brief Return the hardware samples as YAML string.
+     * @return the YAML content as string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string as_yaml_string() const;
+
   protected:
     /**
      * @brief Getter the hardware samples. Called in another std::thread.
diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp
index d8c56fd..6346833 100644
--- a/include/hardware_sampling/system_hardware_sampler.hpp
+++ b/include/hardware_sampling/system_hardware_sampler.hpp
@@ -175,6 +175,12 @@ class system_hardware_sampler {
      */
     void dump_yaml(const std::filesystem::path &filename) const;
 
+    /**
+     * @brief Return the hardware samples as YAML string.
+     * @return the YAML content as string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string as_yaml_string() const;
+
   private:
     /// The different hardware sampler for the current system.
     std::vector<std::unique_ptr<hardware_sampler>> samplers_;
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index db0a4f5..0ff6a76 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -127,48 +127,56 @@ void hardware_sampler::dump_yaml(const char *filename) const {
     std::ofstream file{ filename, std::ios_base::app };
 
     // begin a new YAML document (only with "---" multiple YAML documents in a single file are allowed)
-    file << "---\n\n";
+    file << "---\n\n" << this->as_yaml_string();
+}
 
-    // set the device identification
-    file << fmt::format("device_identification: \"{}\"\n\n", this->device_identification());
+void hardware_sampler::dump_yaml(const std::string &filename) const {
+    this->dump_yaml(filename.c_str());
+}
 
-    // output the start date time of this hardware sampling
-    file << fmt::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", start_date_time_);
+void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const {
+    this->dump_yaml(filename.string().c_str());
+}
 
-    // output the event information
+std::string hardware_sampler::as_yaml_string() const {
+    if (!this->has_sampling_stopped()) {
+        throw std::runtime_error{ "Can return samples as string only after the sampling has been stopped!" };
+    }
+
+    // generate the event information
     std::vector<decltype(event::time_point)> event_time_points{};
     std::vector<decltype(event::name)> event_names{};
     for (const auto &[time_point, name] : events_) {
         event_time_points.push_back(time_point);
         event_names.push_back(fmt::format("\"{}\"", name));
     }
-    file << fmt::format("events:\n"
-                        "  time_points:\n"
-                        "    unit: \"s\"\n"
-                        "    values: [{}]\n"
-                        "  names: [{}]\n\n",
-                        fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "),
-                        fmt::join(event_names, ", "));
-
-    // output the sampling information
-    file << fmt::format("sampling_interval:\n"
-                        "  unit: \"ms\"\n"
-                        "  values: {}\n\n"
-                        "time_points:\n"
-                        "  unit: \"s\"\n"
-                        "  values: [{}]\n\n"
-                        "{}\n",
-                        this->sampling_interval().count(),
-                        fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),
-                        this->generate_yaml_string());
-}
-
-void hardware_sampler::dump_yaml(const std::string &filename) const {
-    this->dump_yaml(filename.c_str());
-}
 
-void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const {
-    this->dump_yaml(filename.string().c_str());
+    return fmt::format("device_identification: \"{}\"\n"
+                       "\n"
+                       "start_time: \"{:%Y-%m-%d %X}\"\n"
+                       "\n"
+                       "events:\n"
+                       "  time_points:\n"
+                       "    unit: \"s\"\n"
+                       "    values: [{}]\n"
+                       "  names: [{}]\n"
+                       "\n"
+                       "sampling_interval:\n"
+                       "  unit: \"ms\"\n"
+                       "  values: {}\n"
+                       "\n"
+                       "time_points:\n"
+                       "  unit: \"s\"\n"
+                       "  values: [{}]\n"
+                       "\n"
+                       "{}\n",
+                       this->device_identification(),
+                       start_date_time_,
+                       fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "),
+                       fmt::join(event_names, ", "),
+                       this->sampling_interval().count(),
+                       fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),
+                       this->generate_yaml_string());
 }
 
 void hardware_sampler::add_time_point(const std::chrono::steady_clock::time_point time_point) {
diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp
index 4f2c15c..234fced 100644
--- a/src/hardware_sampling/system_hardware_sampler.cpp
+++ b/src/hardware_sampling/system_hardware_sampler.cpp
@@ -37,6 +37,7 @@
 #include <cstddef>    // std::size_t
 #include <cstdint>    // std::uint32_t
 #include <memory>     // std::unique_ptr, std::make_unique
+#include <numeric>    // std::accumulate
 #include <stdexcept>  // std::out_of_range
 #include <vector>     // std::vector
 
@@ -197,4 +198,8 @@ void system_hardware_sampler::dump_yaml(const std::filesystem::path &filename) c
     std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); });
 }
 
+std::string system_hardware_sampler::as_yaml_string() const {
+    return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string str, const auto &ptr) { return str + ptr->as_yaml_string(); });
+}
+
 }  // namespace hws

From 988dc77cdf56f8ec7125bc3c9d306d1cf42e4c63 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Fri, 27 Sep 2024 10:26:48 +0200
Subject: [PATCH 52/69] Add a new function to retrieve only the hardware
 samples, excluding event and time_point information.

---
 bindings/cpu_hardware_sampler.cpp                |  1 +
 bindings/gpu_amd_hardware_sampler.cpp            |  1 +
 bindings/gpu_intel_hardware_sampler.cpp          |  1 +
 bindings/gpu_nvidia_hardware_sampler.cpp         |  1 +
 bindings/hardware_sampler.cpp                    |  3 ++-
 .../hardware_sampling/cpu/hardware_sampler.hpp   | 10 +++++-----
 .../gpu_amd/hardware_sampler.hpp                 | 10 +++++-----
 .../gpu_intel/hardware_sampler.hpp               | 10 +++++-----
 .../gpu_nvidia/hardware_sampler.hpp              | 10 +++++-----
 include/hardware_sampling/hardware_sampler.hpp   | 16 +++++++---------
 .../system_hardware_sampler.hpp                  |  6 ++++++
 src/hardware_sampling/cpu/hardware_sampler.cpp   |  2 +-
 .../gpu_amd/hardware_sampler.cpp                 |  2 +-
 .../gpu_intel/hardware_sampler.cpp               |  2 +-
 .../gpu_nvidia/hardware_sampler.cpp              |  2 +-
 src/hardware_sampling/hardware_sampler.cpp       |  2 +-
 .../system_hardware_sampler.cpp                  |  4 ++++
 17 files changed, 48 insertions(+), 35 deletions(-)

diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp
index 6d18fe1..9dae939 100644
--- a/bindings/cpu_hardware_sampler.cpp
+++ b/bindings/cpu_hardware_sampler.cpp
@@ -137,6 +137,7 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("temperature_samples", &hws::cpu_hardware_sampler::temperature_samples, "get all temperature related samples")
         .def("gfx_samples", &hws::cpu_hardware_sampler::gfx_samples, "get all gfx (iGPU) related samples")
         .def("idle_state_samples", &hws::cpu_hardware_sampler::idle_state_samples, "get all idle state related samples")
+        .def("samples_only_as_yaml_string", &hws::cpu_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::cpu_hardware_sampler &self) {
             return fmt::format("<HardwareSampling.CpuHardwareSampler with\n{}\n>", self);
         });
diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp
index 55fbc75..9da321c 100644
--- a/bindings/gpu_amd_hardware_sampler.cpp
+++ b/bindings/gpu_amd_hardware_sampler.cpp
@@ -132,6 +132,7 @@ void init_gpu_amd_hardware_sampler(py::module_ &m) {
         .def("power_samples", &hws::gpu_amd_hardware_sampler::power_samples, "get all power related samples")
         .def("memory_samples", &hws::gpu_amd_hardware_sampler::memory_samples, "get all memory related samples")
         .def("temperature_samples", &hws::gpu_amd_hardware_sampler::temperature_samples, "get all temperature related samples")
+        .def("samples_only_as_yaml_string", &hws::gpu_amd_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::gpu_amd_hardware_sampler &self) {
             return fmt::format("<HardwareSampling.GpuAmdHardwareSampler with\n{}\n>", self);
         });
diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp
index 4485701..901aeed 100644
--- a/bindings/gpu_intel_hardware_sampler.cpp
+++ b/bindings/gpu_intel_hardware_sampler.cpp
@@ -120,6 +120,7 @@ void init_gpu_intel_hardware_sampler(py::module_ &m) {
         .def("power_samples", &hws::gpu_intel_hardware_sampler::power_samples, "get all power related samples")
         .def("memory_samples", &hws::gpu_intel_hardware_sampler::memory_samples, "get all memory related samples")
         .def("temperature_samples", &hws::gpu_intel_hardware_sampler::temperature_samples, "get all temperature related samples")
+        .def("samples_only_as_yaml_string", &hws::gpu_intel_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::gpu_intel_hardware_sampler &self) {
             return fmt::format("<HardwareSampling.GpuIntelHardwareSampler with\n{}\n>", self);
         });
diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp
index 23a0549..b049156 100644
--- a/bindings/gpu_nvidia_hardware_sampler.cpp
+++ b/bindings/gpu_nvidia_hardware_sampler.cpp
@@ -119,6 +119,7 @@ void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
         .def("power_samples", &hws::gpu_nvidia_hardware_sampler::power_samples, "get all power related samples")
         .def("memory_samples", &hws::gpu_nvidia_hardware_sampler::memory_samples, "get all memory related samples")
         .def("temperature_samples", &hws::gpu_nvidia_hardware_sampler::temperature_samples, "get all temperature related samples")
+        .def("samples_only_as_yaml_string", &hws::gpu_nvidia_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::gpu_nvidia_hardware_sampler &self) {
             return fmt::format("<HardwareSampling.GpuNvidiaHardwareSampler with\n{}\n>", self);
         });
diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index e547478..2c47046 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -62,7 +62,8 @@ void init_hardware_sampler(py::module_ &m) {
         .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
         .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)")
         .def("dump_yaml", py::overload_cast<const std::string &>(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file")
-        .def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples as YAML string")
+        .def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples including additional information like events as YAML string")
+        .def("samples_only_as_yaml_string", &hws::hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::hardware_sampler &self) {
 #if defined(HWS_FOR_CPUS_ENABLED)
             if (dynamic_cast<const hws::cpu_hardware_sampler *>(&self)) {
diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp
index 8105fd4..1de0a95 100644
--- a/include/hardware_sampling/cpu/hardware_sampler.hpp
+++ b/include/hardware_sampling/cpu/hardware_sampler.hpp
@@ -107,6 +107,11 @@ class cpu_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const cpu_idle_states_samples &idle_state_samples() const noexcept { return idle_state_samples_; }
 
+    /**
+     * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
+     */
+    [[nodiscard]] std::string samples_only_as_yaml_string() const final;
+
   private:
     /**
      * @copydoc hws::hardware_sampler::sampling_loop
@@ -118,11 +123,6 @@ class cpu_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] std::string device_identification() const final;
 
-    /**
-     * @copydoc hws::hardware_sampler::generate_yaml_string
-     */
-    [[nodiscard]] std::string generate_yaml_string() const final;
-
     /// The general CPU samples.
     cpu_general_samples general_samples_{};
     /// The clock related CPU samples.
diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
index a44dec7..59eb1fc 100644
--- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
@@ -116,6 +116,11 @@ class gpu_amd_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const rocm_smi_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; }
 
+    /**
+     * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
+     */
+    [[nodiscard]] std::string samples_only_as_yaml_string() const final;
+
   private:
     /**
      * @copydoc hws::hardware_sampler::sampling_loop
@@ -127,11 +132,6 @@ class gpu_amd_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] std::string device_identification() const final;
 
-    /**
-     * @copydoc hws::hardware_sampler::generate_yaml_string
-     */
-    [[nodiscard]] std::string generate_yaml_string() const final;
-
     /// The ID of the device to sample.
     std::uint32_t device_id_{};
 
diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
index bea3103..0077861 100644
--- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
@@ -116,6 +116,11 @@ class gpu_intel_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const level_zero_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; }
 
+    /**
+     * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
+     */
+    [[nodiscard]] std::string samples_only_as_yaml_string() const final;
+
   private:
     /**
      * @copydoc hws::hardware_sampler::sampling_loop
@@ -127,11 +132,6 @@ class gpu_intel_hardware_sampler : public hardware_sampler {
      */
     std::string device_identification() const final;
 
-    /**
-     * @copydoc hws::hardware_sampler::generate_yaml_string
-     */
-    std::string generate_yaml_string() const final;
-
     /// The device handle for the device to sample.
     detail::level_zero_device_handle device_;
 
diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
index d73cd07..6716e63 100644
--- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
@@ -117,6 +117,11 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const nvml_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; }
 
+    /**
+     * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
+     */
+    [[nodiscard]] std::string samples_only_as_yaml_string() const final;
+
   private:
     /**
      * @copydoc hws::hardware_sampler::sampling_loop
@@ -128,11 +133,6 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] std::string device_identification() const final;
 
-    /**
-     * @copydoc hws::hardware_sampler::generate_yaml_string
-     */
-    [[nodiscard]] std::string generate_yaml_string() const final;
-
     /// The device handle for the device to sample.
     detail::nvml_device_handle device_{};
 
diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp
index ab7bd3f..443f5bb 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hardware_sampling/hardware_sampler.hpp
@@ -161,10 +161,16 @@ class hardware_sampler {
     void dump_yaml(const std::filesystem::path &filename) const;
 
     /**
-     * @brief Return the hardware samples as YAML string.
+     * @brief Return the hardware samples as well as events and time points as YAML string.
      * @return the YAML content as string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string as_yaml_string() const;
+    /**
+     * @brief Return only the hardware samples as YAML string.
+     * @throws std::runtime_error if sampling is still running
+     * @return the YAML content as string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] virtual std::string samples_only_as_yaml_string() const = 0;
 
   protected:
     /**
@@ -178,14 +184,6 @@ class hardware_sampler {
      */
     [[nodiscard]] virtual std::string device_identification() const = 0;
 
-    /**
-     * @brief Assemble the YAML string containing all hardware samples.
-     * @param[in] start_time_point the reference time point the hardware samples occurred relative to
-     * @throws std::runtime_error if sampling is still running
-     * @return the YAML string (`[[nodiscard]]`)
-     */
-    [[nodiscard]] virtual std::string generate_yaml_string() const = 0;
-
     /**
      * @brief Add a new time point to this hardware sampler. Called during the sampling loop.
      * @param time_point the new time point to add
diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp
index 6346833..96bff5c 100644
--- a/include/hardware_sampling/system_hardware_sampler.hpp
+++ b/include/hardware_sampling/system_hardware_sampler.hpp
@@ -180,6 +180,12 @@ class system_hardware_sampler {
      * @return the YAML content as string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string as_yaml_string() const;
+    /**
+     * @brief Return only the hardware samples as YAML string.
+     * @throws std::runtime_error if sampling is still running
+     * @return the YAML content as string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string samples_only_as_yaml_string() const;
 
   private:
     /// The different hardware sampler for the current system.
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 4a51c10..7d20b49 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -467,7 +467,7 @@ std::string cpu_hardware_sampler::device_identification() const {
     return "cpu_device";
 }
 
-std::string cpu_hardware_sampler::generate_yaml_string() const {
+std::string cpu_hardware_sampler::samples_only_as_yaml_string() const {
     // check whether it's safe to generate the YAML entry
     if (this->is_sampling()) {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
index 7369fa3..b205718 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
@@ -680,7 +680,7 @@ std::string gpu_amd_hardware_sampler::device_identification() const {
     return fmt::format("gpu_amd_device_{}", device_id_);
 }
 
-std::string gpu_amd_hardware_sampler::generate_yaml_string() const {
+std::string gpu_amd_hardware_sampler::samples_only_as_yaml_string() const {
     // check whether it's safe to generate the YAML entry
     if (this->is_sampling()) {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
index 67abc5d..ed3aed7 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
@@ -691,7 +691,7 @@ std::string gpu_intel_hardware_sampler::device_identification() const {
     return fmt::format("gpu_intel_device_{}", prop.deviceId);
 }
 
-std::string gpu_intel_hardware_sampler::generate_yaml_string() const {
+std::string gpu_intel_hardware_sampler::samples_only_as_yaml_string() const {
     // check whether it's safe to generate the YAML entry
     if (this->is_sampling()) {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
index 14d6a9f..2ffb6e8 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
@@ -548,7 +548,7 @@ std::string gpu_nvidia_hardware_sampler::device_identification() const {
     return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.device, pcie_info.bus);
 }
 
-std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const {
+std::string gpu_nvidia_hardware_sampler::samples_only_as_yaml_string() const {
     // check whether it's safe to generate the YAML entry
     if (this->is_sampling()) {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index 0ff6a76..d2289cc 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -176,7 +176,7 @@ std::string hardware_sampler::as_yaml_string() const {
                        fmt::join(event_names, ", "),
                        this->sampling_interval().count(),
                        fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),
-                       this->generate_yaml_string());
+                       this->samples_only_as_yaml_string());
 }
 
 void hardware_sampler::add_time_point(const std::chrono::steady_clock::time_point time_point) {
diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hardware_sampling/system_hardware_sampler.cpp
index 234fced..51a2fed 100644
--- a/src/hardware_sampling/system_hardware_sampler.cpp
+++ b/src/hardware_sampling/system_hardware_sampler.cpp
@@ -202,4 +202,8 @@ std::string system_hardware_sampler::as_yaml_string() const {
     return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string str, const auto &ptr) { return str + ptr->as_yaml_string(); });
 }
 
+std::string system_hardware_sampler::samples_only_as_yaml_string() const {
+    return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string str, const auto &ptr) { return str + ptr->samples_only_as_yaml_string(); });
+}
+
 }  // namespace hws

From 68a3ad1c4ece8ef97cd95ab8b5a896516c206b52 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Fri, 27 Sep 2024 15:36:07 +0200
Subject: [PATCH 53/69] Make the device_identification function public.

---
 include/hardware_sampling/cpu/hardware_sampler.hpp   | 10 +++++-----
 .../hardware_sampling/gpu_amd/hardware_sampler.hpp   | 10 +++++-----
 .../hardware_sampling/gpu_intel/hardware_sampler.hpp | 10 +++++-----
 .../gpu_nvidia/hardware_sampler.hpp                  | 10 +++++-----
 include/hardware_sampling/hardware_sampler.hpp       | 12 ++++++------
 5 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp
index 1de0a95..bc6971f 100644
--- a/include/hardware_sampling/cpu/hardware_sampler.hpp
+++ b/include/hardware_sampling/cpu/hardware_sampler.hpp
@@ -107,6 +107,11 @@ class cpu_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const cpu_idle_states_samples &idle_state_samples() const noexcept { return idle_state_samples_; }
 
+    /**
+     * @copydoc hws::hardware_sampler::device_identification
+     */
+    [[nodiscard]] std::string device_identification() const final;
+
     /**
      * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
      */
@@ -118,11 +123,6 @@ class cpu_hardware_sampler : public hardware_sampler {
      */
     void sampling_loop() final;
 
-    /**
-     * @copydoc hws::hardware_sampler::device_identification
-     */
-    [[nodiscard]] std::string device_identification() const final;
-
     /// The general CPU samples.
     cpu_general_samples general_samples_{};
     /// The clock related CPU samples.
diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
index 59eb1fc..308ca91 100644
--- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
@@ -116,6 +116,11 @@ class gpu_amd_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const rocm_smi_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; }
 
+    /**
+     * @copydoc hws::hardware_sampler::device_identification
+     */
+    [[nodiscard]] std::string device_identification() const final;
+
     /**
      * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
      */
@@ -127,11 +132,6 @@ class gpu_amd_hardware_sampler : public hardware_sampler {
      */
     void sampling_loop() final;
 
-    /**
-     * @copydoc hws::hardware_sampler::device_identification
-     */
-    [[nodiscard]] std::string device_identification() const final;
-
     /// The ID of the device to sample.
     std::uint32_t device_id_{};
 
diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
index 0077861..6841c7b 100644
--- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
@@ -116,6 +116,11 @@ class gpu_intel_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const level_zero_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; }
 
+    /**
+     * @copydoc hws::hardware_sampler::device_identification
+     */
+    [[nodiscard]] std::string device_identification() const final;
+
     /**
      * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
      */
@@ -127,11 +132,6 @@ class gpu_intel_hardware_sampler : public hardware_sampler {
      */
     void sampling_loop() final;
 
-    /**
-     * @copydoc hws::hardware_sampler::device_identification
-     */
-    std::string device_identification() const final;
-
     /// The device handle for the device to sample.
     detail::level_zero_device_handle device_;
 
diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
index 6716e63..4180f95 100644
--- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
@@ -117,6 +117,11 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const nvml_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; }
 
+    /**
+     * @copydoc hws::hardware_sampler::device_identification
+     */
+    [[nodiscard]] std::string device_identification() const final;
+
     /**
      * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
      */
@@ -128,11 +133,6 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler {
      */
     void sampling_loop() final;
 
-    /**
-     * @copydoc hws::hardware_sampler::device_identification
-     */
-    [[nodiscard]] std::string device_identification() const final;
-
     /// The device handle for the device to sample.
     detail::nvml_device_handle device_{};
 
diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp
index 443f5bb..a44fdce 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hardware_sampling/hardware_sampler.hpp
@@ -160,6 +160,12 @@ class hardware_sampler {
      */
     void dump_yaml(const std::filesystem::path &filename) const;
 
+    /**
+     * @brief Return the unique device identification. Can be used as unique key in the YAML string.
+     * @return the unique device identification (`[[nodiscard]]`)
+     */
+    [[nodiscard]] virtual std::string device_identification() const = 0;
+
     /**
      * @brief Return the hardware samples as well as events and time points as YAML string.
      * @return the YAML content as string (`[[nodiscard]]`)
@@ -178,12 +184,6 @@ class hardware_sampler {
      */
     virtual void sampling_loop() = 0;
 
-    /**
-     * @brief Return the unique device identification. Can be used as unique key in the YAML string.
-     * @return the unique device identification (`[[nodiscard]]`)
-     */
-    [[nodiscard]] virtual std::string device_identification() const = 0;
-
     /**
      * @brief Add a new time point to this hardware sampler. Called during the sampling loop.
      * @param time_point the new time point to add

From d5395602d9d9d6a477c00afc155a10c83d588864 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Fri, 27 Sep 2024 16:41:40 +0200
Subject: [PATCH 54/69] Add alias targets.

---
 CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f3ba9df..d28e999 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,6 +22,8 @@ set(HWS_SOURCES
 # create hardware sampling library
 set(HWS_LIBRARY_NAME hardware_sampling)
 add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES})
+add_library(hws ALIAS ${HWS_LIBRARY_NAME})
+add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME})
 
 # set install target
 set(HWS_TARGETS_TO_INSTALL ${HWS_LIBRARY_NAME})

From a47e8feba19b6ca1f75ee28973fbc446abfcfb94 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 30 Sep 2024 10:05:08 +0200
Subject: [PATCH 55/69] Fix turbostat logic related output bug.

---
 .../cpu/hardware_sampler.cpp                  | 311 ++++++++++++++----
 1 file changed, 241 insertions(+), 70 deletions(-)

diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
index 7d20b49..b996f96 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hardware_sampling/cpu/hardware_sampler.cpp
@@ -175,109 +175,193 @@ void cpu_hardware_sampler::sampling_loop() {
         const std::vector<std::string_view> values = detail::split(data[1], '\t');
 
         for (std::size_t i = 0; i < header.size(); ++i) {
-            if (this->sample_category_enabled(sample_category::general)) {
-                if (header[i] == "Busy%") {
+            // general samples
+            if (header[i] == "Busy%") {
+                if (this->sample_category_enabled(sample_category::general)) {
                     using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
                     general_samples_.compute_utilization_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "IPC") {
+                }
+                continue;
+            } else if (header[i] == "IPC") {
+                if (this->sample_category_enabled(sample_category::general)) {
                     using vector_type = decltype(general_samples_.ipc_)::value_type;
                     general_samples_.ipc_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "IRQ") {
+                }
+                continue;
+            } else if (header[i] == "IRQ") {
+                if (this->sample_category_enabled(sample_category::general)) {
                     using vector_type = decltype(general_samples_.irq_)::value_type;
                     general_samples_.irq_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "SMI") {
+                }
+                continue;
+            } else if (header[i] == "SMI") {
+                if (this->sample_category_enabled(sample_category::general)) {
                     using vector_type = decltype(general_samples_.smi_)::value_type;
                     general_samples_.smi_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "POLL") {
+                }
+            } else if (header[i] == "POLL") {
+                if (this->sample_category_enabled(sample_category::general)) {
                     using vector_type = decltype(general_samples_.poll_)::value_type;
                     general_samples_.poll_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "POLL%") {
+                }
+                continue;
+            } else if (header[i] == "POLL%") {
+                if (this->sample_category_enabled(sample_category::general)) {
                     using vector_type = decltype(general_samples_.poll_percent_)::value_type;
                     general_samples_.poll_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
                 }
+                continue;
             }
-            if (this->sample_category_enabled(sample_category::clock)) {
-                if (header[i] == "Avg_MHz") {
+
+            // clock related samples
+            if (header[i] == "Avg_MHz") {
+                if (this->sample_category_enabled(sample_category::clock)) {
                     using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
                     clock_samples_.clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "Bzy_MHz") {
+                }
+                continue;
+            } else if (header[i] == "Bzy_MHz") {
+                if (this->sample_category_enabled(sample_category::clock)) {
                     using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
                     clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "TSC_MHz") {
+                }
+                continue;
+            } else if (header[i] == "TSC_MHz") {
+                if (this->sample_category_enabled(sample_category::clock)) {
                     using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
                     clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
                 }
+                continue;
             }
-            if (this->sample_category_enabled(sample_category::power)) {
-                if (header[i] == "PkgWatt") {
+
+            // power related samples
+            if (header[i] == "PkgWatt") {
+                if (this->sample_category_enabled(sample_category::power)) {
                     using vector_type = decltype(power_samples_.power_usage_)::value_type;
                     power_samples_.power_usage_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
                     power_samples_.power_measurement_type_ = "current/instant";
                     power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 };
-                } else if (header[i] == "CorWatt") {
+                }
+                continue;
+            } else if (header[i] == "CorWatt") {
+                if (this->sample_category_enabled(sample_category::power)) {
                     using vector_type = decltype(power_samples_.core_watt_)::value_type;
                     power_samples_.core_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "RAMWatt") {
+                }
+                continue;
+            } else if (header[i] == "RAMWatt") {
+                if (this->sample_category_enabled(sample_category::power)) {
                     using vector_type = decltype(power_samples_.ram_watt_)::value_type;
                     power_samples_.ram_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "PKG_%") {
+                }
+                continue;
+            } else if (header[i] == "PKG_%") {
+                if (this->sample_category_enabled(sample_category::power)) {
                     using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
                     power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "RAM_%") {
+                }
+                continue;
+            } else if (header[i] == "RAM_%") {
+                if (this->sample_category_enabled(sample_category::power)) {
                     using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
                     power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
                 }
+                continue;
             }
-            if (this->sample_category_enabled(sample_category::temperature)) {
-                if (header[i] == "CoreTmp") {
+
+            // temperature related samples
+            if (header[i] == "CoreTmp") {
+                if (this->sample_category_enabled(sample_category::temperature)) {
                     using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
                     temperature_samples_.core_temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "CoreThr") {
+                }
+                continue;
+            } else if (header[i] == "CoreThr") {
+                if (this->sample_category_enabled(sample_category::temperature)) {
                     using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
                     temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "PkgTmp") {
+                }
+                continue;
+            } else if (header[i] == "PkgTmp") {
+                if (this->sample_category_enabled(sample_category::temperature)) {
                     using vector_type = decltype(temperature_samples_.temperature_)::value_type;
                     temperature_samples_.temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
                 }
+                continue;
             }
-            if (this->sample_category_enabled(sample_category::gfx)) {
-                if (header[i] == "GFX%rc6") {
+
+            // gfx (iGPU) related samples
+            if (header[i] == "GFX%rc6") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
                     using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
                     gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "GFXMHz") {
+                }
+                continue;
+            } else if (header[i] == "GFXMHz") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
                     using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
                     gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "GFXAMHz") {
+                }
+                continue;
+            } else if (header[i] == "GFXAMHz") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
                     using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
                     gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "GFX%C0") {
+                }
+                continue;
+            } else if (header[i] == "GFX%C0") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
                     using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
                     gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "CPUGFX%") {
+                }
+                continue;
+            } else if (header[i] == "CPUGFX%") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
                     using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
                     gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "GFXWatt") {
+                }
+                continue;
+            } else if (header[i] == "GFXWatt") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
                     using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
                     gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
                 }
+                continue;
             }
-            if (this->sample_category_enabled(sample_category::idle_state)) {
-                if (header[i] == "Totl%C0") {
+
+            // idle state related samples
+            if (header[i] == "Totl%C0") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
                     using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
                     idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "Any%C0") {
+                }
+                continue;
+            } else if (header[i] == "Any%C0") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
                     using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
                     idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "CPU%LPI") {
+                }
+                continue;
+            } else if (header[i] == "CPU%LPI") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
                     using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
                     idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "SYS%LPI") {
+                }
+                continue;
+            } else if (header[i] == "SYS%LPI") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
                     using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
                     idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else if (header[i] == "Pkg%LPI") {
+                }
+                continue;
+            } else if (header[i] == "Pkg%LPI") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
                     using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
                     idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                } else {
+                }
+                continue;
+            } else {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
                     // test against regex
                     const std::string header_str{ header[i] };
                     const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended };
@@ -291,6 +375,7 @@ void cpu_hardware_sampler::sampling_loop() {
                         idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
                     }
                 }
+                continue;
             }
         }
     }
@@ -339,41 +424,69 @@ void cpu_hardware_sampler::sampling_loop() {
 
                 // add values to the respective sample entries
                 for (std::size_t i = 0; i < header.size(); ++i) {
-                    if (this->sample_category_enabled(sample_category::general)) {
-                        if (header[i] == "Busy%") {
+                    // general samples
+                    if (header[i] == "Busy%") {
+                        if (this->sample_category_enabled(sample_category::general)) {
                             using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
                             general_samples_.compute_utilization_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "IPC") {
+                        }
+                        continue;
+                    } else if (header[i] == "IPC") {
+                        if (this->sample_category_enabled(sample_category::general)) {
                             using vector_type = decltype(general_samples_.ipc_)::value_type;
                             general_samples_.ipc_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "IRQ") {
+                        }
+                        continue;
+                    } else if (header[i] == "IRQ") {
+                        if (this->sample_category_enabled(sample_category::general)) {
                             using vector_type = decltype(general_samples_.irq_)::value_type;
                             general_samples_.irq_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "SMI") {
+                        }
+                        continue;
+                    } else if (header[i] == "SMI") {
+                        if (this->sample_category_enabled(sample_category::general)) {
                             using vector_type = decltype(general_samples_.smi_)::value_type;
                             general_samples_.smi_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "POLL") {
+                        }
+                        continue;
+                    } else if (header[i] == "POLL") {
+                        if (this->sample_category_enabled(sample_category::general)) {
                             using vector_type = decltype(general_samples_.poll_)::value_type;
                             general_samples_.poll_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "POLL%") {
+                        }
+                        continue;
+                    } else if (header[i] == "POLL%") {
+                        if (this->sample_category_enabled(sample_category::general)) {
                             using vector_type = decltype(general_samples_.poll_percent_)::value_type;
                             general_samples_.poll_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                         }
+                        continue;
                     }
-                    if (this->sample_category_enabled(sample_category::clock)) {
-                        if (header[i] == "Avg_MHz") {
+
+                    // clock related samples
+                    if (header[i] == "Avg_MHz") {
+                        if (this->sample_category_enabled(sample_category::clock)) {
                             using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
                             clock_samples_.clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "Bzy_MHz") {
+                        }
+                        continue;
+                    } else if (header[i] == "Bzy_MHz") {
+                        if (this->sample_category_enabled(sample_category::clock)) {
                             using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
                             clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "TSC_MHz") {
+                        }
+                        continue;
+                    } else if (header[i] == "TSC_MHz") {
+                        if (this->sample_category_enabled(sample_category::clock)) {
                             using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
                             clock_samples_.time_stamp_counter_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                         }
+                        continue;
                     }
-                    if (this->sample_category_enabled(sample_category::power)) {
-                        if (header[i] == "PkgWatt") {
+
+                    // power related samples
+                    if (header[i] == "PkgWatt") {
+                        if (this->sample_category_enabled(sample_category::power)) {
                             using vector_type = decltype(power_samples_.power_usage_)::value_type;
                             power_samples_.power_usage_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                             // calculate total energy consumption
@@ -382,76 +495,134 @@ void cpu_hardware_sampler::sampling_loop() {
                             const value_type time_difference = std::chrono::duration<value_type>(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count();
                             const auto current = power_samples_.power_usage_->back() * time_difference;
                             power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current);
-                        } else if (header[i] == "CorWatt") {
+                        }
+                        continue;
+                    } else if (header[i] == "CorWatt") {
+                        if (this->sample_category_enabled(sample_category::power)) {
                             using vector_type = decltype(power_samples_.core_watt_)::value_type;
                             power_samples_.core_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "RAMWatt") {
+                        }
+                        continue;
+                    } else if (header[i] == "RAMWatt") {
+                        if (this->sample_category_enabled(sample_category::power)) {
                             using vector_type = decltype(power_samples_.ram_watt_)::value_type;
                             power_samples_.ram_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "PKG_%") {
+                        }
+                        continue;
+                    } else if (header[i] == "PKG_%") {
+                        if (this->sample_category_enabled(sample_category::power)) {
                             using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
                             power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "RAM_%") {
+                        }
+                        continue;
+                    } else if (header[i] == "RAM_%") {
+                        if (this->sample_category_enabled(sample_category::power)) {
                             using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
                             power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                         }
+                        continue;
                     }
-                    if (this->sample_category_enabled(sample_category::temperature)) {
-                        if (header[i] == "CoreTmp") {
+
+                    // temperature related samples
+                    if (header[i] == "CoreTmp") {
+                        if (this->sample_category_enabled(sample_category::temperature)) {
                             using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
                             temperature_samples_.core_temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "CoreThr") {
+                        }
+                        continue;
+                    } else if (header[i] == "CoreThr") {
+                        if (this->sample_category_enabled(sample_category::temperature)) {
                             using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
                             temperature_samples_.core_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "PkgTmp") {
+                        }
+                        continue;
+                    } else if (header[i] == "PkgTmp") {
+                        if (this->sample_category_enabled(sample_category::temperature)) {
                             using vector_type = decltype(temperature_samples_.temperature_)::value_type;
                             temperature_samples_.temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                         }
+                        continue;
                     }
-                    if (this->sample_category_enabled(sample_category::gfx)) {
-                        if (header[i] == "GFX%rc6") {
+
+                    // gfx (iGPU) related samples
+                    if (header[i] == "GFX%rc6") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
                             using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
                             gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "GFXMHz") {
+                        }
+                        continue;
+                    } else if (header[i] == "GFXMHz") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
                             using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
                             gfx_samples_.gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "GFXAMHz") {
+                        }
+                        continue;
+                    } else if (header[i] == "GFXAMHz") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
                             using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
                             gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "GFX%C0") {
+                        }
+                        continue;
+                    } else if (header[i] == "GFX%C0") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
                             using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
                             gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "CPUGFX%") {
+                        }
+                        continue;
+                    } else if (header[i] == "CPUGFX%") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
                             using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
                             gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "GFXWatt") {
+                        }
+                        continue;
+                    } else if (header[i] == "GFXWatt") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
                             using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
                             gfx_samples_.gfx_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                         }
+                        continue;
                     }
-                    if (this->sample_category_enabled(sample_category::idle_state)) {
-                        if (header[i] == "Totl%C0") {
+
+                    // idle state related samples
+                    if (header[i] == "Totl%C0") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
                             using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
                             idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "Any%C0") {
+                        }
+                        continue;
+                    } else if (header[i] == "Any%C0") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
                             using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
                             idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "CPU%LPI") {
+                        }
+                        continue;
+                    } else if (header[i] == "CPU%LPI") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
                             using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
                             idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "SYS%LPI") {
+                        }
+                        continue;
+                    } else if (header[i] == "SYS%LPI") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
                             using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
                             idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else if (header[i] == "Pkg%LPI") {
+                        }
+                        continue;
+                    } else if (header[i] == "Pkg%LPI") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
                             using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
                             idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        } else {
+                        }
+                        continue;
+                    } else {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
                             const std::string header_str{ header[i] };
                             if (idle_state_samples_.idle_states_.value().count(header_str) > decltype(idle_state_samples_)::map_type::size_type{ 0 }) {
                                 using vector_type = cpu_idle_states_samples::map_type::mapped_type;
                                 idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
                             }
                         }
+                        continue;
                     }
                 }
             }

From f5747aec6eb20200e1d2f0cf19961fa225fed7b4 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 30 Sep 2024 10:25:17 +0200
Subject: [PATCH 56/69] Add version information using CMake configuration.

---
 .gitignore                                 |  5 ++-
 CMakeLists.txt                             | 14 +++++-
 bindings/CMakeLists.txt                    |  1 +
 bindings/main.cpp                          |  3 ++
 bindings/version.cpp                       | 26 +++++++++++
 include/hardware_sampling/core.hpp         |  1 +
 include/hardware_sampling/version.hpp.in   | 51 ++++++++++++++++++++++
 src/hardware_sampling/hardware_sampler.cpp |  7 ++-
 8 files changed, 104 insertions(+), 4 deletions(-)
 create mode 100644 bindings/version.cpp
 create mode 100644 include/hardware_sampling/version.hpp.in

diff --git a/.gitignore b/.gitignore
index 1d90c9b..4ed094d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,4 +54,7 @@ CTestTestfile.cmake
 # IDEs ================================
 .idea/
 .vscode/
-.vs/
\ No newline at end of file
+.vs/
+
+# auto-generated version header
+include/hardware_sampling/version.hpp
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d28e999..eea2868 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,10 +6,10 @@
 
 cmake_minimum_required(VERSION 3.22)
 
-project("HWS - Hardware Sampling for GPUs and CPUs"
+project("hws - Hardware Sampling for GPUs and CPUs"
         VERSION 1.0.0
         LANGUAGES CXX
-        DESCRIPTION "Hardware sampling (e.g., clock frequencies, memory consumption, temperatures, or energy draw) for CPUs, and GPUS.")
+        DESCRIPTION "Hardware sampling (e.g., clock frequencies, memory consumption, temperatures, or energy draw) for CPUs and GPUS.")
 
 # explicitly set library source files
 set(HWS_SOURCES
@@ -90,6 +90,16 @@ else ()
 endif ()
 target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt)
 
+########################################################################################################################
+##                                              configure version header                                              ##
+########################################################################################################################
+message(STATUS "Configuring version information.")
+configure_file(
+        ${CMAKE_CURRENT_SOURCE_DIR}/include/hardware_sampling/version.hpp.in
+        ${CMAKE_CURRENT_SOURCE_DIR}/include/hardware_sampling/version.hpp
+        @ONLY
+)
+
 ####################################################################################################################
 ##                                                CPU measurements                                                ##
 ####################################################################################################################
diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt
index ffa7984..f2ef8d8 100644
--- a/bindings/CMakeLists.txt
+++ b/bindings/CMakeLists.txt
@@ -36,6 +36,7 @@ set(HWS_PYTHON_BINDINGS_SOURCES
         ${CMAKE_CURRENT_SOURCE_DIR}/hardware_sampler.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/sample_category.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/system_hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/version.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
 )
 
diff --git a/bindings/main.cpp b/bindings/main.cpp
index 39eb521..f3dca3f 100644
--- a/bindings/main.cpp
+++ b/bindings/main.cpp
@@ -24,6 +24,7 @@ void init_cpu_hardware_sampler(py::module_ &);
 void init_gpu_nvidia_hardware_sampler(py::module_ &);
 void init_gpu_amd_hardware_sampler(py::module_ &);
 void init_gpu_intel_hardware_sampler(py::module_ &);
+void init_version(py::module_ &);
 
 PYBIND11_MODULE(HardwareSampling, m) {
     m.doc() = "Hardware Sampling for CPUs and GPUs";
@@ -57,4 +58,6 @@ PYBIND11_MODULE(HardwareSampling, m) {
     init_gpu_intel_hardware_sampler(m);
 #endif
     m.def("has_gpu_intel_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_INTEL_GPUS_ENABLED); });
+
+    init_version(m);
 }
diff --git a/bindings/version.cpp b/bindings/version.cpp
new file mode 100644
index 0000000..df892ca
--- /dev/null
+++ b/bindings/version.cpp
@@ -0,0 +1,26 @@
+/**
+* @author Marcel Breyer
+* @copyright 2024-today All Rights Reserved
+* @license This file is released under the MIT license.
+*          See the LICENSE.md file in the project root for full license information.
+*/
+
+#include "hardware_sampling/version.hpp"  // hws::version
+
+#include "pybind11/pybind11.h"   // py::module_
+
+namespace py = pybind11;
+
+// empty placeholder type: only used as the C++ type behind the Python "version" class registered below
+class version { };
+
+void init_version(py::module_ &m) {
+    // bind the global version information as static properties of the placeholder class
+    // the indirection through a class is necessary to enforce that the values are read-only
+    py::class_<version>(m, "version")
+        .def_property_readonly_static("name", [](const py::object & /* self */) { return hws::version::name; }, "the name of the hws library")
+        .def_property_readonly_static("version", [](const py::object & /* self */) { return hws::version::version; }, "the used version of the hws library")
+        .def_property_readonly_static("major", [](const py::object & /* self */) { return hws::version::major; }, "the used major version of the hws library")
+        .def_property_readonly_static("minor", [](const py::object & /* self */) { return hws::version::minor; }, "the used minor version of the hws library")
+        .def_property_readonly_static("patch", [](const py::object & /* self */) { return hws::version::patch; }, "the used patch version of the hws library");
+}
diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp
index 7b259f4..44d8922 100644
--- a/include/hardware_sampling/core.hpp
+++ b/include/hardware_sampling/core.hpp
@@ -16,6 +16,7 @@
 #include "hardware_sampling/hardware_sampler.hpp"
 #include "hardware_sampling/sample_category.hpp"
 #include "hardware_sampling/system_hardware_sampler.hpp"
+#include "hardware_sampling/version.hpp"
 
 #if defined(HWS_FOR_CPUS_ENABLED)
     #include "hardware_sampling/cpu/cpu_samples.hpp"
diff --git a/include/hardware_sampling/version.hpp.in b/include/hardware_sampling/version.hpp.in
new file mode 100644
index 0000000..88d0c1e
--- /dev/null
+++ b/include/hardware_sampling/version.hpp.in
@@ -0,0 +1,51 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Version information for the hardware sampling.
+ */
+
+#ifndef HARDWARE_SAMPLING_VERSION_HPP_
+#define HARDWARE_SAMPLING_VERSION_HPP_
+#pragma once
+
+#include <string_view>  // std::string_view
+
+namespace hws::version {
+
+/**
+ * @brief The name of the library.
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+inline constexpr std::string_view name = "@PROJECT_NAME@";
+
+/**
+ * @brief The current version of the library in the form: "major.minor.patch".
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+inline constexpr std::string_view version = "@PROJECT_VERSION@";
+
+/**
+ * @brief The current major version of the library.
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+inline constexpr int major = @PROJECT_VERSION_MAJOR@;
+
+/**
+ * @brief The current minor version of the library.
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+inline constexpr int minor = @PROJECT_VERSION_MINOR@;
+
+/**
+ * @brief The current patch version of the library.
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+inline constexpr int patch = @PROJECT_VERSION_PATCH@;
+
+}  // namespace hws::version
+
+#endif  // HARDWARE_SAMPLING_VERSION_HPP_
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index d2289cc..e8813d9 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -9,6 +9,7 @@
 
 #include "hardware_sampling/event.hpp"    // hws::event
 #include "hardware_sampling/utility.hpp"  // hws::detail::durations_from_reference_time
+#include "hardware_sampling/version.hpp"  // hws::version::version
 
 #include "fmt/chrono.h"  // direct formatting of std::chrono types
 #include "fmt/format.h"  // fmt::format
@@ -127,7 +128,8 @@ void hardware_sampler::dump_yaml(const char *filename) const {
     std::ofstream file{ filename, std::ios_base::app };
 
     // begin a new YAML document (only with "---" multiple YAML documents in a single file are allowed)
-    file << "---\n\n" << this->as_yaml_string();
+    file << "---\n\n"
+         << this->as_yaml_string();
 }
 
 void hardware_sampler::dump_yaml(const std::string &filename) const {
@@ -152,6 +154,8 @@ std::string hardware_sampler::as_yaml_string() const {
     }
 
     return fmt::format("device_identification: \"{}\"\n"
+                       "\n"
+                       "version: \"{}\"\n"
                        "\n"
                        "start_time: \"{:%Y-%m-%d %X}\"\n"
                        "\n"
@@ -171,6 +175,7 @@ std::string hardware_sampler::as_yaml_string() const {
                        "\n"
                        "{}\n",
                        this->device_identification(),
+                       version::version,
                        start_date_time_,
                        fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "),
                        fmt::join(event_names, ", "),

From dc71dce017d1b19e4ef103bbabbdc29412f38d11 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 30 Sep 2024 10:47:03 +0200
Subject: [PATCH 57/69] Add check that the sampling interval must not be zero.

---
 include/hardware_sampling/hardware_sampler.hpp | 1 +
 src/hardware_sampling/hardware_sampler.cpp     | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp
index a44fdce..8824ac3 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hardware_sampling/hardware_sampler.hpp
@@ -34,6 +34,7 @@ class hardware_sampler {
      * @brief Construct a new hardware sampler with the provided @p sampling_interval.
      * @param[in] sampling_interval the used sampling interval
      * @param[in] category the sample categories that are enabled for hardware sampling
+     * @throws std::invalid_argument if the @p sampling_interval is zero
      */
     hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category);
 
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hardware_sampling/hardware_sampler.cpp
index e8813d9..d5ec9fc 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hardware_sampling/hardware_sampler.cpp
@@ -28,7 +28,11 @@ namespace hws {
 
 hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
     sampling_interval_{ sampling_interval },
-    sample_category_{ category } { }
+    sample_category_{ category } {
+    if (sampling_interval <= std::chrono::milliseconds{ 0 }) {  // also rejects negative intervals, matching the error message
+        throw std::invalid_argument{ "The sampling interval must be larger than 0ms!" };
+    }
+}
 
 hardware_sampler::~hardware_sampler() = default;
 

From ed858306a5c0cb4f16e681586fd5e4d1a7416b3d Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 30 Sep 2024 10:48:20 +0200
Subject: [PATCH 58/69] Update code examples.

---
 examples/cpp/main.cpp   | 2 +-
 examples/python/main.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/cpp/main.cpp b/examples/cpp/main.cpp
index 166a967..551c721 100644
--- a/examples/cpp/main.cpp
+++ b/examples/cpp/main.cpp
@@ -12,7 +12,7 @@
 #include <vector>   // std::vector
 
 int main() {
-    hws::cpu_hardware_sampler sampler{};
+    hws::system_hardware_sampler sampler{};
     // could also be, e.g.,
     // hws::gpu_nvidia_hardware_sampler sampler{};
     sampler.start_sampling();
diff --git a/examples/python/main.py b/examples/python/main.py
index 7f384ca..da0809f 100644
--- a/examples/python/main.py
+++ b/examples/python/main.py
@@ -8,12 +8,12 @@
 #          See the LICENSE.md file in the project root for full license information.                                   #
 ########################################################################################################################
 
-import HardwareSampling
+import HardwareSampling as hws
 import numpy as np
 
-sampler = HardwareSampling.CpuHardwareSampler()
+sampler = hws.SystemHardwareSampler()
 # could also be, e.g.,
-# sampler = HardwareSampling.GpuNvidiaHardwareSampler()
+# sampler = hws.GpuNvidiaHardwareSampler()
 sampler.start()
 
 sampler.add_event("init")

From 257ca3d9cc53625437294ec5a56284cb4acba547 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 30 Sep 2024 10:52:15 +0200
Subject: [PATCH 59/69] Fix usage of wrong C++ standard in documentation
 string.

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index eea2868..a96756b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME})
 # set install target
 set(HWS_TARGETS_TO_INSTALL ${HWS_LIBRARY_NAME})
 
-# use C++20
+# use C++17
 target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_17)
 
 # add target include directory

From 63bb80f3e7e2bb21f7e24b2c07b56005c763714c Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 30 Sep 2024 11:57:36 +0200
Subject: [PATCH 60/69] Add the possibility to generate a Doxygen
 documentation.

---
 .github/workflows/documentation.yml           | 43 ++++++++++++++
 .gitignore                                    |  1 +
 CMakeLists.txt                                | 10 +++-
 docs/CMakeLists.txt                           | 56 +++++++++++++++++++
 include/hardware_sampling/cpu/cpu_samples.hpp |  4 ++
 .../cpu/hardware_sampler.hpp                  |  4 ++
 include/hardware_sampling/event.hpp           |  4 ++
 .../gpu_amd/hardware_sampler.hpp              |  4 ++
 .../gpu_amd/rocm_smi_samples.hpp              |  4 ++
 include/hardware_sampling/gpu_amd/utility.hpp |  7 ++-
 .../gpu_intel/hardware_sampler.hpp            |  4 ++
 .../gpu_intel/level_zero_samples.hpp          |  4 ++
 .../gpu_nvidia/hardware_sampler.hpp           |  4 ++
 .../gpu_nvidia/nvml_samples.hpp               |  5 ++
 .../hardware_sampling/gpu_nvidia/utility.hpp  |  5 ++
 .../hardware_sampling/hardware_sampler.hpp    | 11 ++--
 .../system_hardware_sampler.hpp               |  6 +-
 include/hardware_sampling/utility.hpp         | 15 +++++
 18 files changed, 181 insertions(+), 10 deletions(-)
 create mode 100644 .github/workflows/documentation.yml
 create mode 100644 docs/CMakeLists.txt

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
new file mode 100644
index 0000000..7f41715
--- /dev/null
+++ b/.github/workflows/documentation.yml
@@ -0,0 +1,44 @@
+name: Generate documentation
+
+# only trigger this action on specific events
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build-documentation:
+    runs-on: ubuntu-latest
+    steps:
+      # checkout repository
+      - name: Checkout hws
+        uses: actions/checkout@v4.2.0
+        with:
+          path: hardware_sampling
+      # install dependencies
+      - name: Dependencies
+        run: |
+          sudo apt update
+          sudo apt-get install -y doxygen graphviz
+      # configure project via CMake
+      - name: Configure
+        run: |
+          cd hardware_sampling
+          mkdir build
+          cd build
+          cmake -DHWS_ENABLE_DOCUMENTATION=ON ..
+      # build project
+      - name: Generate
+        run: |
+          cd hardware_sampling/build
+          make doc
+      # deploy generated documentation using github.io (only for pushes to main; pull requests merely verify that the documentation builds)
+      - name: Deploy
+        if: github.event_name == 'push'
+        uses: peaceiris/actions-gh-pages@v4
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./hardware_sampling/docs/html
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 4ed094d..36754d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ Prerequisites
 # CMake ================================
 bin/
 build*/
+docs/html
 install*/
 cmake-build*/
 CMakeLists.txt.user
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a96756b..f4e50c8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -282,12 +282,20 @@ endif ()
 ##                                             enable Python bindings                                             ##
 ####################################################################################################################
 option(HWS_ENABLE_PYTHON_BINDINGS "Build language bindings for Python." ON)
-
 if (HWS_ENABLE_PYTHON_BINDINGS)
     add_subdirectory(bindings)
 endif ()
 
 
+########################################################################################################################
+##                                                  add documentation                                                 ##
+########################################################################################################################
+option(HWS_ENABLE_DOCUMENTATION "Add documentation using Doxygen." OFF)
+if (HWS_ENABLE_DOCUMENTATION)
+    add_subdirectory(docs)
+endif ()
+
+
 ########################################################################################################################
 ##                                           add support for `make install`                                           ##
 ########################################################################################################################
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
new file mode 100644
index 0000000..1623953
--- /dev/null
+++ b/docs/CMakeLists.txt
@@ -0,0 +1,56 @@
+## Authors: Marcel Breyer
+## Copyright (C): 2024-today All Rights Reserved
+## License: This file is released under the MIT license.
+##          See the LICENSE.md file in the project root for full license information.
+########################################################################################################################
+
+########################################################################################################################
+##                                     setup documentation generation with doxygen                                    ##
+########################################################################################################################
+## use installed doxygen
+find_package(Doxygen REQUIRED OPTIONAL_COMPONENTS dot)
+
+## configure doxygen
+set(DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/docs")
+set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${PROJECT_SOURCE_DIR}/README.md")
+set(DOXYGEN_FILE_PATTERNS "*.hpp;")
+set(DOXYGEN_STRIP_FROM_PATH "${PROJECT_SOURCE_DIR}")
+set(DOXYGEN_ABBREVIATE_BRIEF "")
+set(DOXYGEN_QUIET "YES")
+set(DOXYGEN_HTML_TIMESTAMP "YES")
+set(DOXYGEN_NUM_PROC_THREADS 0)
+set(DOXYGEN_WARN_NO_PARAMDOC "YES")
+set(DOXYGEN_SORT_MEMBER_DOCS "NO")
+set(DOXYGEN_INLINE_INHERITED_MEMB "YES")
+set(DOXYGEN_USE_MATHJAX "YES")
+set(DOXYGEN_EXCLUDE_SYMBOLS "*_HPP_")
+
+set(DOXYGEN_DOT_IMAGE_FORMAT "svg")
+set(DOXYGEN_INTERACTIVE_SVG "YES")
+set(DOXYGEN_INCLUDE_GRAPH "NO")
+set(DOXYGEN_EXTRACT_PRIVATE "YES")
+
+## enable processing of specific attributes and macros
+set(DOXYGEN_ENABLE_PREPROCESSING "YES")
+set(DOXYGEN_MACRO_EXPANSION "YES")
+set(DOXYGEN_EXPAND_ONLY_PREDEF "YES")
+set(DOXYGEN_EXPAND_AS_DEFINED "YES")  ## NOTE(review): Doxygen documents this tag as a list of macro names, not YES/NO -- confirm which macros should be expanded
+
+set(DOXYGEN_VERBATIM_VARS DOXYGEN_ALIASES)
+set(DOXYGEN_ALIASES
+        [[license="\par License^^\parblock^^"  ]]
+)
+
+## add doxygen as target
+doxygen_add_docs(
+        doc
+        "${PROJECT_SOURCE_DIR}/include;${PROJECT_SOURCE_DIR}/README.md;"
+        WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+        COMMENT "Generating API documentation with Doxygen."
+)
+
+## install the generated documentation (OPTIONAL: docs/html only exists once the non-default `doc` target has been built)
+include(GNUInstallDirs)
+install(DIRECTORY "${PROJECT_SOURCE_DIR}/docs/html"
+        DESTINATION "${CMAKE_INSTALL_DOCDIR}" OPTIONAL
+)
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hardware_sampling/cpu/cpu_samples.hpp
index 3bd8a49..1343909 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hardware_sampling/cpu/cpu_samples.hpp
@@ -339,6 +339,8 @@ std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &sampl
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
 struct fmt::formatter<hws::cpu_general_samples> : fmt::ostream_formatter { };
 
@@ -360,4 +362,6 @@ struct fmt::formatter<hws::cpu_gfx_samples> : fmt::ostream_formatter { };
 template <>
 struct fmt::formatter<hws::cpu_idle_states_samples> : fmt::ostream_formatter { };
 
+/// @endcond
+
 #endif  // HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_
diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hardware_sampling/cpu/hardware_sampler.hpp
index bc6971f..4ae805f 100644
--- a/include/hardware_sampling/cpu/hardware_sampler.hpp
+++ b/include/hardware_sampling/cpu/hardware_sampler.hpp
@@ -150,7 +150,11 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler)
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
 struct fmt::formatter<hws::cpu_hardware_sampler> : fmt::ostream_formatter { };
 
+/// @endcond
+
 #endif  // HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/event.hpp b/include/hardware_sampling/event.hpp
index 7129141..2a60581 100644
--- a/include/hardware_sampling/event.hpp
+++ b/include/hardware_sampling/event.hpp
@@ -50,7 +50,11 @@ std::ostream &operator<<(std::ostream &out, const event &e);
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
 struct fmt::formatter<hws::event> : fmt::ostream_formatter { };
 
+/// @endcond
+
 #endif  // HARDWARE_SAMPLING_EVENT_HPP_
diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
index 308ca91..00dc90f 100644
--- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
@@ -163,7 +163,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
 struct fmt::formatter<hws::gpu_amd_hardware_sampler> : fmt::ostream_formatter { };
 
+/// @endcond
+
 #endif  // HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
index 727e683..958aa3a 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
@@ -272,6 +272,8 @@ std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
 struct fmt::formatter<hws::rocm_smi_general_samples> : fmt::ostream_formatter { };
 
@@ -287,4 +289,6 @@ struct fmt::formatter<hws::rocm_smi_memory_samples> : fmt::ostream_formatter { }
 template <>
 struct fmt::formatter<hws::rocm_smi_temperature_samples> : fmt::ostream_formatter { };
 
+/// @endcond
+
 #endif  // HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hardware_sampling/gpu_amd/utility.hpp
index a277e06..aa21ba0 100644
--- a/include/hardware_sampling/gpu_amd/utility.hpp
+++ b/include/hardware_sampling/gpu_amd/utility.hpp
@@ -25,6 +25,11 @@ namespace hws::detail {
  * @brief Defines the `HWS_ROCM_SMI_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise.
  * @details Throws an exception if a ROCm SMI call returns with an error. Additionally outputs a more concrete error string if possible.
  */
+/**
+ * @def HWS_HIP_ERROR_CHECK
+ * @brief Defines the `HWS_HIP_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise.
+ * @details Throws an exception if a HIP call returns with an error. Additionally outputs a more concrete error string.
+ */
 #if defined(HWS_ERROR_CHECKS_ENABLED)
     #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func)                                                                                                \
         {                                                                                                                                          \
@@ -58,7 +63,7 @@ namespace hws::detail {
 
 /**
  * @brief Convert the performance level value (`rsmi_dev_perf_level_t`) to a string.
- * @param[in] clocks_event_reasons the bitmask to convert to a string
+ * @param[in] perf_level the performance level value to convert to a string
  * @return all event throttle reasons (`[[nodiscard]]`)
  */
 [[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level);
diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
index 6841c7b..cf97f41 100644
--- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
@@ -163,7 +163,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
 struct fmt::formatter<hws::gpu_intel_hardware_sampler> : fmt::ostream_formatter { };
 
+/// @endcond
+
 #endif  // HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
index f53422e..2ade186 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
@@ -267,6 +267,8 @@ std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
 struct fmt::formatter<hws::level_zero_general_samples> : fmt::ostream_formatter { };
 
@@ -282,4 +284,6 @@ struct fmt::formatter<hws::level_zero_memory_samples> : fmt::ostream_formatter {
 template <>
 struct fmt::formatter<hws::level_zero_temperature_samples> : fmt::ostream_formatter { };
 
+/// @endcond
+
 #endif  // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
index 4180f95..5dba1ca 100644
--- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
+++ b/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
@@ -164,7 +164,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
 struct fmt::formatter<hws::gpu_nvidia_hardware_sampler> : fmt::ostream_formatter { };
 
+/// @endcond
+
 #endif  // HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
index 91f4e6b..631e572 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
@@ -81,6 +81,7 @@ class nvml_clock_samples {
     // befriend hardware sampler class
     friend class gpu_nvidia_hardware_sampler;
 
+    /// The map type used to map the available clock frequencies to a specific memory frequency.
     using map_type = std::map<double, std::vector<double>>;
 
   public:
@@ -260,6 +261,8 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
 struct fmt::formatter<hws::nvml_general_samples> : fmt::ostream_formatter { };
 
@@ -275,4 +278,6 @@ struct fmt::formatter<hws::nvml_memory_samples> : fmt::ostream_formatter { };
 template <>
 struct fmt::formatter<hws::nvml_temperature_samples> : fmt::ostream_formatter { };
 
+/// @endcond
+
 #endif  // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp
index 0352915..aaf0420 100644
--- a/include/hardware_sampling/gpu_nvidia/utility.hpp
+++ b/include/hardware_sampling/gpu_nvidia/utility.hpp
@@ -26,6 +26,11 @@ namespace hws::detail {
  * @brief Defines the `HWS_NVML_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise.
  * @details Throws an exception if an NVML call returns with an error. Additionally outputs a more concrete error string.
  */
+/**
+ * @def HWS_CUDA_ERROR_CHECK
+ * @brief Defines the `HWS_CUDA_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise.
+ * @details Throws an exception if a CUDA call returns with an error. Additionally outputs a more concrete error string.
+ */
 #if defined(HWS_ERROR_CHECKS_ENABLED)
     #define HWS_NVML_ERROR_CHECK(nvml_func)                                                                                                                        \
         {                                                                                                                                                          \
diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hardware_sampling/hardware_sampler.hpp
index 8824ac3..cc59331 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hardware_sampling/hardware_sampler.hpp
@@ -123,15 +123,16 @@ class hardware_sampler {
     [[nodiscard]] std::size_t num_events() const noexcept { return events_.size(); }
 
     /**
-     * @brief Return the number of recorded events.
+     * @brief Return a vector of all recorded events.
      * @return the events (`[[nodiscard]]`)
      */
     [[nodiscard]] const std::vector<event> &get_events() const noexcept { return events_; }
 
     /**
-     * @brief Return the number of recorded events.
+     * @brief Return the event at index @p idx.
+     * @param[in] idx the index of the event to return
      * @throws std::out_of_range the the @p idx is out of bounce
-     * @return the number of events (`[[nodiscard]]`)
+     * @return the event at index @p idx (`[[nodiscard]]`)
      */
     [[nodiscard]] event get_event(std::size_t idx) const;
 
@@ -153,11 +154,11 @@ class hardware_sampler {
      */
     void dump_yaml(const char *filename) const;
     /**
-     * @copydoc hws::hardware_sampler::dump_yaml(const char *)
+     * @copydoc hws::hardware_sampler::dump_yaml(const char *) const
      */
     void dump_yaml(const std::string &filename) const;
     /**
-     * @copydoc hws::hardware_sampler::dump_yaml(const char *)
+     * @copydoc hws::hardware_sampler::dump_yaml(const char *) const
      */
     void dump_yaml(const std::filesystem::path &filename) const;
 
diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hardware_sampling/system_hardware_sampler.hpp
index 96bff5c..15279de 100644
--- a/include/hardware_sampling/system_hardware_sampler.hpp
+++ b/include/hardware_sampling/system_hardware_sampler.hpp
@@ -157,7 +157,7 @@ class system_hardware_sampler {
      */
     [[nodiscard]] const std::unique_ptr<hardware_sampler> &sampler(std::size_t idx) const;
     /**
-     * @copydoc hws::system_hardware_sampler::samplers(std::size_t idx) const
+     * @copydoc hws::system_hardware_sampler::sampler(std::size_t idx) const
      */
     [[nodiscard]] std::unique_ptr<hardware_sampler> &sampler(std::size_t idx);
 
@@ -167,11 +167,11 @@ class system_hardware_sampler {
      */
     void dump_yaml(const char *filename) const;
     /**
-     * @copydoc hws::system_hardware_sampler::dump_yaml(const char *)
+     * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) const
      */
     void dump_yaml(const std::string &filename) const;
     /**
-     * @copydoc hws::system_hardware_sampler::dump_yaml(const char *)
+     * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) const
      */
     void dump_yaml(const std::filesystem::path &filename) const;
 
diff --git a/include/hardware_sampling/utility.hpp b/include/hardware_sampling/utility.hpp
index c70b4c2..4a99a31 100644
--- a/include/hardware_sampling/utility.hpp
+++ b/include/hardware_sampling/utility.hpp
@@ -58,15 +58,30 @@ namespace hws::detail {
 /**                                          type_traits                                            **/
 /*****************************************************************************************************/
 
+/**
+ * @brief Remove the reference and the topmost cv-qualifiers from type @p T.
+ */
 template <typename T>
 using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;
 
+/**
+ * @brief The case if the type @p T isn't a std::vector.
+ * @tparam T the type to check
+ */
 template <typename T>
 struct is_vector : std::false_type { };
 
+/**
+ * @brief The case if the checked type is a std::vector with value type @p T.
+ * @tparam T the value type of the std::vector
+ */
 template <typename T>
 struct is_vector<std::vector<T>> : std::true_type { };
 
+/**
+ * @brief Evaluates to `true` if @p T is a std::vector, otherwise `false`.
+ * @tparam T the type to check
+ */
 template <typename T>
 constexpr bool is_vector_v = is_vector<T>::value;
 

From 621f50dba9392090ab474d8a66f2589979bf328e Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 30 Sep 2024 13:53:16 +0200
Subject: [PATCH 61/69] Rename hardware_sampling folder to hws and change
 target library name.

---
 .clang-format                                 |  2 +-
 .gitignore                                    |  2 +-
 CMakeLists.txt                                | 61 +++++++++----------
 README.md                                     |  8 +--
 bindings/cpu_hardware_sampler.cpp             |  8 +--
 bindings/event.cpp                            |  2 +-
 bindings/gpu_amd_hardware_sampler.cpp         |  8 +--
 bindings/gpu_intel_hardware_sampler.cpp       |  8 +--
 bindings/gpu_nvidia_hardware_sampler.cpp      |  8 +--
 bindings/hardware_sampler.cpp                 | 14 ++---
 bindings/relative_event.hpp                   |  6 +-
 bindings/sample_category.cpp                  |  2 +-
 bindings/system_hardware_sampler.cpp          |  8 +--
 bindings/version.cpp                          |  2 +-
 ...lingConfig.cmake.in => hwsConfig.cmake.in} |  4 +-
 examples/cpp/CMakeLists.txt                   |  4 +-
 examples/cpp/main.cpp                         |  2 +-
 include/hardware_sampling/core.hpp            | 41 -------------
 include/hws/core.hpp                          | 41 +++++++++++++
 .../cpu/cpu_samples.hpp                       |  8 +--
 .../cpu/hardware_sampler.hpp                  | 12 ++--
 .../cpu/utility.hpp                           |  6 +-
 include/{hardware_sampling => hws}/event.hpp  |  6 +-
 .../gpu_amd/hardware_sampler.hpp              | 12 ++--
 .../gpu_amd/rocm_smi_samples.hpp              | 10 +--
 .../gpu_amd/utility.hpp                       |  6 +-
 .../gpu_intel/hardware_sampler.hpp            | 14 ++---
 .../gpu_intel/level_zero_device_handle.hpp    | 10 +--
 .../level_zero_device_handle_impl.hpp         | 10 +--
 .../gpu_intel/level_zero_samples.hpp          |  8 +--
 .../gpu_intel/utility.hpp                     |  6 +-
 .../gpu_nvidia/hardware_sampler.hpp           | 14 ++---
 .../gpu_nvidia/nvml_device_handle.hpp         | 10 +--
 .../gpu_nvidia/nvml_device_handle_impl.hpp    | 10 +--
 .../gpu_nvidia/nvml_samples.hpp               |  8 +--
 .../gpu_nvidia/utility.hpp                    |  6 +-
 .../hardware_sampler.hpp                      | 10 +--
 .../sample_category.hpp                       |  6 +-
 .../system_hardware_sampler.hpp               | 12 ++--
 .../{hardware_sampling => hws}/utility.hpp    |  6 +-
 .../{hardware_sampling => hws}/version.hpp.in |  6 +-
 .../cpu/cpu_samples.cpp                       |  4 +-
 .../cpu/hardware_sampler.cpp                  | 12 ++--
 .../cpu/utility.cpp                           |  4 +-
 src/{hardware_sampling => hws}/event.cpp      |  2 +-
 .../gpu_amd/hardware_sampler.cpp              | 12 ++--
 .../gpu_amd/rocm_smi_samples.cpp              |  4 +-
 .../gpu_amd/utility.cpp                       |  2 +-
 .../gpu_intel/hardware_sampler.cpp            | 16 ++---
 .../gpu_intel/level_zero_samples.cpp          |  4 +-
 .../gpu_intel/utility.cpp                     |  2 +-
 .../gpu_nvidia/hardware_sampler.cpp           | 16 ++---
 .../gpu_nvidia/nvml_samples.cpp               |  4 +-
 .../gpu_nvidia/utility.cpp                    |  2 +-
 .../hardware_sampler.cpp                      |  8 +--
 .../system_hardware_sampler.cpp               | 20 +++---
 src/{hardware_sampling => hws}/utility.cpp    |  2 +-
 57 files changed, 270 insertions(+), 271 deletions(-)
 rename cmake/{hardware_samplingConfig.cmake.in => hwsConfig.cmake.in} (84%)
 delete mode 100644 include/hardware_sampling/core.hpp
 create mode 100644 include/hws/core.hpp
 rename include/{hardware_sampling => hws}/cpu/cpu_samples.hpp (98%)
 rename include/{hardware_sampling => hws}/cpu/hardware_sampler.hpp (92%)
 rename include/{hardware_sampling => hws}/cpu/utility.hpp (93%)
 rename include/{hardware_sampling => hws}/event.hpp (93%)
 rename include/{hardware_sampling => hws}/gpu_amd/hardware_sampler.hpp (93%)
 rename include/{hardware_sampling => hws}/gpu_amd/rocm_smi_samples.hpp (98%)
 rename include/{hardware_sampling => hws}/gpu_amd/utility.hpp (96%)
 rename include/{hardware_sampling => hws}/gpu_intel/hardware_sampler.hpp (91%)
 rename include/{hardware_sampling => hws}/gpu_intel/level_zero_device_handle.hpp (85%)
 rename include/{hardware_sampling => hws}/gpu_intel/level_zero_device_handle_impl.hpp (85%)
 rename include/{hardware_sampling => hws}/gpu_intel/level_zero_samples.hpp (98%)
 rename include/{hardware_sampling => hws}/gpu_intel/utility.hpp (95%)
 rename include/{hardware_sampling => hws}/gpu_nvidia/hardware_sampler.hpp (92%)
 rename include/{hardware_sampling => hws}/gpu_nvidia/nvml_device_handle.hpp (85%)
 rename include/{hardware_sampling => hws}/gpu_nvidia/nvml_device_handle_impl.hpp (74%)
 rename include/{hardware_sampling => hws}/gpu_nvidia/nvml_samples.hpp (98%)
 rename include/{hardware_sampling => hws}/gpu_nvidia/utility.hpp (96%)
 rename include/{hardware_sampling => hws}/hardware_sampler.hpp (96%)
 rename include/{hardware_sampling => hws}/sample_category.hpp (96%)
 rename include/{hardware_sampling => hws}/system_hardware_sampler.hpp (95%)
 rename include/{hardware_sampling => hws}/utility.hpp (99%)
 rename include/{hardware_sampling => hws}/version.hpp.in (92%)
 rename src/{hardware_sampling => hws}/cpu/cpu_samples.cpp (99%)
 rename src/{hardware_sampling => hws}/cpu/hardware_sampler.cpp (98%)
 rename src/{hardware_sampling => hws}/cpu/utility.cpp (95%)
 rename src/{hardware_sampling => hws}/event.cpp (93%)
 rename src/{hardware_sampling => hws}/gpu_amd/hardware_sampler.cpp (98%)
 rename src/{hardware_sampling => hws}/gpu_amd/rocm_smi_samples.cpp (99%)
 rename src/{hardware_sampling => hws}/gpu_amd/utility.cpp (96%)
 rename src/{hardware_sampling => hws}/gpu_intel/hardware_sampler.cpp (98%)
 rename src/{hardware_sampling => hws}/gpu_intel/level_zero_samples.cpp (99%)
 rename src/{hardware_sampling => hws}/gpu_intel/utility.cpp (99%)
 rename src/{hardware_sampling => hws}/gpu_nvidia/hardware_sampler.cpp (97%)
 rename src/{hardware_sampling => hws}/gpu_nvidia/nvml_samples.cpp (99%)
 rename src/{hardware_sampling => hws}/gpu_nvidia/utility.cpp (97%)
 rename src/{hardware_sampling => hws}/hardware_sampler.cpp (96%)
 rename src/{hardware_sampling => hws}/system_hardware_sampler.cpp (90%)
 rename src/{hardware_sampling => hws}/utility.cpp (97%)

diff --git a/.clang-format b/.clang-format
index 97d4dc9..84b7fa5 100644
--- a/.clang-format
+++ b/.clang-format
@@ -77,7 +77,7 @@ ForEachMacros: [ 'foreach', 'Q_FOREACH', 'BOOST_FOREACH' ]
 IfMacros: [ ]
 IncludeBlocks: Regroup
 IncludeCategories:
-  - Regex: '^"hardware_sampling/'
+  - Regex: '^"hws/'
     Priority: 1
   - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess|fmt)'
     Priority: 2
diff --git a/.gitignore b/.gitignore
index 36754d0..9f74de0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -58,4 +58,4 @@ CTestTestfile.cmake
 .vs/
 
 # auto-generated version header
-include/hardware_sampling/version.hpp
\ No newline at end of file
+include/hws/version.hpp
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f4e50c8..97ccbe1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -13,16 +13,15 @@ project("hws - Hardware Sampling for GPUs and CPUs"
 
 # explicitly set library source files
 set(HWS_SOURCES
-        ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/event.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/hardware_sampler.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/system_hardware_sampler.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/utility.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/event.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/system_hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/utility.cpp
 )
 
 # create hardware sampling library
-set(HWS_LIBRARY_NAME hardware_sampling)
+set(HWS_LIBRARY_NAME hws)
 add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES})
-add_library(hws ALIAS ${HWS_LIBRARY_NAME})
 add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME})
 
 # set install target
@@ -95,8 +94,8 @@ target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt)
 ########################################################################################################################
 message(STATUS "Configuring version information.")
 configure_file(
-        ${CMAKE_CURRENT_SOURCE_DIR}/include/hardware_sampling/version.hpp.in
-        ${CMAKE_CURRENT_SOURCE_DIR}/include/hardware_sampling/version.hpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp.in
+        ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp
         @ONLY
 )
 
@@ -189,9 +188,9 @@ if (HWS_LSCPU_FOUND OR HWS_FREE_FOUND OR HWS_TURBOSTAT_EXECUTION_TYPE)
     # add source file to source file list
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/cpu/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/cpu/cpu_samples.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/cpu/utility.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/cpu/hardware_sampler.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/cpu/cpu_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/cpu/utility.cpp;
             >)
 
     # add compile definitions
@@ -214,9 +213,9 @@ if (CUDAToolkit_FOUND)
     # add source file to source file list
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_nvidia/utility.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_nvidia/hardware_sampler.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_nvidia/nvml_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_nvidia/utility.cpp
             >)
 
     # add compile definition
@@ -241,9 +240,9 @@ if (rocm_smi_FOUND)
     # add source file to source file list
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_amd/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_amd/utility.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_amd/hardware_sampler.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_amd/rocm_smi_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_amd/utility.cpp
             >)
 
     # add compile definition
@@ -266,9 +265,9 @@ if (level_zero_FOUND)
     # add source file to source file list
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/level_zero_samples.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/utility.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_intel/hardware_sampler.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_intel/level_zero_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_intel/utility.cpp
             >)
 
     # add compile definition
@@ -302,7 +301,7 @@ endif ()
 include(GNUInstallDirs)
 ## install all necessary library targets
 install(TARGETS ${HWS_TARGETS_TO_INSTALL}
-        EXPORT hardware_sampling_Targets
+        EXPORT hws_Targets
         ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"  # all files that are neither executables, shared lib or headers
         LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"  # all shared lib files
         RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"  # all executables
@@ -316,28 +315,28 @@ install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/"
 ## manage version comparison
 include(CMakePackageConfigHelpers)
 write_basic_package_version_file(
-        "hardware_samplingConfigVersion.cmake"
+        "hwsConfigVersion.cmake"
         VERSION ${PROJECT_VERSION}
         COMPATIBILITY SameMajorVersion
 )
 
 ## generate configuration file
 configure_package_config_file(
-        "${CMAKE_CURRENT_SOURCE_DIR}/cmake/hardware_samplingConfig.cmake.in"
-        "${PROJECT_BINARY_DIR}/hardware_samplingConfig.cmake"
-        INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake
+        "${CMAKE_CURRENT_SOURCE_DIR}/cmake/hwsConfig.cmake.in"
+        "${PROJECT_BINARY_DIR}/hwsConfig.cmake"
+        INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake
 )
 
 ## create and copy install-targets file
-install(EXPORT hardware_sampling_Targets
-        FILE hardware_samplingTargets.cmake
+install(EXPORT hws_Targets
+        FILE hwsTargets.cmake
         NAMESPACE hws::
-        DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake
+        DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake
 )
 
 ## create file containing the build configuration and version information
 install(FILES
-        "${PROJECT_BINARY_DIR}/hardware_samplingConfig.cmake"
-        "${PROJECT_BINARY_DIR}/hardware_samplingConfigVersion.cmake"
-        DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake
+        "${PROJECT_BINARY_DIR}/hwsConfig.cmake"
+        "${PROJECT_BINARY_DIR}/hwsConfigVersion.cmake"
+        DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake
 )
\ No newline at end of file
diff --git a/README.md b/README.md
index 77869b4..477738a 100644
--- a/README.md
+++ b/README.md
@@ -66,7 +66,7 @@ cmake --install . --prefix "/home/myuser/installdir"
 Afterward, the necessary exports should be performed:
 
 ```bash
-export CMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/share/hardware_sampling/cmake:${CMAKE_PREFIX_PATH}
+export CMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/share/hws/cmake:${CMAKE_PREFIX_PATH}
 export LD_LIBRARY_PATH=${CMAKE_INSTALL_PREFIX}/lib:${LD_LIBRARY_PATH}
 export CPLUS_INCLUDE_PATH=${CMAKE_INSTALL_PREFIX}/include:${CPLUS_INCLUDE_PATH}
 export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
@@ -242,15 +242,15 @@ current clock frequencies, temperatures, or memory consumption.
 ## Example Python usage
 
 ```python
-import HardwareSampling
+import HardwareSampling as hws
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 import datetime
 
-sampler = HardwareSampling.CpuHardwareSampler()
+sampler = hws.CpuHardwareSampler()
 # could also be, e.g.,
-# sampler = HardwareSampling.GpuNvidiaHardwareSampler()
+# sampler = hws.GpuNvidiaHardwareSampler()
 sampler.start()
 
 sampler.add_event("init")
diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp
index 9dae939..8e3e104 100644
--- a/bindings/cpu_hardware_sampler.cpp
+++ b/bindings/cpu_hardware_sampler.cpp
@@ -5,10 +5,10 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/cpu/cpu_samples.hpp"       // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
-#include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
-#include "hardware_sampling/hardware_sampler.hpp"      // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"       // hws::sample_category
+#include "hws/cpu/cpu_samples.hpp"       // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
+#include "hws/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
+#include "hws/hardware_sampler.hpp"      // hws::hardware_sampler
+#include "hws/sample_category.hpp"       // hws::sample_category
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
diff --git a/bindings/event.cpp b/bindings/event.cpp
index 8a9696a..f19315a 100644
--- a/bindings/event.cpp
+++ b/bindings/event.cpp
@@ -5,7 +5,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/event.hpp"  // hws::event
+#include "hws/event.hpp"  // hws::event
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // bind std::chrono types
diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp
index 9da321c..db846da 100644
--- a/bindings/gpu_amd_hardware_sampler.cpp
+++ b/bindings/gpu_amd_hardware_sampler.cpp
@@ -5,10 +5,10 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
-#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"           // hws::sample_category
+#include "hws/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
+#include "hws/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
+#include "hws/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hws/sample_category.hpp"           // hws::sample_category
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp
index 901aeed..aaae9ed 100644
--- a/bindings/gpu_intel_hardware_sampler.cpp
+++ b/bindings/gpu_intel_hardware_sampler.cpp
@@ -5,10 +5,10 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_intel/hardware_sampler.hpp"    // hws::gpu_intel_hardware_sampler
-#include "hardware_sampling/gpu_intel/level_zero_samples.hpp"  // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"              // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"               // hws::sample_category
+#include "hws/gpu_intel/hardware_sampler.hpp"    // hws::gpu_intel_hardware_sampler
+#include "hws/gpu_intel/level_zero_samples.hpp"  // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
+#include "hws/hardware_sampler.hpp"              // hws::hardware_sampler
+#include "hws/sample_category.hpp"               // hws::sample_category
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp
index b049156..a32283a 100644
--- a/bindings/gpu_nvidia_hardware_sampler.cpp
+++ b/bindings/gpu_nvidia_hardware_sampler.cpp
@@ -5,10 +5,10 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
-#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"      // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"             // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"              // hws::sample_category
+#include "hws/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
+#include "hws/gpu_nvidia/nvml_samples.hpp"      // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
+#include "hws/hardware_sampler.hpp"             // hws::hardware_sampler
+#include "hws/sample_category.hpp"              // hws::sample_category
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index 2c47046..5a12141 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -5,22 +5,22 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
+#include "hws/hardware_sampler.hpp"  // hws::hardware_sampler
 
-#include "hardware_sampling/event.hpp"    // hws::event
-#include "hardware_sampling/utility.hpp"  // hws::detail::durations_from_reference_time
+#include "hws/event.hpp"    // hws::event
+#include "hws/utility.hpp"  // hws::detail::durations_from_reference_time
 
 #if defined(HWS_FOR_CPUS_ENABLED)
-    #include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
+    #include "hws/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
 #endif
 #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
+    #include "hws/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
 #endif
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
+    #include "hws/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
 #endif
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_intel/hardware_sampler.hpp"  // hws::gpu_intel_hardware_sampler
+    #include "hws/gpu_intel/hardware_sampler.hpp"  // hws::gpu_intel_hardware_sampler
 #endif
 
 #include "fmt/format.h"         // fmt::format
diff --git a/bindings/relative_event.hpp b/bindings/relative_event.hpp
index 2033f12..fcdd02e 100644
--- a/bindings/relative_event.hpp
+++ b/bindings/relative_event.hpp
@@ -8,8 +8,8 @@
  * @brief Defines a struct encapsulating a single event with a relative time point.
  */
 
-#ifndef HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_
-#define HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_
+#ifndef HWS_BINDINGS_RELATIVE_EVENT_HPP_
+#define HWS_BINDINGS_RELATIVE_EVENT_HPP_
 
 #include <string>   // std::string
 #include <utility>  // std::move
@@ -37,4 +37,4 @@ struct relative_event {
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_BINDINGS_RELATIVE_EVENT_HPP_
+#endif  // HWS_BINDINGS_RELATIVE_EVENT_HPP_
diff --git a/bindings/sample_category.cpp b/bindings/sample_category.cpp
index 2db6563..455914c 100644
--- a/bindings/sample_category.cpp
+++ b/bindings/sample_category.cpp
@@ -5,7 +5,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/sample_category.hpp"  // hws::sample_category
+#include "hws/sample_category.hpp"  // hws::sample_category
 
 #include "pybind11/operators.h"  // operator overloading
 #include "pybind11/pybind11.h"   // py::module_, py::overload_cast
diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp
index 2e25e79..d9af622 100644
--- a/bindings/system_hardware_sampler.cpp
+++ b/bindings/system_hardware_sampler.cpp
@@ -5,11 +5,11 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
+#include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
 
-#include "hardware_sampling/event.hpp"            // hws::event
-#include "hardware_sampling/sample_category.hpp"  // hws::sample_category
-#include "hardware_sampling/utility.hpp"          // hws::detail::durations_from_reference_time
+#include "hws/event.hpp"            // hws::event
+#include "hws/sample_category.hpp"  // hws::sample_category
+#include "hws/utility.hpp"          // hws::detail::durations_from_reference_time
 
 #include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // bind std::chrono types
diff --git a/bindings/version.cpp b/bindings/version.cpp
index df892ca..e5481d1 100644
--- a/bindings/version.cpp
+++ b/bindings/version.cpp
@@ -5,7 +5,7 @@
 *          See the LICENSE.md file in the project root for full license information.
 */
 
-#include "hardware_sampling/version.hpp"  // hws::version
+#include "hws/version.hpp"  // hws::version
 
 #include "pybind11/pybind11.h"   // py::module_
 
diff --git a/cmake/hardware_samplingConfig.cmake.in b/cmake/hwsConfig.cmake.in
similarity index 84%
rename from cmake/hardware_samplingConfig.cmake.in
rename to cmake/hwsConfig.cmake.in
index 53829a0..852e638 100644
--- a/cmake/hardware_samplingConfig.cmake.in
+++ b/cmake/hwsConfig.cmake.in
@@ -15,5 +15,5 @@ list(APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/fmt"
 find_dependency(fmt REQUIRED)
 
 # sanity checks
-include("${CMAKE_CURRENT_LIST_DIR}/hardware_samplingTargets.cmake")
-check_required_components("hardware_sampling")
\ No newline at end of file
+include("${CMAKE_CURRENT_LIST_DIR}/hwsTargets.cmake")
+check_required_components("hws")
\ No newline at end of file
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index 56cff22..1ffbc0b 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -8,9 +8,9 @@ cmake_minimum_required(VERSION 3.22)
 
 project(LibraryUsageExample LANGUAGES CXX)
 
-find_package(hardware_sampling REQUIRED)
+find_package(hws REQUIRED)
 
 add_executable(prog main.cpp)
 
 target_compile_features(prog PUBLIC cxx_std_17)
-target_link_libraries(prog PUBLIC hws::hardware_sampling)
\ No newline at end of file
+target_link_libraries(prog PUBLIC hws::hws)
\ No newline at end of file
diff --git a/examples/cpp/main.cpp b/examples/cpp/main.cpp
index 551c721..63e4160 100644
--- a/examples/cpp/main.cpp
+++ b/examples/cpp/main.cpp
@@ -5,7 +5,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/core.hpp"
+#include "hws/core.hpp"
 
 #include <cstddef>  // std::size_t
 #include <numeric>  // std::iota
diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp
deleted file mode 100644
index 44d8922..0000000
--- a/include/hardware_sampling/core.hpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * @file
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- *
- * @brief Core header containing all other necessary other headers.
- */
-
-#ifndef HARDWARE_SAMPLING_CORE_HPP_
-#define HARDWARE_SAMPLING_CORE_HPP_
-#pragma once
-
-#include "hardware_sampling/event.hpp"
-#include "hardware_sampling/hardware_sampler.hpp"
-#include "hardware_sampling/sample_category.hpp"
-#include "hardware_sampling/system_hardware_sampler.hpp"
-#include "hardware_sampling/version.hpp"
-
-#if defined(HWS_FOR_CPUS_ENABLED)
-    #include "hardware_sampling/cpu/cpu_samples.hpp"
-    #include "hardware_sampling/cpu/hardware_sampler.hpp"
-#endif
-
-#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_nvidia//nvml_samples.hpp"
-    #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"
-#endif
-
-#if defined(HWS_FOR_AMD_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"
-    #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"
-#endif
-
-#if defined(HWS_FOR_INTEL_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_intel/hardware_sampler.hpp"
-    #include "hardware_sampling/gpu_intel/level_zero_samples.hpp"
-#endif
-
-#endif  // HARDWARE_SAMPLING_CORE_HPP_
diff --git a/include/hws/core.hpp b/include/hws/core.hpp
new file mode 100644
index 0000000..8c7a474
--- /dev/null
+++ b/include/hws/core.hpp
@@ -0,0 +1,41 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Core header including all other necessary headers; backend headers are only included if the respective HWS_FOR_*_ENABLED macro is defined.
+ */
+
+#ifndef HWS_CORE_HPP_
+#define HWS_CORE_HPP_
+#pragma once
+
+#include "hws/event.hpp"
+#include "hws/hardware_sampler.hpp"
+#include "hws/sample_category.hpp"
+#include "hws/system_hardware_sampler.hpp"
+#include "hws/version.hpp"
+
+#if defined(HWS_FOR_CPUS_ENABLED)
+    #include "hws/cpu/cpu_samples.hpp"
+    #include "hws/cpu/hardware_sampler.hpp"
+#endif
+
+#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
+    #include "hws/gpu_nvidia/hardware_sampler.hpp"
+    #include "hws/gpu_nvidia/nvml_samples.hpp"
+#endif
+
+#if defined(HWS_FOR_AMD_GPUS_ENABLED)
+    #include "hws/gpu_amd/hardware_sampler.hpp"
+    #include "hws/gpu_amd/rocm_smi_samples.hpp"
+#endif
+
+#if defined(HWS_FOR_INTEL_GPUS_ENABLED)
+    #include "hws/gpu_intel/hardware_sampler.hpp"
+    #include "hws/gpu_intel/level_zero_samples.hpp"
+#endif
+
+#endif  // HWS_CORE_HPP_
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hws/cpu/cpu_samples.hpp
similarity index 98%
rename from include/hardware_sampling/cpu/cpu_samples.hpp
rename to include/hws/cpu/cpu_samples.hpp
index 1343909..bcea2d4 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hws/cpu/cpu_samples.hpp
@@ -8,11 +8,11 @@
  * @brief Defines the samples used with turbostat, lscpu, and free.
  */
 
-#ifndef HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_
-#define HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_
+#ifndef HWS_CPU_CPU_SAMPLES_HPP_
+#define HWS_CPU_CPU_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+#include "hws/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -364,4 +364,4 @@ struct fmt::formatter<hws::cpu_idle_states_samples> : fmt::ostream_formatter { }
 
 /// @endcond
 
-#endif  // HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_
+#endif  // HWS_CPU_CPU_SAMPLES_HPP_
diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hws/cpu/hardware_sampler.hpp
similarity index 92%
rename from include/hardware_sampling/cpu/hardware_sampler.hpp
rename to include/hws/cpu/hardware_sampler.hpp
index 4ae805f..d1b4102 100644
--- a/include/hardware_sampling/cpu/hardware_sampler.hpp
+++ b/include/hws/cpu/hardware_sampler.hpp
@@ -8,13 +8,13 @@
  * @brief Defines a hardware sampler for CPUs using the turbostat, lscpu, and free utilities (requires root).
  */
 
-#ifndef HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_CPU_HARDWARE_SAMPLER_HPP_
+#define HWS_CPU_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
-#include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"                // hws::sample_category
+#include "hws/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
+#include "hws/hardware_sampler.hpp"  // hws::hardware_sampler
+#include "hws/sample_category.hpp"   // hws::sample_category
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -157,4 +157,4 @@ struct fmt::formatter<hws::cpu_hardware_sampler> : fmt::ostream_formatter { };
 
 /// @endcond
 
-#endif  // HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_CPU_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/cpu/utility.hpp b/include/hws/cpu/utility.hpp
similarity index 93%
rename from include/hardware_sampling/cpu/utility.hpp
rename to include/hws/cpu/utility.hpp
index 467d4e5..9efd008 100644
--- a/include/hardware_sampling/cpu/utility.hpp
+++ b/include/hws/cpu/utility.hpp
@@ -8,8 +8,8 @@
  * @brief Implements utility functionality for the CPU sampler.
  */
 
-#ifndef HARDWARE_SAMPLING_CPU_UTILITY_HPP_
-#define HARDWARE_SAMPLING_CPU_UTILITY_HPP_
+#ifndef HWS_CPU_UTILITY_HPP_
+#define HWS_CPU_UTILITY_HPP_
 #pragma once
 
 #include "fmt/format.h"  // fmt::format
@@ -46,4 +46,4 @@ namespace hws::detail {
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_CPU_UTILITY_HPP_
+#endif  // HWS_CPU_UTILITY_HPP_
diff --git a/include/hardware_sampling/event.hpp b/include/hws/event.hpp
similarity index 93%
rename from include/hardware_sampling/event.hpp
rename to include/hws/event.hpp
index 2a60581..7252a75 100644
--- a/include/hardware_sampling/event.hpp
+++ b/include/hws/event.hpp
@@ -8,8 +8,8 @@
  * @brief Defines an event type.
  */
 
-#ifndef HARDWARE_SAMPLING_EVENT_HPP_
-#define HARDWARE_SAMPLING_EVENT_HPP_
+#ifndef HWS_EVENT_HPP_
+#define HWS_EVENT_HPP_
 #pragma once
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
@@ -57,4 +57,4 @@ struct fmt::formatter<hws::event> : fmt::ostream_formatter { };
 
 /// @endcond
 
-#endif  // HARDWARE_SAMPLING_EVENT_HPP_
+#endif  // HWS_EVENT_HPP_
diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hws/gpu_amd/hardware_sampler.hpp
similarity index 93%
rename from include/hardware_sampling/gpu_amd/hardware_sampler.hpp
rename to include/hws/gpu_amd/hardware_sampler.hpp
index 00dc90f..668cc9a 100644
--- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
+++ b/include/hws/gpu_amd/hardware_sampler.hpp
@@ -8,13 +8,13 @@
  * @brief Defines a hardware sampler for AMD GPUs using AMD's ROCm SMI library.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_
+#define HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"           // hws::sample_category
+#include "hws/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
+#include "hws/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hws/sample_category.hpp"           // hws::sample_category
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -170,4 +170,4 @@ struct fmt::formatter<hws::gpu_amd_hardware_sampler> : fmt::ostream_formatter {
 
 /// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hws/gpu_amd/rocm_smi_samples.hpp
similarity index 98%
rename from include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
rename to include/hws/gpu_amd/rocm_smi_samples.hpp
index 958aa3a..8ace761 100644
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ b/include/hws/gpu_amd/rocm_smi_samples.hpp
@@ -8,11 +8,11 @@
  * @brief Defines the samples used with ROCm SMI.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
-#define HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
+#ifndef HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
+#define HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+#include "hws/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -56,7 +56,7 @@ class rocm_smi_general_samples {
 
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization)  // the GPU compute utilization in percent
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization)   // the GPU memory utilization in percent
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, performance_level)              // the performance level: one of rsmi_dev_perf_level_t
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, performance_level)      // the performance level: one of rsmi_dev_perf_level_t
 };
 
 /**
@@ -291,4 +291,4 @@ struct fmt::formatter<hws::rocm_smi_temperature_samples> : fmt::ostream_formatte
 
 /// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
+#endif  // HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hws/gpu_amd/utility.hpp
similarity index 96%
rename from include/hardware_sampling/gpu_amd/utility.hpp
rename to include/hws/gpu_amd/utility.hpp
index aa21ba0..4889976 100644
--- a/include/hardware_sampling/gpu_amd/utility.hpp
+++ b/include/hws/gpu_amd/utility.hpp
@@ -8,8 +8,8 @@
  * @brief Implements utility functionality for the AMD GPU sampler.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_
-#define HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_
+#ifndef HWS_GPU_AMD_UTILITY_HPP_
+#define HWS_GPU_AMD_UTILITY_HPP_
 #pragma once
 
 #include "fmt/format.h"         // fmt::format
@@ -70,4 +70,4 @@ namespace hws::detail {
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_
+#endif  // HWS_GPU_AMD_UTILITY_HPP_
diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hws/gpu_intel/hardware_sampler.hpp
similarity index 91%
rename from include/hardware_sampling/gpu_intel/hardware_sampler.hpp
rename to include/hws/gpu_intel/hardware_sampler.hpp
index cf97f41..db068fe 100644
--- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
+++ b/include/hws/gpu_intel/hardware_sampler.hpp
@@ -8,14 +8,14 @@
  * @brief Defines a hardware sampler for Intel GPUs using Intel's Level Zero.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_
+#define HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
-#include "hardware_sampling/gpu_intel/level_zero_samples.hpp"        // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"                    // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"                     // hws::sample_category
+#include "hws/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
+#include "hws/gpu_intel/level_zero_samples.hpp"        // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
+#include "hws/hardware_sampler.hpp"                    // hws::hardware_sampler
+#include "hws/sample_category.hpp"                     // hws::sample_category
 
 #include "fmt/format.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -170,4 +170,4 @@ struct fmt::formatter<hws::gpu_intel_hardware_sampler> : fmt::ostream_formatter
 
 /// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp b/include/hws/gpu_intel/level_zero_device_handle.hpp
similarity index 85%
rename from include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp
rename to include/hws/gpu_intel/level_zero_device_handle.hpp
index f84d8a5..c05f630 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp
+++ b/include/hws/gpu_intel/level_zero_device_handle.hpp
@@ -8,8 +8,8 @@
  * @brief Defines a pImpl class for a Level Zero device handle.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
+#ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
+#define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
 #pragma once
 
 #include <cstddef>    // std::size_t
@@ -40,7 +40,7 @@ class level_zero_device_handle {
 
     /**
      * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t.
-     * @throws hardware_sampling_exception if `*this` has been default constructed
+     * @throws std::runtime_error if `*this` has been default constructed
      * @return the device handle (`[[nodiscard]]`)
      */
     [[nodiscard]] level_zero_device_handle_impl &get_impl() {
@@ -52,7 +52,7 @@ class level_zero_device_handle {
 
     /**
      * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t.
-     * @throws hardware_sampling_exception if `*this` has been default constructed
+     * @throws std::runtime_error if `*this` has been default constructed
      * @return the device handle (`[[nodiscard]]`)
      */
     [[nodiscard]] const level_zero_device_handle_impl &get_impl() const {
@@ -69,4 +69,4 @@ class level_zero_device_handle {
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
+#endif  // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp b/include/hws/gpu_intel/level_zero_device_handle_impl.hpp
similarity index 85%
rename from include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp
rename to include/hws/gpu_intel/level_zero_device_handle_impl.hpp
index a0f2ccd..1c3b269 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp
+++ b/include/hws/gpu_intel/level_zero_device_handle_impl.hpp
@@ -8,12 +8,12 @@
  * @brief Implements a pImpl class for a Level Zero device handle.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
+#ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
+#define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
-#include "hardware_sampling/gpu_intel/utility.hpp"                   // HWS_LEVEL_ZERO_ERROR_CHECK
+#include "hws/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
+#include "hws/gpu_intel/utility.hpp"                   // HWS_LEVEL_ZERO_ERROR_CHECK
 
 #include "fmt/format.h"         // fmt::format
 #include "level_zero/ze_api.h"  // Level Zero runtime functions
@@ -76,4 +76,4 @@ inline level_zero_device_handle::level_zero_device_handle(const std::size_t devi
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
+#endif  // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hws/gpu_intel/level_zero_samples.hpp
similarity index 98%
rename from include/hardware_sampling/gpu_intel/level_zero_samples.hpp
rename to include/hws/gpu_intel/level_zero_samples.hpp
index 2ade186..dec6ec5 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hws/gpu_intel/level_zero_samples.hpp
@@ -8,11 +8,11 @@
  * @brief Defines the samples used with Level Zero.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
+#ifndef HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
+#define HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+#include "hws/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -286,4 +286,4 @@ struct fmt::formatter<hws::level_zero_temperature_samples> : fmt::ostream_format
 
 /// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
+#endif  // HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_intel/utility.hpp b/include/hws/gpu_intel/utility.hpp
similarity index 95%
rename from include/hardware_sampling/gpu_intel/utility.hpp
rename to include/hws/gpu_intel/utility.hpp
index 03f9f8d..04626a8 100644
--- a/include/hardware_sampling/gpu_intel/utility.hpp
+++ b/include/hws/gpu_intel/utility.hpp
@@ -8,8 +8,8 @@
  * @brief Implements utility functionality for the Intel GPU sampler.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_
+#ifndef HWS_GPU_INTEL_UTILITY_HPP_
+#define HWS_GPU_INTEL_UTILITY_HPP_
 #pragma once
 
 #include "fmt/format.h"          // fmt::format
@@ -77,4 +77,4 @@ namespace hws::detail {
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_
+#endif  // HWS_GPU_INTEL_UTILITY_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hws/gpu_nvidia/hardware_sampler.hpp
similarity index 92%
rename from include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
rename to include/hws/gpu_nvidia/hardware_sampler.hpp
index 5dba1ca..59a5e31 100644
--- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
+++ b/include/hws/gpu_nvidia/hardware_sampler.hpp
@@ -8,14 +8,14 @@
  * @brief Defines a hardware sampler for NVIDIA GPUs using NVIDIA's Management Library (NVML).
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
+#define HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp"  // hws::nvml_device_handle
-#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"        // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"               // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"                // hws::sample_category
+#include "hws/gpu_nvidia/nvml_device_handle.hpp"  // hws::nvml_device_handle
+#include "hws/gpu_nvidia/nvml_samples.hpp"        // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
+#include "hws/hardware_sampler.hpp"               // hws::hardware_sampler
+#include "hws/sample_category.hpp"                // hws::sample_category
 
 #include "fmt/format.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -171,4 +171,4 @@ struct fmt::formatter<hws::gpu_nvidia_hardware_sampler> : fmt::ostream_formatter
 
 /// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp b/include/hws/gpu_nvidia/nvml_device_handle.hpp
similarity index 85%
rename from include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp
rename to include/hws/gpu_nvidia/nvml_device_handle.hpp
index f52fb84..eb3da33 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp
+++ b/include/hws/gpu_nvidia/nvml_device_handle.hpp
@@ -8,8 +8,8 @@
  * @brief Defines a pImpl class for an NVML device handle.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
+#ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
+#define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
 #pragma once
 
 #include <cstddef>    // std::size_t
@@ -40,7 +40,7 @@ class nvml_device_handle {
 
     /**
      * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t.
-     * @throws hardware_sampling_exception if `*this` has been default constructed
+     * @throws std::runtime_error if `*this` has been default constructed
      * @return the device handle (`[[nodiscard]]`)
      */
     [[nodiscard]] nvml_device_handle_impl &get_impl() {
@@ -52,7 +52,7 @@ class nvml_device_handle {
 
     /**
      * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t.
-     * @throws hardware_sampling_exception if `*this` has been default constructed
+     * @throws std::runtime_error if `*this` has been default constructed
      * @return the device handle (`[[nodiscard]]`)
      */
     [[nodiscard]] const nvml_device_handle_impl &get_impl() const {
@@ -69,4 +69,4 @@ class nvml_device_handle {
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
+#endif  // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp b/include/hws/gpu_nvidia/nvml_device_handle_impl.hpp
similarity index 74%
rename from include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp
rename to include/hws/gpu_nvidia/nvml_device_handle_impl.hpp
index df6147c..7656599 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp
+++ b/include/hws/gpu_nvidia/nvml_device_handle_impl.hpp
@@ -8,12 +8,12 @@
  * @brief Implements a pImpl class for an NVML device handle.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
+#ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
+#define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp"  // hws::detail::nvml_device_handle
-#include "hardware_sampling/gpu_nvidia/utility.hpp"             // HWS_NVML_ERROR_CHECK
+#include "hws/gpu_nvidia/nvml_device_handle.hpp"  // hws::detail::nvml_device_handle
+#include "hws/gpu_nvidia/utility.hpp"             // HWS_NVML_ERROR_CHECK
 
 #include "nvml.h"  // nvmlDevice_t
 
@@ -44,4 +44,4 @@ inline nvml_device_handle::nvml_device_handle(const std::size_t device_id) :
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
+#endif  // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hws/gpu_nvidia/nvml_samples.hpp
similarity index 98%
rename from include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
rename to include/hws/gpu_nvidia/nvml_samples.hpp
index 631e572..0ddd6ae 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hws/gpu_nvidia/nvml_samples.hpp
@@ -8,11 +8,11 @@
  * @brief Defines the samples used with NVML.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_
+#ifndef HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_
+#define HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+#include "hws/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
 
 #include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
@@ -280,4 +280,4 @@ struct fmt::formatter<hws::nvml_temperature_samples> : fmt::ostream_formatter {
 
 /// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_
+#endif  // HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hws/gpu_nvidia/utility.hpp
similarity index 96%
rename from include/hardware_sampling/gpu_nvidia/utility.hpp
rename to include/hws/gpu_nvidia/utility.hpp
index aaf0420..c405386 100644
--- a/include/hardware_sampling/gpu_nvidia/utility.hpp
+++ b/include/hws/gpu_nvidia/utility.hpp
@@ -8,8 +8,8 @@
  * @brief Implements utility functionality for the NVIDIA GPU sampler.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
+#ifndef HWS_GPU_NVIDIA_UTILITY_HPP_
+#define HWS_GPU_NVIDIA_UTILITY_HPP_
 #pragma once
 
 #include "cuda_runtime_api.h"  // CUDA runtime functions
@@ -61,4 +61,4 @@ namespace hws::detail {
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
+#endif  // HWS_GPU_NVIDIA_UTILITY_HPP_
diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hws/hardware_sampler.hpp
similarity index 96%
rename from include/hardware_sampling/hardware_sampler.hpp
rename to include/hws/hardware_sampler.hpp
index cc59331..326eb7e 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hws/hardware_sampler.hpp
@@ -8,12 +8,12 @@
  * @brief Defines the base class for all hardware samplers.
  */
 
-#ifndef HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_HARDWARE_SAMPLER_HPP_
+#define HWS_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/event.hpp"            // hws::event
-#include "hardware_sampling/sample_category.hpp"  // hws::sample_category
+#include "hws/event.hpp"            // hws::event
+#include "hws/sample_category.hpp"  // hws::sample_category
 
 #include <atomic>      // std::atomic
 #include <chrono>      // std::chrono::{system_clock::time_point, steady_clock::time_point, milliseconds}
@@ -228,4 +228,4 @@ class hardware_sampler {
 
 }  // namespace hws
 
-#endif  // HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/sample_category.hpp b/include/hws/sample_category.hpp
similarity index 96%
rename from include/hardware_sampling/sample_category.hpp
rename to include/hws/sample_category.hpp
index e740544..0ec500b 100644
--- a/include/hardware_sampling/sample_category.hpp
+++ b/include/hws/sample_category.hpp
@@ -8,8 +8,8 @@
  * @brief Defines an enum class with all sample categories to be able to only selectively enable some samples.
  */
 
-#ifndef HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_
-#define HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_
+#ifndef HWS_SAMPLE_CATEGORY_HPP_
+#define HWS_SAMPLE_CATEGORY_HPP_
 #pragma once
 
 namespace hws {
@@ -114,4 +114,4 @@ constexpr sample_category &operator^=(sample_category &lhs, const sample_categor
 
 }  // namespace hws
 
-#endif  // HARDWARE_SAMPLING_SAMPLE_CATEGORY_HPP_
+#endif  // HWS_SAMPLE_CATEGORY_HPP_
diff --git a/include/hardware_sampling/system_hardware_sampler.hpp b/include/hws/system_hardware_sampler.hpp
similarity index 95%
rename from include/hardware_sampling/system_hardware_sampler.hpp
rename to include/hws/system_hardware_sampler.hpp
index 15279de..42924ac 100644
--- a/include/hardware_sampling/system_hardware_sampler.hpp
+++ b/include/hws/system_hardware_sampler.hpp
@@ -8,12 +8,12 @@
  * @brief Defines a hardware sampler for the whole system, i.e., automatically creates CPU and GPU hardware samples if the respective sampler and hardware are available.
  */
 
-#ifndef HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_SYSTEM_HARDWARE_SAMPLER_HPP_
+#define HWS_SYSTEM_HARDWARE_SAMPLER_HPP_
 
-#include "hardware_sampling/event.hpp"             // hws::event
-#include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"   // hws::sample_category
+#include "hws/event.hpp"             // hws::event
+#include "hws/hardware_sampler.hpp"  // hws::hardware_sampler
+#include "hws/sample_category.hpp"   // hws::sample_category
 
 #include <chrono>      // std::chrono::{milliseconds, steady_clock::time_point}
 #include <cstddef>     // std::size_t
@@ -194,4 +194,4 @@ class system_hardware_sampler {
 
 }  // namespace hws
 
-#endif  // HARDWARE_SAMPLING_SYSTEM_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_SYSTEM_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/utility.hpp b/include/hws/utility.hpp
similarity index 99%
rename from include/hardware_sampling/utility.hpp
rename to include/hws/utility.hpp
index 4a99a31..db37390 100644
--- a/include/hardware_sampling/utility.hpp
+++ b/include/hws/utility.hpp
@@ -8,8 +8,8 @@
  * @brief Utility functions for the hardware sampling.
  */
 
-#ifndef HARDWARE_SAMPLING_UTILITY_HPP_
-#define HARDWARE_SAMPLING_UTILITY_HPP_
+#ifndef HWS_UTILITY_HPP_
+#define HWS_UTILITY_HPP_
 #pragma once
 
 #include "fmt/format.h"  // fmt::format
@@ -307,4 +307,4 @@ template <typename T>
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_UTILITY_HPP_
+#endif  // HWS_UTILITY_HPP_
diff --git a/include/hardware_sampling/version.hpp.in b/include/hws/version.hpp.in
similarity index 92%
rename from include/hardware_sampling/version.hpp.in
rename to include/hws/version.hpp.in
index 88d0c1e..225072f 100644
--- a/include/hardware_sampling/version.hpp.in
+++ b/include/hws/version.hpp.in
@@ -8,8 +8,8 @@
  * @brief Version information for the hardware sampling.
  */
 
-#ifndef HARDWARE_SAMPLING_VERSION_HPP_
-#define HARDWARE_SAMPLING_VERSION_HPP_
+#ifndef HWS_VERSION_HPP_
+#define HWS_VERSION_HPP_
 #pragma once
 
 #include <string_view>  // std::string_view
@@ -48,4 +48,4 @@ constexpr int patch = @PROJECT_VERSION_PATCH@;
 
 }  // namespace hws::version
 
-#endif  // HARDWARE_SAMPLING_VERSION_HPP_
+#endif  // HWS_VERSION_HPP_
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hws/cpu/cpu_samples.cpp
similarity index 99%
rename from src/hardware_sampling/cpu/cpu_samples.cpp
rename to src/hws/cpu/cpu_samples.cpp
index 3ef3ad7..e5690d2 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hws/cpu/cpu_samples.cpp
@@ -5,9 +5,9 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/cpu/cpu_samples.hpp"
+#include "hws/cpu/cpu_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, quote}
+#include "hws/utility.hpp"  // hws::detail::{value_or_default, quote}
 
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hws/cpu/hardware_sampler.cpp
similarity index 98%
rename from src/hardware_sampling/cpu/hardware_sampler.cpp
rename to src/hws/cpu/hardware_sampler.cpp
index b996f96..505e0bb 100644
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ b/src/hws/cpu/hardware_sampler.cpp
@@ -5,13 +5,13 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/cpu/hardware_sampler.hpp"
+#include "hws/cpu/hardware_sampler.hpp"
 
-#include "hardware_sampling/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
-#include "hardware_sampling/cpu/utility.hpp"       // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess
-#include "hardware_sampling/hardware_sampler.hpp"  // hws::tracking::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"   // hws::sample_category
-#include "hardware_sampling/utility.hpp"           // hws::detail::{split, split_as, trim, convert_to, starts_with}
+#include "hws/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
+#include "hws/cpu/utility.hpp"       // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess
+#include "hws/hardware_sampler.hpp"  // hws::tracking::hardware_sampler
+#include "hws/sample_category.hpp"   // hws::sample_category
+#include "hws/utility.hpp"           // hws::detail::{split, split_as, trim, convert_to, starts_with}
 
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
diff --git a/src/hardware_sampling/cpu/utility.cpp b/src/hws/cpu/utility.cpp
similarity index 95%
rename from src/hardware_sampling/cpu/utility.cpp
rename to src/hws/cpu/utility.cpp
index 3a17995..7bb6b3d 100644
--- a/src/hardware_sampling/cpu/utility.cpp
+++ b/src/hws/cpu/utility.cpp
@@ -5,9 +5,9 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/cpu/utility.hpp"
+#include "hws/cpu/utility.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::split_as
+#include "hws/utility.hpp"  // hws::detail::split_as
 
 #include "fmt/format.h"  // fmt::format
 #include "subprocess.h"  // subprocess_s, subprocess_create, subprocess_join, subprocess_stdout, subprocess_option_e
diff --git a/src/hardware_sampling/event.cpp b/src/hws/event.cpp
similarity index 93%
rename from src/hardware_sampling/event.cpp
rename to src/hws/event.cpp
index e21c715..373990e 100644
--- a/src/hardware_sampling/event.cpp
+++ b/src/hws/event.cpp
@@ -5,7 +5,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/event.hpp"
+#include "hws/event.hpp"
 
 #include "fmt/format.h"  // fmt::format
 
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hws/gpu_amd/hardware_sampler.cpp
similarity index 98%
rename from src/hardware_sampling/gpu_amd/hardware_sampler.cpp
rename to src/hws/gpu_amd/hardware_sampler.cpp
index b205718..6d52e03 100644
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ b/src/hws/gpu_amd/hardware_sampler.cpp
@@ -5,13 +5,13 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_amd/hardware_sampler.hpp"
+#include "hws/gpu_amd/hardware_sampler.hpp"
 
-#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
-#include "hardware_sampling/gpu_amd/utility.hpp"           // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK
-#include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"           // hws::sample_category
-#include "hardware_sampling/utility.hpp"                   // hws::detail::time_points_to_epoch
+#include "hws/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
+#include "hws/gpu_amd/utility.hpp"           // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK
+#include "hws/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hws/sample_category.hpp"           // hws::sample_category
+#include "hws/utility.hpp"                   // hws::detail::time_points_to_epoch
 
 #include "fmt/format.h"           // fmt::format
 #include "fmt/ranges.h"           // fmt::join
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hws/gpu_amd/rocm_smi_samples.cpp
similarity index 99%
rename from src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
rename to src/hws/gpu_amd/rocm_smi_samples.cpp
index e93c36b..f149c4e 100644
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ b/src/hws/gpu_amd/rocm_smi_samples.cpp
@@ -5,9 +5,9 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"
+#include "hws/gpu_amd/rocm_smi_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, quote}
+#include "hws/utility.hpp"  // hws::detail::{value_or_default, quote}
 
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
diff --git a/src/hardware_sampling/gpu_amd/utility.cpp b/src/hws/gpu_amd/utility.cpp
similarity index 96%
rename from src/hardware_sampling/gpu_amd/utility.cpp
rename to src/hws/gpu_amd/utility.cpp
index 35d375c..a88969a 100644
--- a/src/hardware_sampling/gpu_amd/utility.cpp
+++ b/src/hws/gpu_amd/utility.cpp
@@ -5,7 +5,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_amd/utility.hpp"
+#include "hws/gpu_amd/utility.hpp"
 
 #include "rocm_smi/rocm_smi.h"  // ROCm SMI runtime functions
 
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hws/gpu_intel/hardware_sampler.cpp
similarity index 98%
rename from src/hardware_sampling/gpu_intel/hardware_sampler.cpp
rename to src/hws/gpu_intel/hardware_sampler.cpp
index ed3aed7..3054e22 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hws/gpu_intel/hardware_sampler.cpp
@@ -5,14 +5,14 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_intel/hardware_sampler.hpp"
-
-#include "hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp"  // hws::level_zero_device_handle implementation
-#include "hardware_sampling/gpu_intel/level_zero_samples.hpp"             // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
-#include "hardware_sampling/gpu_intel/utility.hpp"                        // HWS_LEVEL_ZERO_ERROR_CHECK
-#include "hardware_sampling/hardware_sampler.hpp"                         // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"                          // hws::sample_category
-#include "hardware_sampling/utility.hpp"                                  // hws::{durations_from_reference_time, join}
+#include "hws/gpu_intel/hardware_sampler.hpp"
+
+#include "hws/gpu_intel/level_zero_device_handle_impl.hpp"  // hws::level_zero_device_handle implementation
+#include "hws/gpu_intel/level_zero_samples.hpp"             // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
+#include "hws/gpu_intel/utility.hpp"                        // HWS_LEVEL_ZERO_ERROR_CHECK
+#include "hws/hardware_sampler.hpp"                         // hws::hardware_sampler
+#include "hws/sample_category.hpp"                          // hws::sample_category
+#include "hws/utility.hpp"                                  // hws::{durations_from_reference_time, join}
 
 #include "fmt/format.h"          // fmt::format
 #include "level_zero/ze_api.h"   // Level Zero runtime functions
diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hws/gpu_intel/level_zero_samples.cpp
similarity index 99%
rename from src/hardware_sampling/gpu_intel/level_zero_samples.cpp
rename to src/hws/gpu_intel/level_zero_samples.cpp
index ab749fb..e296cab 100644
--- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
+++ b/src/hws/gpu_intel/level_zero_samples.cpp
@@ -5,9 +5,9 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_intel/level_zero_samples.hpp"
+#include "hws/gpu_intel/level_zero_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, remove_cvref_t}
+#include "hws/utility.hpp"  // hws::detail::{value_or_default, remove_cvref_t}
 
 #include <ostream>      // std::ostream
 #include <string>       // std::string
diff --git a/src/hardware_sampling/gpu_intel/utility.cpp b/src/hws/gpu_intel/utility.cpp
similarity index 99%
rename from src/hardware_sampling/gpu_intel/utility.cpp
rename to src/hws/gpu_intel/utility.cpp
index 635b5c1..5a29eee 100644
--- a/src/hardware_sampling/gpu_intel/utility.cpp
+++ b/src/hws/gpu_intel/utility.cpp
@@ -5,7 +5,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_intel/utility.hpp"
+#include "hws/gpu_intel/utility.hpp"
 
 #include "fmt/format.h"          // fmt::format
 #include "fmt/ranges.h"          // fmt::join
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hws/gpu_nvidia/hardware_sampler.cpp
similarity index 97%
rename from src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
rename to src/hws/gpu_nvidia/hardware_sampler.cpp
index 2ffb6e8..9c2a927 100644
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ b/src/hws/gpu_nvidia/hardware_sampler.cpp
@@ -5,14 +5,14 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"
-
-#include "hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp"  // hws::detail::nvml_device_handle implementation
-#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"             // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
-#include "hardware_sampling/gpu_nvidia/utility.hpp"                  // HWS_NVML_ERROR_CHECK
-#include "hardware_sampling/hardware_sampler.hpp"                    // hws::hardware_sampler
-#include "hardware_sampling/sample_category.hpp"                     // hws::sample_category
-#include "hardware_sampling/utility.hpp"                             // hws::detail::time_points_to_epoch
+#include "hws/gpu_nvidia/hardware_sampler.hpp"
+
+#include "hws/gpu_nvidia/nvml_device_handle_impl.hpp"  // hws::detail::nvml_device_handle implementation
+#include "hws/gpu_nvidia/nvml_samples.hpp"             // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
+#include "hws/gpu_nvidia/utility.hpp"                  // HWS_NVML_ERROR_CHECK
+#include "hws/hardware_sampler.hpp"                    // hws::hardware_sampler
+#include "hws/sample_category.hpp"                     // hws::sample_category
+#include "hws/utility.hpp"                             // hws::detail::time_points_to_epoch
 
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hws/gpu_nvidia/nvml_samples.cpp
similarity index 99%
rename from src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
rename to src/hws/gpu_nvidia/nvml_samples.cpp
index b07c7d2..3ce65c3 100644
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ b/src/hws/gpu_nvidia/nvml_samples.cpp
@@ -5,9 +5,9 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"
+#include "hws/gpu_nvidia/nvml_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, map_entry_to_string, quote}
+#include "hws/utility.hpp"  // hws::detail::{value_or_default, map_entry_to_string, quote}
 
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
diff --git a/src/hardware_sampling/gpu_nvidia/utility.cpp b/src/hws/gpu_nvidia/utility.cpp
similarity index 97%
rename from src/hardware_sampling/gpu_nvidia/utility.cpp
rename to src/hws/gpu_nvidia/utility.cpp
index 70883e6..7c1b9f7 100644
--- a/src/hardware_sampling/gpu_nvidia/utility.cpp
+++ b/src/hws/gpu_nvidia/utility.cpp
@@ -5,7 +5,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_nvidia/utility.hpp"
+#include "hws/gpu_nvidia/utility.hpp"
 
 #include "fmt/format.h"  // fmt::format
 #include "fmt/ranges.h"  // fmt::join
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hws/hardware_sampler.cpp
similarity index 96%
rename from src/hardware_sampling/hardware_sampler.cpp
rename to src/hws/hardware_sampler.cpp
index d5ec9fc..abd907d 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hws/hardware_sampler.cpp
@@ -5,11 +5,11 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/hardware_sampler.hpp"
+#include "hws/hardware_sampler.hpp"
 
-#include "hardware_sampling/event.hpp"    // hws::event
-#include "hardware_sampling/utility.hpp"  // hws::detail::durations_from_reference_time
-#include "hardware_sampling/version.hpp"  // hws::version::version
+#include "hws/event.hpp"    // hws::event
+#include "hws/utility.hpp"  // hws::detail::durations_from_reference_time
+#include "hws/version.hpp"  // hws::version::version
 
 #include "fmt/chrono.h"  // direct formatting of std::chrono types
 #include "fmt/format.h"  // fmt::format
diff --git a/src/hardware_sampling/system_hardware_sampler.cpp b/src/hws/system_hardware_sampler.cpp
similarity index 90%
rename from src/hardware_sampling/system_hardware_sampler.cpp
rename to src/hws/system_hardware_sampler.cpp
index 51a2fed..14e75de 100644
--- a/src/hardware_sampling/system_hardware_sampler.cpp
+++ b/src/hws/system_hardware_sampler.cpp
@@ -5,29 +5,29 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/system_hardware_sampler.hpp"
+#include "hws/system_hardware_sampler.hpp"
 
-#include "hardware_sampling/event.hpp"            // hws::event
-#include "hardware_sampling/sample_category.hpp"  // hws::sample_category
+#include "hws/event.hpp"            // hws::event
+#include "hws/sample_category.hpp"  // hws::sample_category
 
 #if defined(HWS_FOR_CPUS_ENABLED)
-    #include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
+    #include "hws/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
 #endif
 #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
-    #include "hardware_sampling/gpu_nvidia/utility.hpp"           // HWS_CUDA_ERROR_CHECK
+    #include "hws/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
+    #include "hws/gpu_nvidia/utility.hpp"           // HWS_CUDA_ERROR_CHECK
 
     #include "cuda_runtime.h"  // cudaGetDeviceCount
 #endif
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
-    #include "hardware_sampling/gpu_amd/utility.hpp"           // HWS_HIP_ERROR_CHECK
+    #include "hws/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
+    #include "hws/gpu_amd/utility.hpp"           // HWS_HIP_ERROR_CHECK
 
     #include "hip/hip_runtime.h"  // hipGetDeviceCount
 #endif
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_intel/hardware_sampler.hpp"  // hws::gpu_intel_hardware_sampler
-    #include "hardware_sampling/gpu_intel/utility.hpp"           // HWS_LEVEL_ZERO_ERROR_CHECK
+    #include "hws/gpu_intel/hardware_sampler.hpp"  // hws::gpu_intel_hardware_sampler
+    #include "hws/gpu_intel/utility.hpp"           // HWS_LEVEL_ZERO_ERROR_CHECK
 #endif
 
 #include "fmt/format.h"  // fmt::format
diff --git a/src/hardware_sampling/utility.cpp b/src/hws/utility.cpp
similarity index 97%
rename from src/hardware_sampling/utility.cpp
rename to src/hws/utility.cpp
index 9e2dbc2..6651763 100644
--- a/src/hardware_sampling/utility.cpp
+++ b/src/hws/utility.cpp
@@ -5,7 +5,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/utility.hpp"
+#include "hws/utility.hpp"
 
 #include <algorithm>    // std::min, std::transform
 #include <cctype>       // std::tolower

From 5c21328789eba5d2309d46b83f95513c978e6ff7 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 7 Oct 2024 09:46:53 +0200
Subject: [PATCH 62/69] Add {fmt} to the install targets.

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 97ccbe1..8f48d0f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -76,7 +76,7 @@ else ()
     set(FMT_FUZZ OFF CACHE INTERNAL "" FORCE)
     set(FMT_CUDA_TEST OFF CACHE INTERNAL "" FORCE)
     set(FMT_MODULE OFF CACHE INTERNAL "" FORCE)
-    set(FMT_SYSTEM_HEADERS ON CACHE INTERNAL "" FORCE)
+    set(FMT_SYSTEM_HEADERS OFF CACHE INTERNAL "" FORCE)
     # fetch string formatting library fmt
     FetchContent_Declare(fmt
             GIT_REPOSITORY https://github.com/fmtlib/fmt.git
@@ -328,7 +328,7 @@ configure_package_config_file(
 )
 
 ## create and copy install-targets file
-install(EXPORT hws_Targets
+install(EXPORT hws_Targets fmt
         FILE hwsTargets.cmake
         NAMESPACE hws::
         DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake

From f8b4427ff19dab1b8671acfad87cf76be65b6c3d Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Mon, 7 Oct 2024 10:05:02 +0200
Subject: [PATCH 63/69] Install fmt via install(TARGETS) instead of the hws export set.

---
 CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8f48d0f..aa5ae22 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -306,6 +306,7 @@ install(TARGETS ${HWS_TARGETS_TO_INSTALL}
         LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"  # all shared lib files
         RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"  # all executables
 )
+install(TARGETS fmt)
 
 ## mark header to install via 'make install'
 install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/"
@@ -328,7 +329,7 @@ configure_package_config_file(
 )
 
 ## create and copy install-targets file
-install(EXPORT hws_Targets fmt
+install(EXPORT hws_Targets
         FILE hwsTargets.cmake
         NAMESPACE hws::
         DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake

From 55e2936574ba76564285cfd8da11df3e82a1baf9 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 8 Oct 2024 11:30:23 +0200
Subject: [PATCH 64/69] Fix compilation error in the level zero error check
 function.

---
 include/hws/gpu_intel/utility.hpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/hws/gpu_intel/utility.hpp b/include/hws/gpu_intel/utility.hpp
index 04626a8..76e15a1 100644
--- a/include/hws/gpu_intel/utility.hpp
+++ b/include/hws/gpu_intel/utility.hpp
@@ -36,12 +36,12 @@ namespace hws::detail {
  * @details Throws an exception if a Level Zero call returns with an error. Additionally outputs a more concrete custom error string.
  */
 #if defined(HWS_ERROR_CHECKS_ENABLED)
-    #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func)                                                                                            \
-        {                                                                                                                                          \
-            const ze_result_t errc = level_zero_func;                                                                                              \
-            if (errc != ZE_RESULT_SUCCESS) {                                                                                                       \
-                throw std::runtime_error{ fmt::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, to_result_string(errc)) }; \
-            }                                                                                                                                      \
+    #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func)                                                                                                           \
+        {                                                                                                                                                         \
+            const ze_result_t errc = level_zero_func;                                                                                                             \
+            if (errc != ZE_RESULT_SUCCESS) {                                                                                                                      \
+                throw std::runtime_error{ fmt::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, ::hws::detail::to_result_string(errc)) }; \
+            }                                                                                                                                                     \
         }
 #else
     #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) level_zero_func;

From d1e878ea2080a4545e633ba1ec1ebd1496323942 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 8 Oct 2024 11:30:37 +0200
Subject: [PATCH 65/69] Add missing comparison to ZE_RESULT_SUCCESS.

---
 src/hws/gpu_intel/hardware_sampler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/hws/gpu_intel/hardware_sampler.cpp b/src/hws/gpu_intel/hardware_sampler.cpp
index 3054e22..9a44369 100644
--- a/src/hws/gpu_intel/hardware_sampler.cpp
+++ b/src/hws/gpu_intel/hardware_sampler.cpp
@@ -146,7 +146,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                 for (zes_freq_handle_t handle : frequency_handles) {
                     // get frequency properties
                     zes_freq_properties_t prop{};
-                    if (zesFrequencyGetProperties(handle, &prop)) {
+                    if (zesFrequencyGetProperties(handle, &prop) == ZE_RESULT_SUCCESS) {
                         // determine the frequency domain (e.g. GPU, memory, etc)
                         switch (prop.type) {
                             case ZES_FREQ_DOMAIN_GPU:

From 751adee8bfbe6326bcbf10f8b6bf2c057e7c1ce9 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 8 Oct 2024 11:31:24 +0200
Subject: [PATCH 66/69] Fix wrong power-related units and values.

---
 src/hws/gpu_intel/hardware_sampler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hws/gpu_intel/hardware_sampler.cpp b/src/hws/gpu_intel/hardware_sampler.cpp
index 9a44369..0be124e 100644
--- a/src/hws/gpu_intel/hardware_sampler.cpp
+++ b/src/hws/gpu_intel/hardware_sampler.cpp
@@ -272,7 +272,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                                 break;
                         }
 
-                        power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(desc.limit);
+                        power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(desc.limit) / 1000.0;
                     }
 
                     // get total power consumption
@@ -581,7 +581,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
 
                         // calculate current power draw as (Energy Difference [J]) / (Time Difference [s])
                         const std::size_t last_index = this->sampling_time_points().size() - 1;
-                        const double power_usage = (power_consumption - power_samples_.power_total_energy_consumption_->back()) / (std::chrono::duration<double>(this->sampling_time_points()[last_index] - this->sampling_time_points()[last_index - 1]).count());
+                        const double power_usage = ((power_consumption - initial_total_power_consumption) - power_samples_.power_total_energy_consumption_->back()) / (std::chrono::duration<double>(this->sampling_time_points()[last_index] - this->sampling_time_points()[last_index - 1]).count());
                         power_samples_.power_usage_->push_back(power_usage);
 
                         // add power consumption last to be able to use the std::vector::back() function

From 13daaa8764401fdbcc0e15aedd6f5a55dc2a16c1 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 8 Oct 2024 11:39:35 +0200
Subject: [PATCH 67/69] Correctly init level zero driver.

---
 src/hws/system_hardware_sampler.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/hws/system_hardware_sampler.cpp b/src/hws/system_hardware_sampler.cpp
index 14e75de..1c08762 100644
--- a/src/hws/system_hardware_sampler.cpp
+++ b/src/hws/system_hardware_sampler.cpp
@@ -73,6 +73,9 @@ system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds
 #endif
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
     {
+        // init level zero driver
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY))
+
         // discover the number of drivers
         std::uint32_t driver_count{ 0 };
         HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr))

From 12da0d9bfbb36d844e0bd2e7255f7e3e06b22101 Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 8 Oct 2024 11:39:54 +0200
Subject: [PATCH 68/69] Try fixing installation issues.

---
 CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index aa5ae22..97ccbe1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -76,7 +76,7 @@ else ()
     set(FMT_FUZZ OFF CACHE INTERNAL "" FORCE)
     set(FMT_CUDA_TEST OFF CACHE INTERNAL "" FORCE)
     set(FMT_MODULE OFF CACHE INTERNAL "" FORCE)
-    set(FMT_SYSTEM_HEADERS OFF CACHE INTERNAL "" FORCE)
+    set(FMT_SYSTEM_HEADERS ON CACHE INTERNAL "" FORCE)
     # fetch string formatting library fmt
     FetchContent_Declare(fmt
             GIT_REPOSITORY https://github.com/fmtlib/fmt.git
@@ -306,7 +306,6 @@ install(TARGETS ${HWS_TARGETS_TO_INSTALL}
         LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"  # all shared lib files
         RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"  # all executables
 )
-install(TARGETS fmt)
 
 ## mark header to install via 'make install'
 install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/"

From 7c0ce1eecccfdb7c108f35769b439f1b32a9356c Mon Sep 17 00:00:00 2001
From: Marcel Breyer <marcel.breyer@ipvs.uni-stuttgart.de>
Date: Tue, 8 Oct 2024 11:45:15 +0200
Subject: [PATCH 69/69] Update README.

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 477738a..f21b0eb 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,8 @@ export CPLUS_INCLUDE_PATH=${CMAKE_INSTALL_PREFIX}/include:${CPLUS_INCLUDE_PATH}
 export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 ```
 
+Note: if segmentation faults occur in calls to `zes` functions when using Intel GPUs, it may be necessary to set the environment variable via `export ZES_ENABLE_SYSMAN=1`.
+
 ## Available samples
 
 The sampling type `fixed` denotes samples that are gathered once per hardware samples like maximum clock frequencies or