diff --git a/.clang-format b/.clang-format
index 9fc54fe..84b7fa5 100644
--- a/.clang-format
+++ b/.clang-format
@@ -77,9 +77,9 @@ ForEachMacros: [ 'foreach', 'Q_FOREACH', 'BOOST_FOREACH' ]
 IfMacros: [ ]
 IncludeBlocks: Regroup
 IncludeCategories:
-  - Regex: '^"hardware_sampling/'
+  - Regex: '^"hws/'
     Priority: 1
-  - Regex: '^"(pybind|nvml|rocm_smi|level_zero|subprocess)'
+  - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess|fmt)'
     Priority: 2
   - Regex: '^.*'
     Priority: 3
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
new file mode 100644
index 0000000..7f41715
--- /dev/null
+++ b/.github/workflows/documentation.yml
@@ -0,0 +1,44 @@
+name: Generate documentation
+
+# only trigger this action on specific events
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build-documentation:
+    runs-on: ubuntu-latest
+    steps:
+      # checkout repository
+      - name: Checkout hws
+        uses: actions/checkout@v4.2.0
+        with:
+          path: hardware_sampling
+      # install dependencies
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y doxygen graphviz
+      # configure project via CMake
+      - name: Configure
+        run: |
+          cd hardware_sampling
+          mkdir build
+          cd build
+          cmake -DHWS_ENABLE_DOCUMENTATION=ON ..
+      # build project
+      - name: Generate
+        run: |
+          cd hardware_sampling/build
+          make doc
+      # deploy generated documentation using github.io (only on pushes to main; pull requests merely build the docs)
+      - name: Deploy
+        if: github.event_name == 'push'
+        uses: peaceiris/actions-gh-pages@v4
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./hardware_sampling/docs/html
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 9e03206..9f74de0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,8 @@ Prerequisites
 # CMake ================================
 bin/
 build*/
+docs/html
+install*/
 cmake-build*/
 CMakeLists.txt.user
 CMakeCache.txt
@@ -53,4 +55,7 @@ CTestTestfile.cmake
 # IDEs ================================
 .idea/
 .vscode/
-.vs/
\ No newline at end of file
+.vs/
+
+# auto-generated version header
+include/hws/version.hpp
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 272d1c0..97ccbe1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,27 +6,29 @@
 
 cmake_minimum_required(VERSION 3.22)
 
-project("HWS - Hardware Sampling for GPUs and CPUs"
+project("hws - Hardware Sampling for GPUs and CPUs"
         VERSION 1.0.0
         LANGUAGES CXX
-        DESCRIPTION "Hardware sampling (e.g., clock frequencies, memory consumption, temperatures, or energy draw) for CPUs, and GPUS.")
+        DESCRIPTION "Hardware sampling (e.g., clock frequencies, memory consumption, temperatures, or energy draw) for CPUs and GPUS.")
 
 # explicitly set library source files
 set(HWS_SOURCES
-        ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/event.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/hardware_sampler.cpp
-        ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/utility.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/event.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/system_hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/utility.cpp
 )
 
 # create hardware sampling library
-set(HWS_LIBRARY_NAME hardware_sampling)
+set(HWS_LIBRARY_NAME hws)
 add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES})
+add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME})
 
 # set install target
 set(HWS_TARGETS_TO_INSTALL ${HWS_LIBRARY_NAME})
 
-# use C++20
-target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_20)
+# use C++17
+target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_17)
 
 # add target include directory
 target_include_directories(${HWS_LIBRARY_NAME} PUBLIC
@@ -58,6 +60,44 @@ endif ()
 message(STATUS "Setting the hardware sampler interval to ${HWS_SAMPLING_INTERVAL}ms.")
 target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_SAMPLING_INTERVAL=${HWS_SAMPLING_INTERVAL}ms)
 
+# use an installed {fmt} (at least HWS_fmt_VERSION) if available, otherwise fetch and build that version from source
+include(FetchContent)
+set(HWS_fmt_VERSION 11.0.2)
+find_package(fmt ${HWS_fmt_VERSION} QUIET)
+if (fmt_FOUND)
+    message(STATUS "Found package fmt.")
+else ()
+    message(STATUS "Couldn't find package fmt. Building version ${HWS_fmt_VERSION} from source.")
+    set(FMT_PEDANTIC OFF CACHE INTERNAL "" FORCE)
+    set(FMT_WERROR OFF CACHE INTERNAL "" FORCE)
+    set(FMT_DOC OFF CACHE INTERNAL "" FORCE)
+    set(FMT_INSTALL ON CACHE INTERNAL "" FORCE) # let {fmt} handle the install target
+    set(FMT_TEST OFF CACHE INTERNAL "" FORCE)
+    set(FMT_FUZZ OFF CACHE INTERNAL "" FORCE)
+    set(FMT_CUDA_TEST OFF CACHE INTERNAL "" FORCE)
+    set(FMT_MODULE OFF CACHE INTERNAL "" FORCE)
+    set(FMT_SYSTEM_HEADERS ON CACHE INTERNAL "" FORCE)
+    # fetch string formatting library fmt
+    FetchContent_Declare(fmt
+            GIT_REPOSITORY https://github.com/fmtlib/fmt.git
+            GIT_TAG ${HWS_fmt_VERSION}
+            QUIET
+    )
+    FetchContent_MakeAvailable(fmt)
+    set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON)
+    add_dependencies(${HWS_LIBRARY_NAME} fmt)
+endif ()
+target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt)
+
+########################################################################################################################
+##                                              configure version header                                              ##
+########################################################################################################################
+message(STATUS "Configuring version information.")
+configure_file(
+        ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp.in
+        ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp
+        @ONLY
+)
 
 ####################################################################################################################
 ##                                                CPU measurements                                                ##
@@ -148,9 +188,9 @@ if (HWS_LSCPU_FOUND OR HWS_FREE_FOUND OR HWS_TURBOSTAT_EXECUTION_TYPE)
     # add source file to source file list
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/cpu/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/cpu/cpu_samples.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/cpu/utility.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/cpu/hardware_sampler.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/cpu/cpu_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/cpu/utility.cpp;
             >)
 
     # add compile definitions
@@ -166,15 +206,16 @@ endif ()
 # find libraries necessary for NVML and link against them
 find_package(CUDAToolkit QUIET)
 if (CUDAToolkit_FOUND)
-    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE CUDA::nvml)
+    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE CUDA::nvml CUDA::cudart)
 
     message(STATUS "Enable sampling of NVIDIA GPU information using NVML.")
 
     # add source file to source file list
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_nvidia/hardware_sampler.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_nvidia/nvml_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_nvidia/utility.cpp
             >)
 
     # add compile definition
@@ -190,7 +231,8 @@ endif ()
 ## try finding ROCm SMI
 find_package(rocm_smi QUIET)
 if (rocm_smi_FOUND)
-    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE -lrocm_smi64)
+    find_package(HIP REQUIRED)
+    target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE -lrocm_smi64 hip::host)
     target_include_directories(${HWS_LIBRARY_NAME} PRIVATE ${ROCM_SMI_INCLUDE_DIR})
 
     message(STATUS "Enable sampling of AMD GPU information using ROCm SMI.")
@@ -198,8 +240,9 @@ if (rocm_smi_FOUND)
     # add source file to source file list
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_amd/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_amd/hardware_sampler.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_amd/rocm_smi_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_amd/utility.cpp
             >)
 
     # add compile definition
@@ -222,9 +265,9 @@ if (level_zero_FOUND)
     # add source file to source file list
     target_sources(${HWS_LIBRARY_NAME} PRIVATE
             $<BUILD_INTERFACE:
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/hardware_sampler.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/level_zero_samples.cpp;
-            ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/gpu_intel/utility.cpp
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_intel/hardware_sampler.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_intel/level_zero_samples.cpp;
+            ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/gpu_intel/utility.cpp
             >)
 
     # add compile definition
@@ -238,19 +281,27 @@ endif ()
 ##                                             enable Python bindings                                             ##
 ####################################################################################################################
 option(HWS_ENABLE_PYTHON_BINDINGS "Build language bindings for Python." ON)
-
 if (HWS_ENABLE_PYTHON_BINDINGS)
     add_subdirectory(bindings)
 endif ()
 
 
+########################################################################################################################
+##                                                  add documentation                                                 ##
+########################################################################################################################
+option(HWS_ENABLE_DOCUMENTATION "Add documentation using Doxygen." OFF)
+if (HWS_ENABLE_DOCUMENTATION)
+    add_subdirectory(docs)
+endif ()
+
+
 ########################################################################################################################
 ##                                           add support for `make install`                                           ##
 ########################################################################################################################
 include(GNUInstallDirs)
 ## install all necessary library targets
 install(TARGETS ${HWS_TARGETS_TO_INSTALL}
-        EXPORT hardware_sampling_Targets
+        EXPORT hws_Targets
         ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"  # all files that are neither executables, shared lib or headers
         LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"  # all shared lib files
         RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"  # all executables
@@ -264,28 +315,28 @@ install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/"
 ## manage version comparison
 include(CMakePackageConfigHelpers)
 write_basic_package_version_file(
-        "hardware_samplingConfigVersion.cmake"
+        "hwsConfigVersion.cmake"
         VERSION ${PROJECT_VERSION}
         COMPATIBILITY SameMajorVersion
 )
 
 ## generate configuration file
 configure_package_config_file(
-        "${CMAKE_CURRENT_SOURCE_DIR}/cmake/hardware_samplingConfig.cmake.in"
-        "${PROJECT_BINARY_DIR}/hardware_samplingConfig.cmake"
-        INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake
+        "${CMAKE_CURRENT_SOURCE_DIR}/cmake/hwsConfig.cmake.in"
+        "${PROJECT_BINARY_DIR}/hwsConfig.cmake"
+        INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake
 )
 
 ## create and copy install-targets file
-install(EXPORT hardware_sampling_Targets
-        FILE hardware_samplingTargets.cmake
+install(EXPORT hws_Targets
+        FILE hwsTargets.cmake
         NAMESPACE hws::
-        DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake
+        DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake
 )
 
 ## create file containing the build configuration and version information
 install(FILES
-        "${PROJECT_BINARY_DIR}/hardware_samplingConfig.cmake"
-        "${PROJECT_BINARY_DIR}/hardware_samplingConfigVersion.cmake"
-        DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake
+        "${PROJECT_BINARY_DIR}/hwsConfig.cmake"
+        "${PROJECT_BINARY_DIR}/hwsConfigVersion.cmake"
+        DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake
 )
\ No newline at end of file
diff --git a/README.md b/README.md
index 39c2c3d..f21b0eb 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # hws - Hardware Sampling for CPUs and GPUs
 
-The Hardware Sampling (hws) library can be used to track hardware performance like clock frequency, memory usage, temperatures, or power draw. 
+The Hardware Sampling (hws) library can be used to track hardware performance like clock frequency, memory usage,
+temperatures, or power draw.
 It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel.
 
 ## Getting Started
@@ -9,15 +10,24 @@ It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel.
 
 General dependencies:
 
-- a C++20 capable compiler supporting `std::format` (tested with GCC 14.1.0)
-- [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call)
+- a C++17 capable compiler
+- [{fmt} >= 11.0.2](https://github.com/fmtlib/fmt) for string formatting (automatically built during the CMake
+  configuration if it couldn't be found using the respective `find_package` call)
+- [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically built during
+  the CMake configuration if it couldn't be found using the respective `find_package` call)
 
 Dependencies based on the hardware to sample:
 
-- if a CPU should be targeted: at least one of [`turbostat`](https://www.linux.org/docs/man8/turbostat.html) (may require root privileges), [`lscpu`](https://man7.org/linux/man-pages/man1/lscpu.1.html), or [`free`](https://man7.org/linux/man-pages/man1/free.1.html) and the [`subprocess.h`](https://github.com/sheredom/subprocess.h) library (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call)
+- if a CPU should be targeted: at least one of [`turbostat`](https://www.linux.org/docs/man8/turbostat.html) (may
+  require root privileges), [`lscpu`](https://man7.org/linux/man-pages/man1/lscpu.1.html), or
+  [`free`](https://man7.org/linux/man-pages/man1/free.1.html) and the
+  [`subprocess.h`](https://github.com/sheredom/subprocess.h) library (automatically built during the CMake configuration
+  if it couldn't be found using the respective `find_package` call)
 - if an NVIDIA GPU should be targeted: NVIDIA's Management Library [`NVML`](https://docs.nvidia.com/deploy/nvml-api/)
-- if an AMD GPU should be targeted: AMD's ROCm SMI library [`rocm_smi_lib`](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/modules.html)
-- if an Intel GPU should be targeted: Intel's [`Level Zero library`](https://spec.oneapi.io/level-zero/latest/core/INTRO.html)
+- if an AMD GPU should be targeted: AMD's ROCm SMI library
+  [`rocm_smi_lib`](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/modules.html)
+- if an Intel GPU should be targeted: Intel's
+  [`Level Zero library`](https://spec.oneapi.io/level-zero/latest/core/INTRO.html)
 
 ### Building hws
 
@@ -40,7 +50,8 @@ cmake --build . -j
 
 The `[optional_options]` can be one or multiple of:
 
-- `HWS_ENABLE_ERROR_CHECKS=ON|OFF` (default: `OFF`): enable sanity checks during hardware sampling, may be problematic with smaller sample intervals
+- `HWS_ENABLE_ERROR_CHECKS=ON|OFF` (default: `OFF`): enable sanity checks during hardware sampling, may be problematic
+  with smaller sample intervals
 - `HWS_SAMPLING_INTERVAL=100ms` (default: `100ms`): set the sampling interval in milliseconds
 - `HWS_ENABLE_PYTHON_BINDINGS=ON|OFF` (default: `ON`): enable Python bindings
 
@@ -55,23 +66,193 @@ cmake --install . --prefix "/home/myuser/installdir"
 Afterward, the necessary exports should be performed:
 
 ```bash
-export CMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/share/hardware_sampling/cmake:${CMAKE_PREFIX_PATH}
+export CMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/share/hws/cmake:${CMAKE_PREFIX_PATH}
 export LD_LIBRARY_PATH=${CMAKE_INSTALL_PREFIX}/lib:${LD_LIBRARY_PATH}
 export CPLUS_INCLUDE_PATH=${CMAKE_INSTALL_PREFIX}/include:${CPLUS_INCLUDE_PATH}
 export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH}
 ```
 
+Note: if segmentation faults occur in calls to `zes` functions when using Intel GPUs, it may be necessary to set `export ZES_ENABLE_SYSMAN=1`.
+
+## Available samples
+
+The sampling type `fixed` denotes samples that are gathered only once per hardware sampler, like the maximum clock
+frequencies or temperatures or the total available memory.
+The sampling type `sampled` denotes samples that are gathered continuously during the whole hardware sampling process,
+like the current clock frequencies, temperatures, or memory consumption.
+
+### General samples
+
+| sample              | sample type |    CPUs     | NVIDIA GPUs | AMD GPUs  |  Intel GPUs   |
+|:--------------------|:-----------:|:-----------:|:-----------:|:---------:|:-------------:|
+| architecture        |    fixed    |     str     |     str     |    str    |       -       |
+| byte_order          |    fixed    |     str     |  str (fix)  | str (fix) |   str (fix)   |
+| num_cores           |    fixed    |     int     |     int     |     -     |       -       |
+| num_threads         |    fixed    |     int     |      -      |     -     |       -       |
+| threads_per_core    |    fixed    |     int     |      -      |     -     |       -       |
+| cores_per_socket    |    fixed    |     int     |      -      |     -     |       -       |
+| num_sockets         |    fixed    |     int     |      -      |     -     |       -       |
+| numa_nodes          |    fixed    |     int     |      -      |     -     |       -       |
+| vendor_id           |    fixed    |     str     |  str (fix)  |    str    | str (PCIe ID) |
+| name                |    fixed    |     str     |     str     |    str    |      str      |
+| flags               |    fixed    | list of str |      -      |     -     |  list of str  |
+| persistence_mode    |    fixed    |      -      |    bool     |     -     |       -       |
+| standby_mode        |    fixed    |      -      |      -      |     -     |      str      |
+| num_threads_per_eu  |    fixed    |      -      |      -      |     -     |      int      |
+| eu_simd_width       |    fixed    |      -      |      -      |     -     |      int      |
+| compute_utilization |   sampled   |      %      |      %      |     %     |       -       |
+| memory_utilization  |   sampled   |      -      |      %      |     %     |       -       |
+| ipc                 |   sampled   |    float    |      -      |     -     |       -       |
+| irq                 |   sampled   |     int     |      -      |     -     |       -       |
+| smi                 |   sampled   |     int     |      -      |     -     |       -       |
+| poll                |   sampled   |     int     |      -      |     -     |       -       |
+| poll_percent        |   sampled   |      %      |      -      |     -     |       -       |
+| performance_level   |   sampled   |      -      |     int     |    str    |       -       |
+
+### clock-related samples
+
+| sample                             | sample type | CPUs | NVIDIA GPUs |  AMD GPUs   | Intel GPUs  |
+|:-----------------------------------|:-----------:|:----:|:-----------:|:-----------:|:-----------:|
+| auto_boosted_clock_enabled         |    fixed    | bool |    bool     |      -      |      -      |
+| clock_frequency_min                |    fixed    | MHz  |     MHz     |     MHz     |     MHz     |
+| clock_frequency_max                |    fixed    | MHz  |     MHz     |     MHz     |     MHz     |
+| memory_clock_frequency_min         |    fixed    |  -   |     MHz     |     MHz     |     MHz     |
+| memory_clock_frequency_max         |    fixed    |  -   |     MHz     |     MHz     |     MHz     |
+| socket_clock_frequency_min         |    fixed    |  -   |      -      |     MHz     |      -      |
+| socket_clock_frequency_max         |    fixed    |  -   |      -      |     MHz     |      -      |
+| sm_clock_frequency_max             |    fixed    |  -   |     MHz     |      -      |      -      |
+| available_clock_frequencies        |    fixed    |  -   | map of MHz  | list of MHz | list of MHz |
+| available_memory_clock_frequencies |    fixed    |  -   | list of MHz | list of MHz | list of MHz |
+| clock_frequency                    |   sampled   | MHz  |     MHz     |     MHz     |     MHz     |
+| average_non_idle_clock_frequency   |   sampled   | MHz  |      -      |      -      |      -      |
+| time_stamp_counter                 |   sampled   | MHz  |      -      |      -      |      -      |
+| memory_clock_frequency             |   sampled   |  -   |     MHz     |     MHz     |     MHz     |
+| socket_clock_frequency             |   sampled   |  -   |      -      |     MHz     |      -      |
+| sm_clock_frequency                 |   sampled   |  -   |     MHz     |      -      |      -      |
+| overdrive_level                    |   sampled   |  -   |      -      |      %      |      -      |
+| memory_overdrive_level             |   sampled   |  -   |      -      |      %      |      -      |
+| throttle_reason                    |   sampled   |  -   |   bitmask   |      -      |   bitmask   |
+| throttle_reason_string             |   sampled   |  -   |     str     |      -      |     str     |
+| memory_throttle_reason             |   sampled   |  -   |      -      |      -      |   bitmask   |
+| memory_throttle_reason_string      |   sampled   |  -   |      -      |      -      |     str     |
+| auto_boosted_clock                 |   sampled   |  -   |    bool     |      -      |      -      |
+| frequency_limit_tdp                |   sampled   |  -   |      -      |      -      |     MHz     |
+| memory_frequency_limit_tdp         |   sampled   |  -   |      -      |      -      |     MHz     |
+
+### power-related samples
+
+| sample                         | sample type |               CPUs                | NVIDIA GPUs |                                        AMD GPUs                                        |                      Intel GPUs                      |
+|:-------------------------------|:-----------:|:---------------------------------:|:-----------:|:--------------------------------------------------------------------------------------:|:----------------------------------------------------:|
+| power_management_limit         |    fixed    |                 -                 |      W      |                                           W                                            |                          -                           |
+| power_enforced_limit           |    fixed    |                 -                 |      W      |                                           W                                            |                          W                           |
+| power_measurement_type         |    fixed    |             str (fix)             |     str     |                                          str                                           |                         str                          |
+| power_management_mode          |    fixed    |                 -                 |    bool     |                                           -                                            |                         bool                         |
+| available_power_profiles       |    fixed    |                 -                 | list of int |                                      list of str                                       |                          -                           |
+| power_usage                    |   sampled   |                 W                 |      W      |                                           W                                            | W<br>(calculated via power_total_energy_consumption) |
+| core_watt                      |   sampled   |                 W                 |      -      |                                           -                                            |                          -                           |
+| dram_watt                      |   sampled   |                 W                 |      -      |                                           -                                            |                          -                           |
+| package_rapl_throttling        |   sampled   |                 %                 |      -      |                                           -                                            |                          -                           |
+| dram_rapl_throttling           |   sampled   |                 %                 |      -      |                                           -                                            |                          -                           |
+| power_total_energy_consumption |   sampled   | J<br>(calculated via power_usage) |      J      | J<br>(calculated via power_usage if<br>power_total_energy_consumption isn't available) |                          J                           |
+| power_profile                  |   sampled   |                 -                 |     int     |                                          str                                           |                          -                           |
+
+### memory-related samples
+
+| sample                      | sample type | CPUs | NVIDIA GPUs | AMD GPUs |           Intel GPUs           |
+|:----------------------------|:-----------:|:----:|:-----------:|:--------:|:------------------------------:|
+| cache_size_L1d              |    fixed    | str  |      -      |    -     |               -                |
+| cache_size_L1i              |    fixed    | str  |      -      |    -     |               -                |
+| cache_size_L2               |    fixed    | str  |      -      |    -     |               -                |
+| cache_size_L3               |    fixed    | str  |      -      |    -     |               -                |
+| memory_total                |    fixed    |  B   |      B      |    B     |  B<br>(map of memory modules)  |
+| visible_memory_total        |    fixed    |  -   |      -      |    B     |  B<br>(map of memory modules)  |
+| swap_memory_total           |    fixed    |  B   |      -      |    -     |               -                |
+| num_pcie_lanes_min          |    fixed    |  -   |      -      |   int    |               -                |
+| num_pcie_lanes_max          |    fixed    |  -   |     int     |   int    |              int               |
+| pcie_link_generation_max    |    fixed    |  -   |     int     |    -     |              int               |
+| pcie_link_speed_max         |    fixed    |  -   |    MBPS     |    -     |              MBPS              |
+| pcie_link_transfer_rate_min |    fixed    |  -   |      -      |   MT/s   |               -                |
+| pcie_link_transfer_rate_max |    fixed    |  -   |      -      |   MT/s   |               -                |
+| memory_bus_width            |    fixed    |  -   |     Bit     |    -     | Bit<br>(map of memory modules) |
+| memory_num_channels         |    fixed    |  -   |      -      |    -     | int<br>(map of memory modules) |
+| memory_used                 |   sampled   |  B   |      B      |    B     |  B<br>(map of memory modules)  |
+| memory_free                 |   sampled   |  B   |      B      |    B     |  B<br>(map of memory modules)  |
+| swap_memory_used            |   sampled   |  B   |      -      |    -     |               -                |
+| swap_memory_free            |   sampled   |  B   |      -      |    -     |               -                |
+| num_pcie_lanes              |   sampled   |  -   |     int     |   int    |              int               |
+| pcie_link_generation        |   sampled   |  -   |     int     |    -     |              int               |
+| pcie_link_speed             |   sampled   |  -   |    MBPS     |    -     |              MBPS              |
+| pcie_link_transfer_rate     |   sampled   |  -   |      -      |   T/s    |               -                |
+
+### temperature-related samples
+
+| sample                  | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs |
+|:------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:|
+| num_fans                |    fixed    |  -   |     int     |   int    |    int     |
+| fan_speed_min           |    fixed    |  -   |      %      |    -     |     -      | 
+| fan_speed_max           |    fixed    |  -   |      %      |   RPM    |    RPM     |
+| temperature_min         |    fixed    |  -   |      -      |    °C    |     -      |
+| temperature_max         |    fixed    |  -   |     °C      |    °C    |     °C     |
+| memory_temperature_min  |    fixed    |  -   |      -      |    °C    |     -      |
+| memory_temperature_max  |    fixed    |  -   |     °C      |    °C    |     °C     |
+| hotspot_temperature_min |    fixed    |  -   |      -      |    °C    |     -      |
+| hotspot_temperature_max |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_0_temperature_min   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_0_temperature_max   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_1_temperature_min   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_1_temperature_max   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_2_temperature_min   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_2_temperature_max   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_3_temperature_min   |    fixed    |  -   |      -      |    °C    |     -      |
+| hbm_3_temperature_max   |    fixed    |  -   |      -      |    °C    |     -      |
+| global_temperature_max  |    fixed    |  -   |      -      |    °C    |     °C     |
+| fan_speed_percentage    |   sampled   |  -   |      %      |    %     |     %      |
+| temperature             |   sampled   |  °C  |     °C      |    °C    |     °C     |
+| memory_temperature      |   sampled   |  -   |      -      |    °C    |     °C     |
+| hotspot_temperature     |   sampled   |  -   |      -      |    °C    |     -      |
+| hbm_0_temperature       |   sampled   |  -   |      -      |    °C    |     -      |
+| hbm_1_temperature       |   sampled   |  -   |      -      |    °C    |     -      |
+| hbm_2_temperature       |   sampled   |  -   |      -      |    °C    |     -      |
+| hbm_3_temperature       |   sampled   |  -   |      -      |    °C    |     -      |
+| global_temperature      |   sampled   |  -   |      -      |    -     |     °C     |
+| psu_temperature         |   sampled   |  -   |      -      |    -     |     °C     |
+| core_temperature        |   sampled   |  °C  |      -      |    -     |     -      |
+| core_throttle_percent   |   sampled   |  %   |      -      |    -     |     -      |
+
+### gfx-related (iGPU) samples
+
+| sample                    | sample type | CPUs |
+|:--------------------------|:-----------:|:----:|
+| gfx_render_state_percent  |   sampled   |  %   |
+| gfx_frequency             |   sampled   | MHz  |
+| average_gfx_frequency     |   sampled   | MHz  |
+| gfx_state_c0_percent      |   sampled   |  %   |
+| cpu_works_for_gpu_percent |   sampled   |  %   |
+| gfx_watt                  |   sampled   |  W   |
+
+### "idle states"-related samples
+
+| sample                               | sample type |     CPUs      |
+|:-------------------------------------|:-----------:|:-------------:|
+| idle_states                          |    fixed    | map of values |
+| all_cpus_state_c0_percent            |   sampled   |       %       |
+| any_cpu_state_c0_percent             |   sampled   |       %       |
+| low_power_idle_state_percent         |   sampled   |       %       |
+| system_low_power_idle_state_percent  |   sampled   |       %       |
+| package_low_power_idle_state_percent |   sampled   |       %       |
+
 ## Example Python usage
 
 ```python
-import HardwareSampling
+import HardwareSampling as hws
 import numpy as np
 import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
 import datetime
 
-sampler = HardwareSampling.CpuHardwareSampler()
+sampler = hws.CpuHardwareSampler()
 # could also be, e.g.,
-# sampler = HardwareSampling.GpuNvidiaHardwareSampler()
+# sampler = hws.GpuNvidiaHardwareSampler()
 sampler.start()
 
 sampler.add_event("init")
@@ -85,19 +266,18 @@ sampler.stop()
 sampler.dump_yaml("track.yaml")
 
 # plot the results
-time_points = sampler.time_points()
-relative_time_points = [(t - time_points[0]) / datetime.timedelta(milliseconds=1) for t in time_points]
+time_points = sampler.relative_time_points()
 
-plt.plot(relative_time_points, sampler.clock_samples().get_average_frequency(), label="average")
-plt.plot(relative_time_points, sampler.clock_samples().get_average_non_idle_frequency(), label="average non-idle")
+plt.plot(time_points, sampler.clock_samples().get_clock_frequency(), label="average")
+plt.plot(time_points, sampler.clock_samples().get_average_non_idle_clock_frequency(), label="average non-idle")
 
 axes = plt.gcf().axes[0]
 x_bounds = axes.get_xlim()
-for event in sampler.get_events()[1:-1]:
-    tp = (event.time_point - time_points[0]) / datetime.timedelta(milliseconds=1)
-
-    axes.axvline(x=tp, color='r')
-    axes.annotate(text=event.name, xy=(((tp - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270)
+for event in sampler.get_relative_events()[1:-1]:
+    axes.axvline(x=event.relative_time_point, color='r')
+    axes.annotate(text=event.name,
+                  xy=(((event.relative_time_point - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025),
+                  xycoords='axes fraction', rotation=270)
 
 plt.xlabel("runtime [ms]")
 plt.ylabel("clock frequency [MHz]")
@@ -111,4 +291,5 @@ plt.show()
 
 ## License
 
-The hws library is distributed under the [MIT license](https://github.com/SC-SGS/hardware_sampling/blob/main/LICENSE.md).
\ No newline at end of file
+The hws library is distributed under
+the [MIT license](https://github.com/SC-SGS/hardware_sampling/blob/main/LICENSE.md).
\ No newline at end of file
diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt
index 95f6a2b..f2ef8d8 100644
--- a/bindings/CMakeLists.txt
+++ b/bindings/CMakeLists.txt
@@ -4,7 +4,7 @@
 ##          See the LICENSE.md file in the project root for full license information.
 ########################################################################################################################
 
-message(STATUS "Building Python language bindings for PLSSVM.")
+message(STATUS "Building Python language bindings.")
 
 find_package(Python COMPONENTS Interpreter Development)
 
@@ -32,7 +32,11 @@ endif ()
 # set source files that are always used
 set(HWS_PYTHON_BINDINGS_SOURCES
         ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/relative_event.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/sample_category.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/system_hardware_sampler.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/version.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
 )
 
diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp
index 75f0a01..8e3e104 100644
--- a/bindings/cpu_hardware_sampler.cpp
+++ b/bindings/cpu_hardware_sampler.cpp
@@ -5,24 +5,27 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/cpu/cpu_samples.hpp"       // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
-#include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
-#include "hardware_sampling/hardware_sampler.hpp"      // hws::hardware_sampler
+#include "hws/cpu/cpu_samples.hpp"       // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
+#include "hws/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
+#include "hws/hardware_sampler.hpp"      // hws::hardware_sampler
+#include "hws/sample_category.hpp"       // hws::sample_category
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
 #include <chrono>  // std::chrono::milliseconds
-#include <format>  // std::format
 
 namespace py = pybind11;
 
 void init_cpu_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::cpu_general_samples>(m, "CpuGeneralSamples")
+        .def("has_samples", &hws::cpu_general_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_architecture", &hws::cpu_general_samples::get_architecture, "the CPU architecture (e.g., x86_64)")
         .def("get_byte_order", &hws::cpu_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
+        .def("get_num_cores", &hws::cpu_general_samples::get_num_cores, "the total number of cores of the CPU(s)")
         .def("get_num_threads", &hws::cpu_general_samples::get_num_threads, "the number of threads of the CPU(s) including potential hyper-threads")
         .def("get_threads_per_core", &hws::cpu_general_samples::get_threads_per_core, "the number of hyper-threads per core")
         .def("get_cores_per_socket", &hws::cpu_general_samples::get_cores_per_socket, "the number of physical cores per socket")
@@ -31,45 +34,50 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("get_vendor_id", &hws::cpu_general_samples::get_vendor_id, "the vendor ID (e.g. GenuineIntel)")
         .def("get_name", &hws::cpu_general_samples::get_name, "the name of the CPU")
         .def("get_flags", &hws::cpu_general_samples::get_flags, "potential CPU flags (e.g., sse4_1, avx, avx, etc)")
-        .def("get_busy_percent", &hws::cpu_general_samples::get_busy_percent, "the percent the CPU was busy doing work")
+        .def("get_compute_utilization", &hws::cpu_general_samples::get_compute_utilization, "the percent the CPU was busy doing work")
         .def("get_ipc", &hws::cpu_general_samples::get_ipc, "the instructions-per-cycle count")
         .def("get_irq", &hws::cpu_general_samples::get_irq, "the number of interrupts")
         .def("get_smi", &hws::cpu_general_samples::get_smi, "the number of system management interrupts")
         .def("get_poll", &hws::cpu_general_samples::get_poll, "the number of times the CPU was in the polling state")
         .def("get_poll_percent", &hws::cpu_general_samples::get_poll_percent, "the percent of the CPU was in the polling state")
         .def("__repr__", [](const hws::cpu_general_samples &self) {
-            return std::format("<HardwareSampling.CpuGeneralSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuGeneralSamples with\n{}\n>", self);
         });
 
     // bind the clock samples
     py::class_<hws::cpu_clock_samples>(m, "CpuClockSamples")
-        .def("get_frequency_boost", &hws::cpu_clock_samples::get_frequency_boost, "true if frequency boosting is enabled")
-        .def("get_min_frequency", &hws::cpu_clock_samples::get_min_frequency, "the minimum possible CPU frequency in MHz")
-        .def("get_max_frequency", &hws::cpu_clock_samples::get_max_frequency, "the maximum possible CPU frequency in MHz")
-        .def("get_average_frequency", &hws::cpu_clock_samples::get_average_frequency, "the average CPU frequency in MHz including idle cores")
-        .def("get_average_non_idle_frequency", &hws::cpu_clock_samples::get_average_non_idle_frequency, "the average CPU frequency in MHz excluding idle cores")
+        .def("has_samples", &hws::cpu_clock_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_auto_boosted_clock_enabled", &hws::cpu_clock_samples::get_auto_boosted_clock_enabled, "true if frequency boosting is enabled")
+        .def("get_clock_frequency_min", &hws::cpu_clock_samples::get_clock_frequency_min, "the minimum possible CPU frequency in MHz")
+        .def("get_clock_frequency_max", &hws::cpu_clock_samples::get_clock_frequency_max, "the maximum possible CPU frequency in MHz")
+        .def("get_clock_frequency", &hws::cpu_clock_samples::get_clock_frequency, "the average CPU frequency in MHz including idle cores")
+        .def("get_average_non_idle_clock_frequency", &hws::cpu_clock_samples::get_average_non_idle_clock_frequency, "the average CPU frequency in MHz excluding idle cores")
         .def("get_time_stamp_counter", &hws::cpu_clock_samples::get_time_stamp_counter, "the time stamp counter")
         .def("__repr__", [](const hws::cpu_clock_samples &self) {
-            return std::format("<HardwareSampling.CpuClockSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuClockSamples with\n{}\n>", self);
         });
 
     // bind the power samples
     py::class_<hws::cpu_power_samples>(m, "CpuPowerSamples")
-        .def("get_package_watt", &hws::cpu_power_samples::get_package_watt, "the currently consumed power of the package of the CPU in W")
+        .def("has_samples", &hws::cpu_power_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_power_measurement_type", &hws::cpu_power_samples::get_power_measurement_type, "the type of the power readings: always \"instant/current\"")
+        .def("get_power_usage", &hws::cpu_power_samples::get_power_usage, "the currently consumed power of the package of the CPU in W")
+        .def("get_power_total_energy_consumed", &hws::cpu_power_samples::get_power_total_energy_consumption, "the total power consumption in J")
         .def("get_core_watt", &hws::cpu_power_samples::get_core_watt, "the currently consumed power of the core part of the CPU in W")
         .def("get_ram_watt", &hws::cpu_power_samples::get_ram_watt, "the currently consumed power of the RAM part of the CPU in W")
         .def("get_package_rapl_throttle_percent", &hws::cpu_power_samples::get_package_rapl_throttle_percent, "the percent of time the package throttled due to RAPL limiters")
         .def("get_dram_rapl_throttle_percent", &hws::cpu_power_samples::get_dram_rapl_throttle_percent, "the percent of time the DRAM throttled due to RAPL limiters")
         .def("__repr__", [](const hws::cpu_power_samples &self) {
-            return std::format("<HardwareSampling.CpuPowerSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuPowerSamples with\n{}\n>", self);
         });
 
     // bind the memory samples
     py::class_<hws::cpu_memory_samples>(m, "CpuMemorySamples")
-        .def("get_l1d_cache", &hws::cpu_memory_samples::get_l1d_cache, "the size of the L1 data cache")
-        .def("get_l1i_cache", &hws::cpu_memory_samples::get_l1i_cache, "the size of the L1 instruction cache")
-        .def("get_l2_cache", &hws::cpu_memory_samples::get_l2_cache, "the size of the L2 cache")
-        .def("get_l3_cache", &hws::cpu_memory_samples::get_l3_cache, "the size of the L2 cache")
+        .def("has_samples", &hws::cpu_memory_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_cache_size_L1d", &hws::cpu_memory_samples::get_cache_size_L1d, "the size of the L1 data cache")
+        .def("get_cache_size_L1i", &hws::cpu_memory_samples::get_cache_size_L1i, "the size of the L1 instruction cache")
+        .def("get_cache_size_L2", &hws::cpu_memory_samples::get_cache_size_L2, "the size of the L2 cache")
+        .def("get_cache_size_L3", &hws::cpu_memory_samples::get_cache_size_L3, "the size of the L3 cache")
         .def("get_memory_total", &hws::cpu_memory_samples::get_memory_total, "the total available memory in Byte")
         .def("get_swap_memory_total", &hws::cpu_memory_samples::get_swap_memory_total, "the total available swap memory in Byte")
         .def("get_memory_used", &hws::cpu_memory_samples::get_memory_used, "the currently used memory in Byte")
@@ -77,20 +85,22 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("get_swap_memory_used", &hws::cpu_memory_samples::get_swap_memory_used, "the currently used swap memory in Byte")
         .def("get_swap_memory_free", &hws::cpu_memory_samples::get_swap_memory_free, "the currently free swap memory in Byte")
         .def("__repr__", [](const hws::cpu_memory_samples &self) {
-            return std::format("<HardwareSampling.CpuMemorySamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuMemorySamples with\n{}\n>", self);
         });
 
     // bind the temperature samples
     py::class_<hws::cpu_temperature_samples>(m, "CpuTemperatureSamples")
+        .def("has_samples", &hws::cpu_temperature_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_temperature", &hws::cpu_temperature_samples::get_temperature, "the current temperature of the whole package in °C")
         .def("get_core_temperature", &hws::cpu_temperature_samples::get_core_temperature, "the current temperature of the core part of the CPU in °C")
         .def("get_core_throttle_percent", &hws::cpu_temperature_samples::get_core_throttle_percent, "the percent of time the CPU has throttled")
-        .def("get_package_temperature", &hws::cpu_temperature_samples::get_package_temperature, "the current temperature of the whole package in °C")
         .def("__repr__", [](const hws::cpu_temperature_samples &self) {
-            return std::format("<HardwareSampling.CpuTemperatureSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuTemperatureSamples with\n{}\n>", self);
         });
 
     // bind the gfx samples
     py::class_<hws::cpu_gfx_samples>(m, "CpuGfxSamples")
+        .def("has_samples", &hws::cpu_gfx_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_gfx_render_state_percent", &hws::cpu_gfx_samples::get_gfx_render_state_percent, "the percent of time the iGPU was in the render state")
         .def("get_gfx_frequency", &hws::cpu_gfx_samples::get_gfx_frequency, "the current iGPU power consumption in W")
         .def("get_average_gfx_frequency", &hws::cpu_gfx_samples::get_average_gfx_frequency, "the average iGPU frequency in MHz")
@@ -98,11 +108,12 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("get_cpu_works_for_gpu_percent", &hws::cpu_gfx_samples::get_cpu_works_for_gpu_percent, "the percent of time the CPU was doing work for the iGPU")
         .def("get_gfx_watt", &hws::cpu_gfx_samples::get_gfx_watt, "the currently consumed power of the iGPU of the CPU in W")
         .def("__repr__", [](const hws::cpu_gfx_samples &self) {
-            return std::format("<HardwareSampling.CpuGfxSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuGfxSamples with\n{}\n>", self);
         });
 
     // bind the idle state samples
     py::class_<hws::cpu_idle_states_samples>(m, "CpuIdleStateSamples")
+        .def("has_samples", &hws::cpu_idle_states_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_idle_states", &hws::cpu_idle_states_samples::get_idle_states, "the map of additional CPU idle states")
         .def("get_all_cpus_state_c0_percent", &hws::cpu_idle_states_samples::get_all_cpus_state_c0_percent, "the percent of time all CPUs were in idle state c0")
         .def("get_any_cpu_state_c0_percent", &hws::cpu_idle_states_samples::get_any_cpu_state_c0_percent, "the percent of time any CPU was in the idle state c0")
@@ -110,13 +121,15 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("get_system_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_system_low_power_idle_state_percent, "the percent of time the CPU was in the system low power idle state")
         .def("get_package_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_package_low_power_idle_state_percent, "the percent of time the CPU was in the package low power idle state")
         .def("__repr__", [](const hws::cpu_gfx_samples &self) {
-            return std::format("<HardwareSampling.CpuIdleStateSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuIdleStateSamples with\n{}\n>", self);
         });
 
     // bind the CPU hardware sampler class
     py::class_<hws::cpu_hardware_sampler, hws::hardware_sampler>(m, "CpuHardwareSampler")
         .def(py::init<>(), "construct a new CPU hardware sampler")
+        .def(py::init<hws::sample_category>(), "construct a new CPU hardware sampler sampling only the provided sample_category samples")
         .def(py::init<std::chrono::milliseconds>(), "construct a new CPU hardware sampler specifying the used sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new CPU hardware sampler specifying the used sampling interval sampling only the provided sample_category samples")
         .def("general_samples", &hws::cpu_hardware_sampler::general_samples, "get all general samples")
         .def("clock_samples", &hws::cpu_hardware_sampler::clock_samples, "get all clock related samples")
         .def("power_samples", &hws::cpu_hardware_sampler::power_samples, "get all power related samples")
@@ -124,7 +137,8 @@ void init_cpu_hardware_sampler(py::module_ &m) {
         .def("temperature_samples", &hws::cpu_hardware_sampler::temperature_samples, "get all temperature related samples")
         .def("gfx_samples", &hws::cpu_hardware_sampler::gfx_samples, "get all gfx (iGPU) related samples")
         .def("idle_state_samples", &hws::cpu_hardware_sampler::idle_state_samples, "get all idle state related samples")
+        .def("samples_only_as_yaml_string", &hws::cpu_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::cpu_hardware_sampler &self) {
-            return std::format("<HardwareSampling.CpuHardwareSampler with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.CpuHardwareSampler with\n{}\n>", self);
         });
 }
diff --git a/bindings/event.cpp b/bindings/event.cpp
index ba463ad..f19315a 100644
--- a/bindings/event.cpp
+++ b/bindings/event.cpp
@@ -5,14 +5,13 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/event.hpp"  // hws::event
+#include "hws/event.hpp"  // hws::event
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // bind std::chrono types
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
-#include <format>  // std::format
-
 namespace py = pybind11;
 
 void init_event(py::module_ &m) {
@@ -22,6 +21,6 @@ void init_event(py::module_ &m) {
         .def_readonly("time_point", &hws::event::time_point, "read the time point associated to this event")
         .def_readonly("name", &hws::event::name, "read the name associated to this event")
         .def("__repr__", [](const hws::event &self) {
-            return std::format("<HardWareSampling.Event with {{ time_point: {}, name: {} }}>", self.time_point.time_since_epoch(), self.name);
+            return fmt::format("<HardWareSampling.Event with {{ time_point: {}, name: {} }}>", self.time_point.time_since_epoch(), self.name);
         });
 }
diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp
index f43c741..db846da 100644
--- a/bindings/gpu_amd_hardware_sampler.cpp
+++ b/bindings/gpu_amd_hardware_sampler.cpp
@@ -5,116 +5,135 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
-#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hws/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
+#include "hws/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
+#include "hws/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hws/sample_category.hpp"           // hws::sample_category
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
 #include <chrono>   // std::chrono::milliseconds
 #include <cstddef>  // std::size_t
-#include <format>   // std::format
 
 namespace py = pybind11;
 
 void init_gpu_amd_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::rocm_smi_general_samples>(m, "RocmSmiGeneralSamples")
+        .def("has_samples", &hws::rocm_smi_general_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_architecture", &hws::rocm_smi_general_samples::get_name, "the architecture name of the device")
+        .def("get_byte_order", &hws::rocm_smi_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
+        .def("get_vendor_id", &hws::rocm_smi_general_samples::get_vendor_id, "the vendor ID")
         .def("get_name", &hws::rocm_smi_general_samples::get_name, "the name of the device")
+        .def("get_compute_utilization", &hws::rocm_smi_general_samples::get_compute_utilization, "the GPU compute utilization in percent")
+        .def("get_memory_utilization", &hws::rocm_smi_general_samples::get_memory_utilization, "the GPU memory utilization in percent")
         .def("get_performance_level", &hws::rocm_smi_general_samples::get_performance_level, "the performance level: one of rsmi_dev_perf_level_t")
-        .def("get_utilization_gpu", &hws::rocm_smi_general_samples::get_utilization_gpu, "the GPU compute utilization in percent")
-        .def("get_utilization_mem", &hws::rocm_smi_general_samples::get_utilization_mem, "the GPU memory utilization in percent")
         .def("__repr__", [](const hws::rocm_smi_general_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiGeneralSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiGeneralSamples with\n{}\n>", self);
         });
 
     // bind the clock samples
     py::class_<hws::rocm_smi_clock_samples>(m, "RocmSmiClockSamples")
-        .def("get_clock_system_min", &hws::rocm_smi_clock_samples::get_clock_system_min, "the minimum possible system clock frequency in Hz")
-        .def("get_clock_system_max", &hws::rocm_smi_clock_samples::get_clock_system_max, "the maximum possible system clock frequency in Hz")
-        .def("get_clock_socket_min", &hws::rocm_smi_clock_samples::get_clock_socket_min, "the minimum possible socket clock frequency in Hz")
-        .def("get_clock_socket_max", &hws::rocm_smi_clock_samples::get_clock_socket_max, "the maximum possible socket clock frequency in Hz")
-        .def("get_clock_memory_min", &hws::rocm_smi_clock_samples::get_clock_memory_min, "the minimum possible memory clock frequency in Hz")
-        .def("get_clock_memory_max", &hws::rocm_smi_clock_samples::get_clock_memory_max, "the maximum possible memory clock frequency in Hz")
-        .def("get_clock_system", &hws::rocm_smi_clock_samples::get_clock_system, "the current system clock frequency in Hz")
-        .def("get_clock_socket", &hws::rocm_smi_clock_samples::get_clock_socket, "the current socket clock frequency in Hz")
-        .def("get_clock_memory", &hws::rocm_smi_clock_samples::get_clock_memory, "the current memory clock frequency in Hz")
+        .def("has_samples", &hws::rocm_smi_clock_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_clock_frequency_min", &hws::rocm_smi_clock_samples::get_clock_frequency_min, "the minimum possible system clock frequency in MHz")
+        .def("get_clock_frequency_max", &hws::rocm_smi_clock_samples::get_clock_frequency_max, "the maximum possible system clock frequency in MHz")
+        .def("get_memory_clock_frequency_min", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz")
+        .def("get_memory_clock_frequency_max", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz")
+        .def("get_socket_clock_frequency_min", &hws::rocm_smi_clock_samples::get_socket_clock_frequency_min, "the minimum possible socket clock frequency in MHz")
+        .def("get_socket_clock_frequency_max", &hws::rocm_smi_clock_samples::get_socket_clock_frequency_max, "the maximum possible socket clock frequency in MHz")
+        .def("get_available_clock_frequencies", &hws::rocm_smi_clock_samples::get_available_clock_frequencies, "the available clock frequencies in MHz (slowest to fastest)")
+        .def("get_available_memory_clock_frequencies", &hws::rocm_smi_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)")
+        .def("get_clock_frequency", &hws::rocm_smi_clock_samples::get_clock_frequency, "the current system clock frequency in MHz")
+        .def("get_memory_clock_frequency", &hws::rocm_smi_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz")
+        .def("get_socket_clock_frequency", &hws::rocm_smi_clock_samples::get_socket_clock_frequency, "the current socket clock frequency in MHz")
         .def("get_overdrive_level", &hws::rocm_smi_clock_samples::get_overdrive_level, "the GPU overdrive percentage")
         .def("get_memory_overdrive_level", &hws::rocm_smi_clock_samples::get_memory_overdrive_level, "the GPU's memory overdrive percentage")
         .def("__repr__", [](const hws::rocm_smi_clock_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiClockSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiClockSamples with\n{}\n>", self);
         });
 
     // bind the power samples
     py::class_<hws::rocm_smi_power_samples>(m, "RocmSmiPowerSamples")
-        .def("get_power_default_cap", &hws::rocm_smi_power_samples::get_power_default_cap, "the default power cap, may be different from power cap")
-        .def("get_power_cap", &hws::rocm_smi_power_samples::get_power_cap, "if the GPU draws more power (μW) than the power cap, the GPU may throttle")
-        .def("get_power_type", &hws::rocm_smi_power_samples::get_power_type, "the type of the power management: either current power draw or average power draw")
+        .def("has_samples", &hws::rocm_smi_power_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_power_management_limit", &hws::rocm_smi_power_samples::get_power_management_limit, "the default power cap (W), may be different from power cap")
+        .def("get_power_enforced_limit", &hws::rocm_smi_power_samples::get_power_enforced_limit, "if the GPU draws more power (W) than the power cap, the GPU may throttle")
+        .def("get_power_measurement_type", &hws::rocm_smi_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw")
         .def("get_available_power_profiles", &hws::rocm_smi_power_samples::get_available_power_profiles, "a list of the available power profiles")
-        .def("get_power_usage", &hws::rocm_smi_power_samples::get_power_usage, "the current GPU socket power draw in μW")
-        .def("get_power_total_energy_consumption", &hws::rocm_smi_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in μJ")
+        .def("get_power_usage", &hws::rocm_smi_power_samples::get_power_usage, "the current GPU socket power draw in W")
+        .def("get_power_total_energy_consumption", &hws::rocm_smi_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in J")
         .def("get_power_profile", &hws::rocm_smi_power_samples::get_power_profile, "the current active power profile; one of 'available_power_profiles'")
         .def("__repr__", [](const hws::rocm_smi_power_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiPowerSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiPowerSamples with\n{}\n>", self);
         });
 
     // bind the memory samples
     py::class_<hws::rocm_smi_memory_samples>(m, "RocmSmiMemorySamples")
+        .def("has_samples", &hws::rocm_smi_memory_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_memory_total", &hws::rocm_smi_memory_samples::get_memory_total, "the total available memory in Byte")
         .def("get_visible_memory_total", &hws::rocm_smi_memory_samples::get_visible_memory_total, "the total visible available memory in Byte, may be smaller than the total memory")
-        .def("get_min_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_min_num_pcie_lanes, "the minimum number of used PCIe lanes")
-        .def("get_max_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_max_num_pcie_lanes, "the maximum number of used PCIe lanes")
+        .def("get_num_pcie_lanes_min", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_min, "the minimum number of used PCIe lanes")
+        .def("get_num_pcie_lanes_max", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_max, "the maximum number of used PCIe lanes")
+        .def("get_pcie_link_transfer_rate_min", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate_min, "the minimum PCIe link transfer rate in MT/s")
+        .def("get_pcie_link_transfer_rate_max", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate_max, "the maximum PCIe link transfer rate in MT/s")
         .def("get_memory_used", &hws::rocm_smi_memory_samples::get_memory_used, "the currently used memory in Byte")
-        .def("get_pcie_transfer_rate", &hws::rocm_smi_memory_samples::get_pcie_transfer_rate, "the current PCIe transfer rate in T/s")
+        .def("get_memory_free", &hws::rocm_smi_memory_samples::get_memory_free, "the currently free memory in Byte")
         .def("get_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_num_pcie_lanes, "the number of currently used PCIe lanes")
+        .def("get_pcie_link_transfer_rate", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate, "the current PCIe link transfer rate in MT/s")
         .def("__repr__", [](const hws::rocm_smi_memory_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiMemorySamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiMemorySamples with\n{}\n>", self);
         });
 
     // bind the temperature samples
     py::class_<hws::rocm_smi_temperature_samples>(m, "RocmSmiTemperatureSamples")
+        .def("has_samples", &hws::rocm_smi_temperature_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_num_fans", &hws::rocm_smi_temperature_samples::get_num_fans, "the number of fans (if any)")
-        .def("get_max_fan_speed", &hws::rocm_smi_temperature_samples::get_max_fan_speed, "the maximum fan speed")
-        .def("get_temperature_edge_min", &hws::rocm_smi_temperature_samples::get_temperature_edge_min, "the minimum temperature on the GPU's edge temperature sensor in m°C")
-        .def("get_temperature_edge_max", &hws::rocm_smi_temperature_samples::get_temperature_edge_max, "the maximum temperature on the GPU's edge temperature sensor in m°C")
-        .def("get_temperature_hotspot_min", &hws::rocm_smi_temperature_samples::get_temperature_hotspot_min, "the minimum temperature on the GPU's hotspot temperature sensor in m°C")
-        .def("get_temperature_hotspot_max", &hws::rocm_smi_temperature_samples::get_temperature_hotspot_max, "the maximum temperature on the GPU's hotspot temperature sensor in m°C")
-        .def("get_temperature_memory_min", &hws::rocm_smi_temperature_samples::get_temperature_memory_min, "the minimum temperature on the GPU's memory temperature sensor in m°C")
-        .def("get_temperature_memory_max", &hws::rocm_smi_temperature_samples::get_temperature_memory_max, "the maximum temperature on the GPU's memory temperature sensor in m°C")
-        .def("get_temperature_hbm_0_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0_min, "the minimum temperature on the GPU's HBM0 temperature sensor in m°C")
-        .def("get_temperature_hbm_0_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0_max, "the maximum temperature on the GPU's HBM0 temperature sensor in m°C")
-        .def("get_temperature_hbm_1_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1_min, "the minimum temperature on the GPU's HBM1 temperature sensor in m°C")
-        .def("get_temperature_hbm_1_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1_max, "the maximum temperature on the GPU's HBM1 temperature sensor in m°C")
-        .def("get_temperature_hbm_2_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2_min, "the minimum temperature on the GPU's HBM2 temperature sensor in m°C")
-        .def("get_temperature_hbm_2_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2_max, "the maximum temperature on the GPU's HBM2 temperature sensor in m°C")
-        .def("get_temperature_hbm_3_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3_min, "the minimum temperature on the GPU's HBM3 temperature sensor in m°C")
-        .def("get_temperature_hbm_3_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3_max, "the maximum temperature on the GPU's HBM3 temperature sensor in m°C")
-        .def("get_fan_speed", &hws::rocm_smi_temperature_samples::get_fan_speed, "the current fan speed in %")
-        .def("get_temperature_edge", &hws::rocm_smi_temperature_samples::get_temperature_edge, "the current temperature on the GPU's edge temperature sensor in m°C")
-        .def("get_temperature_hotspot", &hws::rocm_smi_temperature_samples::get_temperature_hotspot, "the current temperature on the GPU's hotspot temperature sensor in m°C")
-        .def("get_temperature_memory", &hws::rocm_smi_temperature_samples::get_temperature_memory, "the current temperature on the GPU's memory temperature sensor in m°C")
-        .def("get_temperature_hbm_0", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0, "the current temperature on the GPU's HBM0 temperature sensor in m°C")
-        .def("get_temperature_hbm_1", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1, "the current temperature on the GPU's HBM1 temperature sensor in m°C")
-        .def("get_temperature_hbm_2", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2, "the current temperature on the GPU's HBM2 temperature sensor in m°C")
-        .def("get_temperature_hbm_3", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3, "the current temperature on the GPU's HBM3 temperature sensor in m°C")
+        .def("get_fan_speed_max", &hws::rocm_smi_temperature_samples::get_fan_speed_max, "the maximum fan speed in RPM")
+        .def("get_temperature_min", &hws::rocm_smi_temperature_samples::get_temperature_min, "the minimum temperature on the GPU's edge temperature sensor in °C")
+        .def("get_temperature_max", &hws::rocm_smi_temperature_samples::get_temperature_max, "the maximum temperature on the GPU's edge temperature sensor in °C")
+        .def("get_memory_temperature_min", &hws::rocm_smi_temperature_samples::get_memory_temperature_min, "the minimum temperature on the GPU's memory temperature sensor in °C")
+        .def("get_memory_temperature_max", &hws::rocm_smi_temperature_samples::get_memory_temperature_max, "the maximum temperature on the GPU's memory temperature sensor in °C")
+        .def("get_hotspot_temperature_min", &hws::rocm_smi_temperature_samples::get_hotspot_temperature_min, "the minimum temperature on the GPU's hotspot temperature sensor in °C")
+        .def("get_hotspot_temperature_max", &hws::rocm_smi_temperature_samples::get_hotspot_temperature_max, "the maximum temperature on the GPU's hotspot temperature sensor in °C")
+        .def("get_hbm_0_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature_min, "the minimum temperature on the GPU's HBM0 temperature sensor in °C")
+        .def("get_hbm_0_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature_max, "the maximum temperature on the GPU's HBM0 temperature sensor in °C")
+        .def("get_hbm_1_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature_min, "the minimum temperature on the GPU's HBM1 temperature sensor in °C")
+        .def("get_hbm_1_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature_max, "the maximum temperature on the GPU's HBM1 temperature sensor in °C")
+        .def("get_hbm_2_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature_min, "the minimum temperature on the GPU's HBM2 temperature sensor in °C")
+        .def("get_hbm_2_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature_max, "the maximum temperature on the GPU's HBM2 temperature sensor in °C")
+        .def("get_hbm_3_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature_min, "the minimum temperature on the GPU's HBM3 temperature sensor in °C")
+        .def("get_hbm_3_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature_max, "the maximum temperature on the GPU's HBM3 temperature sensor in °C")
+        .def("get_fan_speed_percentage", &hws::rocm_smi_temperature_samples::get_fan_speed_percentage, "the current fan speed in %")
+        .def("get_temperature", &hws::rocm_smi_temperature_samples::get_temperature, "the current temperature on the GPU's edge temperature sensor in °C")
+        .def("get_hotspot_temperature", &hws::rocm_smi_temperature_samples::get_hotspot_temperature, "the current temperature on the GPU's hotspot temperature sensor in °C")
+        .def("get_memory_temperature", &hws::rocm_smi_temperature_samples::get_memory_temperature, "the current temperature on the GPU's memory temperature sensor in °C")
+        .def("get_hbm_0_temperature", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature, "the current temperature on the GPU's HBM0 temperature sensor in °C")
+        .def("get_hbm_1_temperature", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature, "the current temperature on the GPU's HBM1 temperature sensor in °C")
+        .def("get_hbm_2_temperature", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature, "the current temperature on the GPU's HBM2 temperature sensor in °C")
+        .def("get_hbm_3_temperature", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature, "the current temperature on the GPU's HBM3 temperature sensor in °C")
         .def("__repr__", [](const hws::rocm_smi_temperature_samples &self) {
-            return std::format("<HardwareSampling.RocmSmiTemperatureSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.RocmSmiTemperatureSamples with\n{}\n>", self);
         });
 
     // bind the GPU AMD hardware sampler class
     py::class_<hws::gpu_amd_hardware_sampler, hws::hardware_sampler>(m, "GpuAmdHardwareSampler")
         .def(py::init<>(), "construct a new AMD GPU hardware sampler for the default device with the default sampling interval")
+        .def(py::init<hws::sample_category>(), "construct a new AMD GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t>(), "construct a new AMD GPU hardware sampler for the specified device with the default sampling interval")
+        .def(py::init<std::size_t, hws::sample_category>(), "construct a new AMD GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::chrono::milliseconds>(), "construct a new AMD GPU hardware sampler for the default device with the specified sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new AMD GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t, std::chrono::milliseconds>(), "construct a new AMD GPU hardware sampler for the specified device and sampling interval")
+        .def(py::init<std::size_t, std::chrono::milliseconds, hws::sample_category>(), "construct a new AMD GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples")
         .def("general_samples", &hws::gpu_amd_hardware_sampler::general_samples, "get all general samples")
         .def("clock_samples", &hws::gpu_amd_hardware_sampler::clock_samples, "get all clock related samples")
         .def("power_samples", &hws::gpu_amd_hardware_sampler::power_samples, "get all power related samples")
         .def("memory_samples", &hws::gpu_amd_hardware_sampler::memory_samples, "get all memory related samples")
         .def("temperature_samples", &hws::gpu_amd_hardware_sampler::temperature_samples, "get all temperature related samples")
+        .def("samples_only_as_yaml_string", &hws::gpu_amd_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::gpu_amd_hardware_sampler &self) {
-            return std::format("<HardwareSampling.GpuAmdHardwareSampler with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.GpuAmdHardwareSampler with\n{}\n>", self);
         });
 }
diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp
index 39b346a..aaae9ed 100644
--- a/bindings/gpu_intel_hardware_sampler.cpp
+++ b/bindings/gpu_intel_hardware_sampler.cpp
@@ -5,97 +5,123 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_intel/hardware_sampler.hpp"    // hws::gpu_intel_hardware_sampler
-#include "hardware_sampling/gpu_intel/level_zero_samples.hpp"  // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"              // hws::hardware_sampler
+#include "hws/gpu_intel/hardware_sampler.hpp"    // hws::gpu_intel_hardware_sampler
+#include "hws/gpu_intel/level_zero_samples.hpp"  // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
+#include "hws/hardware_sampler.hpp"              // hws::hardware_sampler
+#include "hws/sample_category.hpp"               // hws::sample_category
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
 #include <chrono>   // std::chrono::milliseconds
 #include <cstddef>  // std::size_t
-#include <format>   // std::format
 
 namespace py = pybind11;
 
 void init_gpu_intel_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::level_zero_general_samples>(m, "LevelZeroGeneralSamples")
+        .def("has_samples", &hws::level_zero_general_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_byte_order", &hws::level_zero_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
+        .def("get_vendor_id", &hws::level_zero_general_samples::get_vendor_id, "the vendor ID")
         .def("get_name", &hws::level_zero_general_samples::get_name, "the model name of the device")
+        .def("get_flags", &hws::level_zero_general_samples::get_flags, "potential GPU flags (e.g. integrated device)")
         .def("get_standby_mode", &hws::level_zero_general_samples::get_standby_mode, "the enabled standby mode (power saving or never)")
         .def("get_num_threads_per_eu", &hws::level_zero_general_samples::get_num_threads_per_eu, "the number of threads per EU unit")
         .def("get_eu_simd_width", &hws::level_zero_general_samples::get_eu_simd_width, "the physical EU unit SIMD width")
         .def("__repr__", [](const hws::level_zero_general_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroGeneralSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroGeneralSamples with\n{}\n>", self);
         });
 
     // bind the clock samples
     py::class_<hws::level_zero_clock_samples>(m, "LevelZeroClockSamples")
-        .def("get_clock_gpu_min", &hws::level_zero_clock_samples::get_clock_gpu_min, "the minimum possible GPU clock frequency in MHz")
-        .def("get_clock_gpu_max", &hws::level_zero_clock_samples::get_clock_gpu_max, "the maximum possible GPU clock frequency in MHz")
-        .def("get_available_clocks_gpu", &hws::level_zero_clock_samples::get_available_clocks_gpu, "the available GPU clock frequencies in MHz (slowest to fastest)")
-        .def("get_clock_mem_min", &hws::level_zero_clock_samples::get_clock_mem_min, "the minimum possible memory clock frequency in MHz")
-        .def("get_clock_mem_max", &hws::level_zero_clock_samples::get_clock_mem_max, "the maximum possible memory clock frequency in MHz")
-        .def("get_available_clocks_mem", &hws::level_zero_clock_samples::get_available_clocks_mem, "the available memory clock frequencies in MHz (slowest to fastest)")
-        .def("get_tdp_frequency_limit_gpu", &hws::level_zero_clock_samples::get_tdp_frequency_limit_gpu, "the current maximum allowed GPU frequency based on the TDP limit in MHz")
-        .def("get_clock_gpu", &hws::level_zero_clock_samples::get_clock_gpu, "the current GPU frequency in MHz")
-        .def("get_throttle_reason_gpu", &hws::level_zero_clock_samples::get_throttle_reason_gpu, "the current GPU frequency throttle reason")
-        .def("get_tdp_frequency_limit_mem", &hws::level_zero_clock_samples::get_tdp_frequency_limit_mem, "the current maximum allowed memory frequency based on the TDP limit in MHz")
-        .def("get_clock_mem", &hws::level_zero_clock_samples::get_clock_mem, "the current memory frequency in MHz")
-        .def("get_throttle_reason_mem", &hws::level_zero_clock_samples::get_throttle_reason_mem, "the current memory frequency throttle reason")
+        .def("has_samples", &hws::level_zero_clock_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_clock_frequency_min", &hws::level_zero_clock_samples::get_clock_frequency_min, "the minimum possible GPU clock frequency in MHz")
+        .def("get_clock_frequency_max", &hws::level_zero_clock_samples::get_clock_frequency_max, "the maximum possible GPU clock frequency in MHz")
+        .def("get_memory_clock_frequency_min", &hws::level_zero_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz")
+        .def("get_memory_clock_frequency_max", &hws::level_zero_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz")
+        .def("get_available_clock_frequencies", &hws::level_zero_clock_samples::get_available_clock_frequencies, "the available GPU clock frequencies in MHz (slowest to fastest)")
+        .def("get_available_memory_clock_frequencies", &hws::level_zero_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)")
+        .def("get_clock_frequency", &hws::level_zero_clock_samples::get_clock_frequency, "the current GPU frequency in MHz")
+        .def("get_memory_clock_frequency", &hws::level_zero_clock_samples::get_memory_clock_frequency, "the current memory frequency in MHz")
+        .def("get_throttle_reason", &hws::level_zero_clock_samples::get_throttle_reason, "the current GPU frequency throttle reason (as bitmask)")
+        .def("get_throttle_reason_string", &hws::level_zero_clock_samples::get_throttle_reason_string, "the current GPU frequency throttle reason (as string)")
+        .def("get_memory_throttle_reason", &hws::level_zero_clock_samples::get_memory_throttle_reason, "the current memory frequency throttle reason (as bitmask)")
+        .def("get_memory_throttle_reason_string", &hws::level_zero_clock_samples::get_memory_throttle_reason_string, "the current memory frequency throttle reason (as string)")
+        .def("get_frequency_limit_tdp", &hws::level_zero_clock_samples::get_frequency_limit_tdp, "the current maximum allowed GPU frequency based on the TDP limit in MHz")
+        .def("get_memory_frequency_limit_tdp", &hws::level_zero_clock_samples::get_memory_frequency_limit_tdp, "the current maximum allowed memory frequency based on the TDP limit in MHz")
         .def("__repr__", [](const hws::level_zero_clock_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroClockSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroClockSamples with\n{}\n>", self);
         });
 
     // bind the power samples
     py::class_<hws::level_zero_power_samples>(m, "LevelZeroPowerSamples")
-        .def("get_energy_threshold_enabled", &hws::level_zero_power_samples::get_energy_threshold_enabled, "true if the energy threshold is enabled")
-        .def("get_energy_threshold", &hws::level_zero_power_samples::get_energy_threshold, "the energy threshold in J")
-        .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ")
+        .def("has_samples", &hws::level_zero_power_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_power_enforced_limit", &hws::level_zero_power_samples::get_power_enforced_limit, "the actually enforced power limit (W), may be different from power management limit if external limiters are set")
+        .def("get_power_measurement_type", &hws::level_zero_power_samples::get_power_measurement_type, "the type of the power readings")
+        .def("get_power_management_mode", &hws::level_zero_power_samples::get_power_management_mode, "true if power management limits are enabled")
+        .def("get_power_usage", &hws::level_zero_power_samples::get_power_usage, "the current power draw of the GPU in W (calculated from power_total_energy_consumption)")
+        .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total energy consumption since the last driver reload in J")
         .def("__repr__", [](const hws::level_zero_power_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroPowerSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroPowerSamples with\n{}\n>", self);
         });
 
     // bind the memory samples
     py::class_<hws::level_zero_memory_samples>(m, "LevelZeroMemorySamples")
+        .def("has_samples", &hws::level_zero_memory_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_memory_total", &hws::level_zero_memory_samples::get_memory_total, "the total memory size of the different memory modules in Bytes")
-        .def("get_allocatable_memory_total", &hws::level_zero_memory_samples::get_allocatable_memory_total, "the total allocatable memory size of the different memory modules in Bytes")
-        .def("get_pcie_link_max_speed", &hws::level_zero_memory_samples::get_pcie_link_max_speed, "the maximum PCIe bandwidth in bytes/sec")
-        .def("get_pcie_max_width", &hws::level_zero_memory_samples::get_pcie_max_width, "the PCIe lane width")
-        .def("get_max_pcie_link_generation", &hws::level_zero_memory_samples::get_max_pcie_link_generation, "the PCIe generation")
-        .def("get_bus_width", &hws::level_zero_memory_samples::get_bus_width, "the bus width of the different memory modules")
-        .def("get_num_channels", &hws::level_zero_memory_samples::get_num_channels, "the number of memory channels of the different memory modules")
-        .def("get_location", &hws::level_zero_memory_samples::get_location, "the location of the different memory modules (system or device)")
+        .def("get_visible_memory_total", &hws::level_zero_memory_samples::get_visible_memory_total, "the total allocatable memory size of the different memory modules in Bytes")
+        .def("get_memory_location", &hws::level_zero_memory_samples::get_memory_location, "the location of the different memory modules (system or device)")
+        .def("get_num_pcie_lanes_max", &hws::level_zero_memory_samples::get_num_pcie_lanes_max, "the maximum PCIe lane width")
+        .def("get_pcie_link_generation_max", &hws::level_zero_memory_samples::get_pcie_link_generation_max, "the maximum PCIe generation")
+        .def("get_pcie_link_speed_max", &hws::level_zero_memory_samples::get_pcie_link_speed_max, "the maximum PCIe bandwidth in bytes/sec")
+        .def("get_memory_bus_width", &hws::level_zero_memory_samples::get_memory_bus_width, "the bus width of the different memory modules")
+        .def("get_memory_num_channels", &hws::level_zero_memory_samples::get_memory_num_channels, "the number of memory channels of the different memory modules")
         .def("get_memory_free", &hws::level_zero_memory_samples::get_memory_free, "the currently free memory of the different memory modules in Bytes")
-        .def("get_pcie_link_speed", &hws::level_zero_memory_samples::get_pcie_link_speed, "the current PCIe bandwidth in bytes/sec")
-        .def("get_pcie_link_width", &hws::level_zero_memory_samples::get_pcie_link_width, "the current PCIe lane width")
+        .def("get_memory_used", &hws::level_zero_memory_samples::get_memory_used, "the currently used memory of the different memory modules in Bytes")
+        .def("get_num_pcie_lanes", &hws::level_zero_memory_samples::get_num_pcie_lanes, "the current PCIe lane width")
         .def("get_pcie_link_generation", &hws::level_zero_memory_samples::get_pcie_link_generation, "the current PCIe generation")
+        .def("get_pcie_link_speed", &hws::level_zero_memory_samples::get_pcie_link_speed, "the current PCIe bandwidth in bytes/sec")
         .def("__repr__", [](const hws::level_zero_memory_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroMemorySamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroMemorySamples with\n{}\n>", self);
         });
 
     // bind the temperature samples
     py::class_<hws::level_zero_temperature_samples>(m, "LevelZeroTemperatureSamples")
-        .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum temperature for the sensor in °C")
-        .def("get_temperature_psu", &hws::level_zero_temperature_samples::get_temperature_psu, "the temperature of the PSU in °C")
-        .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current temperature for the sensor in °C")
+        .def("has_samples", &hws::level_zero_temperature_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_num_fans", &hws::level_zero_temperature_samples::get_num_fans, "the number of fans")
+        .def("get_fan_speed_max", &hws::level_zero_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in RPM")
+        .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum GPU temperature in °C")
+        .def("get_memory_temperature_max", &hws::level_zero_temperature_samples::get_memory_temperature_max, "the maximum memory temperature in °C")
+        .def("get_global_temperature_max", &hws::level_zero_temperature_samples::get_global_temperature_max, "the maximum global temperature in °C")
+        .def("get_fan_speed_percentage", &hws::level_zero_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %")
+        .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current GPU temperature in °C")
+        .def("get_memory_temperature", &hws::level_zero_temperature_samples::get_memory_temperature, "the current memory temperature in °C")
+        .def("get_global_temperature", &hws::level_zero_temperature_samples::get_global_temperature, "the current global temperature in °C")
+        .def("get_psu_temperature", &hws::level_zero_temperature_samples::get_psu_temperature, "the current PSU temperature in °C")
         .def("__repr__", [](const hws::level_zero_temperature_samples &self) {
-            return std::format("<HardwareSampling.LevelZeroTemperatureSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.LevelZeroTemperatureSamples with\n{}\n>", self);
         });
 
     // bind the GPU Intel hardware sampler class
     py::class_<hws::gpu_intel_hardware_sampler, hws::hardware_sampler>(m, "GpuIntelHardwareSampler")
         .def(py::init<>(), "construct a new Intel GPU hardware sampler for the default device with the default sampling interval")
+        .def(py::init<hws::sample_category>(), "construct a new Intel GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t>(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval")
+        .def(py::init<std::size_t, hws::sample_category>(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::chrono::milliseconds>(), "construct a new Intel GPU hardware sampler for the default device with the specified sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new Intel GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t, std::chrono::milliseconds>(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval")
+        .def(py::init<std::size_t, std::chrono::milliseconds, hws::sample_category>(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples")
         .def("general_samples", &hws::gpu_intel_hardware_sampler::general_samples, "get all general samples")
         .def("clock_samples", &hws::gpu_intel_hardware_sampler::clock_samples, "get all clock related samples")
         .def("power_samples", &hws::gpu_intel_hardware_sampler::power_samples, "get all power related samples")
         .def("memory_samples", &hws::gpu_intel_hardware_sampler::memory_samples, "get all memory related samples")
         .def("temperature_samples", &hws::gpu_intel_hardware_sampler::temperature_samples, "get all temperature related samples")
+        .def("samples_only_as_yaml_string", &hws::gpu_intel_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::gpu_intel_hardware_sampler &self) {
-            return std::format("<HardwareSampling.GpuIntelHardwareSampler with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.GpuIntelHardwareSampler with\n{}\n>", self);
         });
 }
diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp b/bindings/gpu_nvidia_hardware_sampler.cpp
index 21130ae..a32283a 100644
--- a/bindings/gpu_nvidia_hardware_sampler.cpp
+++ b/bindings/gpu_nvidia_hardware_sampler.cpp
@@ -5,102 +5,122 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
-#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"      // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"             // hws::hardware_sampler
+#include "hws/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
+#include "hws/gpu_nvidia/nvml_samples.hpp"      // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
+#include "hws/hardware_sampler.hpp"             // hws::hardware_sampler
+#include "hws/sample_category.hpp"              // hws::sample_category
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // automatic bindings for std::chrono::milliseconds
 #include "pybind11/pybind11.h"  // py::module_
 #include "pybind11/stl.h"       // bind STL types
 
 #include <chrono>   // std::chrono::milliseconds
 #include <cstddef>  // std::size_t
-#include <format>   // std::format
 
 namespace py = pybind11;
 
 void init_gpu_nvidia_hardware_sampler(py::module_ &m) {
     // bind the general samples
     py::class_<hws::nvml_general_samples>(m, "NvmlGeneralSamples")
+        .def("has_samples", &hws::nvml_general_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_architecture", &hws::nvml_general_samples::get_architecture, "the architecture name of the device")
+        .def("get_byte_order", &hws::nvml_general_samples::get_byte_order, "the byte order (e.g., little/big endian)")
+        .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores")
+        .def("get_vendor_id", &hws::nvml_general_samples::get_vendor_id, "the vendor ID")
         .def("get_name", &hws::nvml_general_samples::get_name, "the name of the device")
         .def("get_persistence_mode", &hws::nvml_general_samples::get_persistence_mode, "the persistence mode: if true, the driver is always loaded reducing the latency for the first API call")
-        .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores")
-        .def("get_performance_state", &hws::nvml_general_samples::get_performance_state, "the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance")
-        .def("get_utilization_gpu", &hws::nvml_general_samples::get_utilization_gpu, "the GPU compute utilization in percent")
-        .def("get_utilization_mem", &hws::nvml_general_samples::get_utilization_mem, "the GPU memory utilization in percent")
+        .def("get_compute_utilization", &hws::nvml_general_samples::get_compute_utilization, "the GPU compute utilization in percent")
+        .def("get_memory_utilization", &hws::nvml_general_samples::get_memory_utilization, "the GPU memory utilization in percent")
+        .def("get_performance_level", &hws::nvml_general_samples::get_performance_level, "the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance")
         .def("__repr__", [](const hws::nvml_general_samples &self) {
-            return std::format("<HardwareSampling.NvmlGeneralSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlGeneralSamples with\n{}\n>", self);
         });
 
     // bind the clock samples
     py::class_<hws::nvml_clock_samples>(m, "NvmlClockSamples")
-        .def("get_adaptive_clock_status", &hws::nvml_clock_samples::get_adaptive_clock_status, "true if clock boosting is currently enabled")
-        .def("get_clock_graph_min", &hws::nvml_clock_samples::get_clock_graph_min, "the minimum possible graphics clock frequency in MHz")
-        .def("get_clock_graph_max", &hws::nvml_clock_samples::get_clock_graph_max, "the maximum possible graphics clock frequency in MHz")
-        .def("get_clock_sm_max", &hws::nvml_clock_samples::get_clock_sm_max, "the maximum possible SM clock frequency in MHz")
-        .def("get_clock_mem_min", &hws::nvml_clock_samples::get_clock_mem_min, "the minimum possible memory clock frequency in MHz")
-        .def("get_clock_mem_max", &hws::nvml_clock_samples::get_clock_mem_max, "the maximum possible memory clock frequency in MHz")
-        .def("get_clock_graph", &hws::nvml_clock_samples::get_clock_graph, "the current graphics clock frequency in MHz")
-        .def("get_clock_sm", &hws::nvml_clock_samples::get_clock_sm, "the current SM clock frequency in Mhz")
-        .def("get_clock_mem", &hws::nvml_clock_samples::get_clock_mem, "the current memory clock frequency in MHz")
-        .def("get_clock_throttle_reason", &hws::nvml_clock_samples::get_clock_throttle_reason, "the reason the GPU clock throttled (bitmask)")
-        .def("get_auto_boosted_clocks", &hws::nvml_clock_samples::get_auto_boosted_clocks, "true if the clocks are currently auto boosted")
+        .def("has_samples", &hws::nvml_clock_samples::has_samples, "true if any sample is available, false otherwise")
+        .def("get_auto_boosted_clock_enabled", &hws::nvml_clock_samples::get_auto_boosted_clock_enabled, "true if clock boosting is currently enabled")
+        .def("get_clock_frequency_min", &hws::nvml_clock_samples::get_clock_frequency_min, "the minimum possible graphics clock frequency in MHz")
+        .def("get_clock_frequency_max", &hws::nvml_clock_samples::get_clock_frequency_max, "the maximum possible graphics clock frequency in MHz")
+        .def("get_memory_clock_frequency_min", &hws::nvml_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz")
+        .def("get_memory_clock_frequency_max", &hws::nvml_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz")
+        .def("get_sm_clock_frequency_max", &hws::nvml_clock_samples::get_sm_clock_frequency_max, "the maximum possible SM clock frequency in MHz")
+        .def("get_clock_frequency", &hws::nvml_clock_samples::get_clock_frequency, "the current graphics clock frequency in MHz")
+        .def("get_available_clock_frequencies", &hws::nvml_clock_samples::get_available_clock_frequencies, "the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest)")
+        .def("get_available_memory_clock_frequencies", &hws::nvml_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)")
+        .def("get_memory_clock_frequency", &hws::nvml_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz")
+        .def("get_sm_clock_frequency", &hws::nvml_clock_samples::get_sm_clock_frequency, "the current SM clock frequency in MHz")
+        .def("get_throttle_reason", &hws::nvml_clock_samples::get_throttle_reason, "the reason the GPU clock throttled (as bitmask)")
+        .def("get_throttle_reason_string", &hws::nvml_clock_samples::get_throttle_reason_string, "the reason the GPU clock throttled (as string)")
+        .def("get_auto_boosted_clock", &hws::nvml_clock_samples::get_auto_boosted_clock, "true if the clocks are currently auto boosted")
         .def("__repr__", [](const hws::nvml_clock_samples &self) {
-            return std::format("<HardwareSampling.NvmlClockSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlClockSamples with\n{}\n>", self);
         });
 
     // bind the power samples
     py::class_<hws::nvml_power_samples>(m, "NvmlPowerSamples")
-        .def("get_power_management_mode", &hws::nvml_power_samples::get_power_management_mode, "true if power management algorithms are supported and active")
+        .def("has_samples", &hws::nvml_power_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_power_management_limit", &hws::nvml_power_samples::get_power_management_limit, "if the GPU draws more power (mW) than the power management limit, the GPU may throttle")
         .def("get_power_enforced_limit", &hws::nvml_power_samples::get_power_enforced_limit, "the actually enforced power limit, may be different from power management limit if external limiters are set")
-        .def("get_power_state", &hws::nvml_power_samples::get_power_state, "the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power")
+        .def("get_power_measurement_type", &hws::nvml_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw")
+        .def("get_power_management_mode", &hws::nvml_power_samples::get_power_management_mode, "true if power management algorithms are supported and active")
+        .def("get_available_power_profiles", &hws::nvml_power_samples::get_available_power_profiles, "a list of the available power profiles")
-        .def("get_power_usage", &hws::nvml_power_samples::get_power_usage, "the current power draw of the GPU and its related circuity (e.g., memory) in mW")
+        .def("get_power_usage", &hws::nvml_power_samples::get_power_usage, "the current power draw of the GPU and its related circuitry (e.g., memory) in mW")
         .def("get_power_total_energy_consumption", &hws::nvml_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ")
+        .def("get_power_profile", &hws::nvml_power_samples::get_power_profile, "the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power")
         .def("__repr__", [](const hws::nvml_power_samples &self) {
-            return std::format("<HardwareSampling.NvmlPowerSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlPowerSamples with\n{}\n>", self);
         });
 
     // bind the memory samples
     py::class_<hws::nvml_memory_samples>(m, "NvmlMemorySamples")
+        .def("has_samples", &hws::nvml_memory_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_memory_total", &hws::nvml_memory_samples::get_memory_total, "the total available memory in Byte")
-        .def("get_pcie_link_max_speed", &hws::nvml_memory_samples::get_pcie_link_max_speed, "the maximum PCIe link speed in MBPS")
+        .def("get_num_pcie_lanes_max", &hws::nvml_memory_samples::get_num_pcie_lanes_max, "the maximum number of PCIe lanes")
+        .def("get_pcie_link_generation_max", &hws::nvml_memory_samples::get_pcie_link_generation_max, "the maximum PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)")
+        .def("get_pcie_link_speed_max", &hws::nvml_memory_samples::get_pcie_link_speed_max, "the maximum PCIe link speed in MBPS")
-        .def("get_memory_bus_width", &hws::nvml_memory_samples::get_memory_bus_width, "the memory bus with in Bit")
+        .def("get_memory_bus_width", &hws::nvml_memory_samples::get_memory_bus_width, "the memory bus width in Bit")
-        .def("get_max_pcie_link_generation", &hws::nvml_memory_samples::get_max_pcie_link_generation, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)")
-        .def("get_memory_free", &hws::nvml_memory_samples::get_memory_free, "the currently free memory in Byte")
         .def("get_memory_used", &hws::nvml_memory_samples::get_memory_used, "the currently used memory in Byte")
-        .def("get_pcie_link_speed", &hws::nvml_memory_samples::get_pcie_link_speed, "the current PCIe link speed in MBPS")
-        .def("get_pcie_link_width", &hws::nvml_memory_samples::get_pcie_link_width, "the current PCIe link width (e.g., x16, x8, x4, etc)")
+        .def("get_memory_free", &hws::nvml_memory_samples::get_memory_free, "the currently free memory in Byte")
+        .def("get_num_pcie_lanes", &hws::nvml_memory_samples::get_num_pcie_lanes, "the current PCIe link width (e.g., x16, x8, x4, etc)")
         .def("get_pcie_link_generation", &hws::nvml_memory_samples::get_pcie_link_generation, "the current PCIe link generation (may change during runtime to save energy)")
+        .def("get_pcie_link_speed", &hws::nvml_memory_samples::get_pcie_link_speed, "the current PCIe link speed in MBPS")
         .def("__repr__", [](const hws::nvml_memory_samples &self) {
-            return std::format("<HardwareSampling.NvmlMemorySamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlMemorySamples with\n{}\n>", self);
         });
 
     // bind the temperature samples
     py::class_<hws::nvml_temperature_samples>(m, "NvmlTemperatureSamples")
+        .def("has_samples", &hws::nvml_temperature_samples::has_samples, "true if any sample is available, false otherwise")
         .def("get_num_fans", &hws::nvml_temperature_samples::get_num_fans, "the number of fans (if any)")
-        .def("get_min_fan_speed", &hws::nvml_temperature_samples::get_min_fan_speed, "the minimum fan speed the user can set in %")
-        .def("get_max_fan_speed", &hws::nvml_temperature_samples::get_max_fan_speed, "the maximum fan speed the user can set in %")
-        .def("get_temperature_threshold_gpu_max", &hws::nvml_temperature_samples::get_temperature_threshold_gpu_max, "the maximum graphics temperature threshold in °C")
-        .def("get_temperature_threshold_mem_max", &hws::nvml_temperature_samples::get_temperature_threshold_mem_max, "the maximum memory temperature threshold in °C")
-        .def("get_fan_speed", &hws::nvml_temperature_samples::get_fan_speed, "the current intended fan speed in %")
-        .def("get_temperature_gpu", &hws::nvml_temperature_samples::get_temperature_gpu, "the current GPU temperature in °C")
+        .def("get_fan_speed_min", &hws::nvml_temperature_samples::get_fan_speed_min, "the minimum fan speed the user can set in %")
+        .def("get_fan_speed_max", &hws::nvml_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in %")
+        .def("get_temperature_max", &hws::nvml_temperature_samples::get_temperature_max, "the maximum graphics temperature threshold in °C")
+        .def("get_memory_temperature_max", &hws::nvml_temperature_samples::get_memory_temperature_max, "the maximum memory temperature threshold in °C")
+        .def("get_fan_speed_percentage", &hws::nvml_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %")
+        .def("get_temperature", &hws::nvml_temperature_samples::get_temperature, "the current GPU temperature in °C")
         .def("__repr__", [](const hws::nvml_temperature_samples &self) {
-            return std::format("<HardwareSampling.NvmlTemperatureSamples with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.NvmlTemperatureSamples with\n{}\n>", self);
         });
 
     // bind the GPU NVIDIA hardware sampler class
     py::class_<hws::gpu_nvidia_hardware_sampler, hws::hardware_sampler>(m, "GpuNvidiaHardwareSampler")
         .def(py::init<>(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval")
+        .def(py::init<hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t>(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval")
+        .def(py::init<std::size_t, hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::chrono::milliseconds>(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples")
         .def(py::init<std::size_t, std::chrono::milliseconds>(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval")
+        .def(py::init<std::size_t, std::chrono::milliseconds, hws::sample_category>(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples")
         .def("general_samples", &hws::gpu_nvidia_hardware_sampler::general_samples, "get all general samples")
         .def("clock_samples", &hws::gpu_nvidia_hardware_sampler::clock_samples, "get all clock related samples")
         .def("power_samples", &hws::gpu_nvidia_hardware_sampler::power_samples, "get all power related samples")
         .def("memory_samples", &hws::gpu_nvidia_hardware_sampler::memory_samples, "get all memory related samples")
         .def("temperature_samples", &hws::gpu_nvidia_hardware_sampler::temperature_samples, "get all temperature related samples")
+        .def("samples_only_as_yaml_string", &hws::gpu_nvidia_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::gpu_nvidia_hardware_sampler &self) {
-            return std::format("<HardwareSampling.GpuNvidiaHardwareSampler with\n{}\n>", self);
+            return fmt::format("<HardwareSampling.GpuNvidiaHardwareSampler with\n{}\n>", self);
         });
 }
diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp
index 5d45f74..5a12141 100644
--- a/bindings/hardware_sampler.cpp
+++ b/bindings/hardware_sampler.cpp
@@ -5,28 +5,31 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
+#include "hws/hardware_sampler.hpp"  // hws::hardware_sampler
 
-#include "hardware_sampling/event.hpp"  // hws::event
+#include "hws/event.hpp"    // hws::event
+#include "hws/utility.hpp"  // hws::detail::durations_from_reference_time
 
 #if defined(HWS_FOR_CPUS_ENABLED)
-    #include "hardware_sampling/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
+    #include "hws/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
+#endif
+#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
+    #include "hws/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
 #endif
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
+    #include "hws/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
 #endif
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_intel/hardware_sampler.hpp"  // hws::gpu_intel_hardware_sampler
-#endif
-#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
+    #include "hws/gpu_intel/hardware_sampler.hpp"  // hws::gpu_intel_hardware_sampler
 #endif
 
+#include "fmt/format.h"         // fmt::format
 #include "pybind11/chrono.h"    // bind std::chrono types
 #include "pybind11/pybind11.h"  // py::module_, py::class_
 #include "pybind11/stl.h"       // bind STL types
 
-#include <format>  // std::format
+#include "relative_event.hpp"  // hws::detail::relative_event
+#include <string>              // std::string
 
 namespace py = pybind11;
 
@@ -47,31 +50,40 @@ void init_hardware_sampler(py::module_ &m) {
         .def("add_event", py::overload_cast<decltype(hws::event::name)>(&hws::hardware_sampler::add_event), "add a new event using a name, the current time is used as time point")
         .def("num_events", &hws::hardware_sampler::num_events, "get the number of events")
         .def("get_events", &hws::hardware_sampler::get_events, "get all events")
+        .def("get_relative_events", [](const hws::hardware_sampler &self) {
+            std::vector<hws::detail::relative_event> relative_events{};
+            for (const hws::event &e : self.get_events()) {
+                relative_events.emplace_back(hws::detail::duration_from_reference_time(e.time_point, self.get_event(0).time_point), e.name);
+            }
+            return relative_events; }, "get all relative events")
         .def("get_event", &hws::hardware_sampler::get_event, "get a specific event")
+        .def("get_relative_event", [](const hws::hardware_sampler &self, const std::size_t idx) { return hws::detail::relative_event{ hws::detail::duration_from_reference_time(self.get_event(idx).time_point, self.get_event(0).time_point), self.get_event(idx).name }; }, "get a specific relative event")
         .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples")
+        .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
         .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)")
-        .def("dump_yaml", py::overload_cast<const std::string &>(&hws::hardware_sampler::dump_yaml), "dump all hardware samples to the given YAML file")
+        .def("dump_yaml", py::overload_cast<const std::string &>(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file")
+        .def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples including additional information like events as YAML string")
+        .def("samples_only_as_yaml_string", &hws::hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string")
         .def("__repr__", [](const hws::hardware_sampler &self) {
 #if defined(HWS_FOR_CPUS_ENABLED)
             if (dynamic_cast<const hws::cpu_hardware_sampler *>(&self)) {
-                return std::format("<plssvm.detail.tracking.CpuHardwareSampler with\n{}\n>", dynamic_cast<const hws::cpu_hardware_sampler &>(self));
+                return fmt::format("<HardwareSampling.CpuHardwareSampler with\n{}\n>", dynamic_cast<const hws::cpu_hardware_sampler &>(self));
             }
 #endif
 #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
             if (dynamic_cast<const hws::gpu_nvidia_hardware_sampler *>(&self)) {
-                return std::format("<plssvm.detail.tracking.GpuNvidiaHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_nvidia_hardware_sampler &>(self));
+                return fmt::format("<HardwareSampling.GpuNvidiaHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_nvidia_hardware_sampler &>(self));
             }
 #endif
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
             if (dynamic_cast<const hws::gpu_amd_hardware_sampler *>(&self)) {
-                return std::format("<plssvm.detail.tracking.GpuAmdHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_amd_hardware_sampler &>(self));
+                return fmt::format("<HardwareSampling.GpuAmdHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_amd_hardware_sampler &>(self));
             }
 #endif
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
             if (dynamic_cast<const hws::gpu_intel_hardware_sampler *>(&self)) {
-                return std::format("<plssvm.detail.tracking.GpuIntelHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_intel_hardware_sampler &>(self));
+                return fmt::format("<HardwareSampling.GpuIntelHardwareSampler with\n{}\n>", dynamic_cast<const hws::gpu_intel_hardware_sampler &>(self));
             }
 #endif
-            return std::string{ "unknown" };
-        });
+            return std::string{ "unknown" }; });
 }
diff --git a/bindings/main.cpp b/bindings/main.cpp
index 5a4c01c..f3dca3f 100644
--- a/bindings/main.cpp
+++ b/bindings/main.cpp
@@ -7,46 +7,57 @@
 
 #include "pybind11/pybind11.h"  // PYBIND11_MODULE, py::module_
 
+#include <string_view>  // std::string_view
+
+#define HWS_IS_DEFINED_HELPER(x) #x  // stringifies its argument; called with the already macro-expanded x
+#define HWS_IS_DEFINED(x) (std::string_view{ #x } != std::string_view{ HWS_IS_DEFINED_HELPER(x) })  // true iff x expands to something other than its own name
+
 namespace py = pybind11;
 
 // forward declare binding functions
 void init_event(py::module_ &);
+void init_sample_category(py::module_ &);
+void init_relative_event(py::module_ &);
 void init_hardware_sampler(py::module_ &);
+void init_system_hardware_sampler(py::module_ &);
 void init_cpu_hardware_sampler(py::module_ &);
 void init_gpu_nvidia_hardware_sampler(py::module_ &);
 void init_gpu_amd_hardware_sampler(py::module_ &);
 void init_gpu_intel_hardware_sampler(py::module_ &);
+void init_version(py::module_ &);
 
 PYBIND11_MODULE(HardwareSampling, m) {
     m.doc() = "Hardware Sampling for CPUs and GPUs";
 
     init_event(m);
+    init_sample_category(m);
+    init_relative_event(m);
     init_hardware_sampler(m);
+    init_system_hardware_sampler(m);
 
+    // CPU sampling: bindings are only registered if enabled at compile time, the has_* query is always registered
 #if defined(HWS_FOR_CPUS_ENABLED)
     init_cpu_hardware_sampler(m);
-    m.def("has_cpu_hardware_sampler", []{return true;} );
-#else
-    m.def("has_cpu_hardware_sampler", []{return false;} );
 #endif
+    m.def("has_cpu_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_CPUS_ENABLED); });
+
+    // NVIDIA GPU sampling: bindings are only registered if enabled at compile time, the has_* query is always registered
 #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
     init_gpu_nvidia_hardware_sampler(m);
-    m.def("has_gpu_nvidia_hardware_sampler", []{return true;} );
-#else
-    m.def("has_gpu_nvidia_hardware_sampler", []{return false;} );
 #endif
+    m.def("has_gpu_nvidia_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_NVIDIA_GPUS_ENABLED); });
+
+    // AMD GPU sampling: bindings are only registered if enabled at compile time, the has_* query is always registered
 #if defined(HWS_FOR_AMD_GPUS_ENABLED)
     init_gpu_amd_hardware_sampler(m);
-    m.def("has_gpu_amd_hardware_sampler", []{return true;} );
-#else
-    m.def("has_gpu_amd_hardware_sampler", []{return false;} );
 #endif
+    m.def("has_gpu_amd_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_AMD_GPUS_ENABLED); });
 
+    // Intel GPU sampling: bindings are only registered if enabled at compile time, the has_* query is always registered
 #if defined(HWS_FOR_INTEL_GPUS_ENABLED)
     init_gpu_intel_hardware_sampler(m);
-    m.def("has_gpu_intel_hardware_sampler", []{return true;} );
-#else
-    m.def("has_gpu_intel_hardware_sampler", []{return false;} );
 #endif
+    m.def("has_gpu_intel_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_INTEL_GPUS_ENABLED); });
 
+    init_version(m);
 }
diff --git a/bindings/relative_event.cpp b/bindings/relative_event.cpp
new file mode 100644
index 0000000..c0cb611
--- /dev/null
+++ b/bindings/relative_event.cpp
@@ -0,0 +1,26 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "relative_event.hpp"  // hws::detail::relative_event
+
+#include "fmt/format.h"         // fmt::format
+#include "pybind11/chrono.h"    // bind std::chrono types
+#include "pybind11/pybind11.h"  // py::module_
+#include "pybind11/stl.h"       // bind STL types
+
+namespace py = pybind11;
+
+void init_relative_event(py::module_ &m) {
+    // a special python only struct encapsulating a relative event, i.e., an event where its "relative_time_point" member is the time passed since the first event
+    py::class_<hws::detail::relative_event>(m, "RelativeEvent")
+        .def(py::init<decltype(hws::detail::relative_event::relative_time_point), decltype(hws::detail::relative_event::name)>(), "construct a new relative event using a relative time point and a name")
+        .def_readonly("relative_time_point", &hws::detail::relative_event::relative_time_point, "read the relative time point associated to this event")
+        .def_readonly("name", &hws::detail::relative_event::name, "read the name associated to this event")
+        .def("__repr__", [](const hws::detail::relative_event &self) {
+            return fmt::format("<HardwareSampling.RelativeEvent with {{ time_point: {}, name: {} }}>", self.relative_time_point, self.name);
+        });
+}
diff --git a/bindings/relative_event.hpp b/bindings/relative_event.hpp
new file mode 100644
index 0000000..fcdd02e
--- /dev/null
+++ b/bindings/relative_event.hpp
@@ -0,0 +1,40 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines a struct encapsulating a single event with a relative time point.
+ */
+
+#ifndef HWS_BINDINGS_RELATIVE_EVENT_HPP_
+#define HWS_BINDINGS_RELATIVE_EVENT_HPP_
+
+#include <string>   // std::string
+#include <utility>  // std::move
+
+namespace hws::detail {
+
+/**
+ * @brief A struct encapsulating a single event with a relative time point, i.e., a plain `double` instead of an absolute time point.
+ */
+struct relative_event {
+    /**
+     * @brief Construct a new relative event given a relative time point and a name.
+     * @param[in] relative_time_point_p the time when the event occurred relative to the first event
+     * @param[in] name_p the name of the event (taken by value and moved into the member)
+     */
+    relative_event(const double relative_time_point_p, std::string name_p) :
+        relative_time_point{ relative_time_point_p },
+        name{ std::move(name_p) } { }
+
+    /// The relative time point this event occurred at.
+    double relative_time_point;
+    /// The name of this event.
+    std::string name;
+};
+
+}  // namespace hws::detail
+
+#endif  // HWS_BINDINGS_RELATIVE_EVENT_HPP_
diff --git a/bindings/sample_category.cpp b/bindings/sample_category.cpp
new file mode 100644
index 0000000..455914c
--- /dev/null
+++ b/bindings/sample_category.cpp
@@ -0,0 +1,30 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/sample_category.hpp"  // hws::sample_category
+
+#include "pybind11/operators.h"  // operator overloading
+#include "pybind11/pybind11.h"   // py::module_, py::overload_cast
+
+namespace py = pybind11;
+
+void init_sample_category(py::module_ &m) {
+    // bind the sample_category enum together with its bitwise operators (~, &, |, ^) so that categories can be combined in Python
+    py::enum_<hws::sample_category>(m, "SampleCategory")
+        .value("GENERAL", hws::sample_category::general, "General hardware samples like architecture, names, or utilization.")
+        .value("CLOCK", hws::sample_category::clock, "Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons.")
+        .value("POWER", hws::sample_category::power, "Power-related hardware samples like current power draw or total energy consumption.")
+        .value("MEMORY", hws::sample_category::memory, "Memory-related hardware samples like memory usage or PCIe information.")
+        .value("TEMPERATURE", hws::sample_category::temperature, "Temperature-related hardware samples like maximum and current temperatures.")
+        .value("GFX", hws::sample_category::gfx, "Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler.")
+        .value("IDLE_STATE", hws::sample_category::idle_state, "Idle-state-related hardware samples. Only used in the cpu_hardware_sampler.")
+        .value("ALL", hws::sample_category::all, "Shortcut to enable all available hardware samples (default).")
+        .def("__invert__", py::overload_cast<hws::sample_category>(&hws::operator~))  // Python: ~category
+        .def("__and__", py::overload_cast<hws::sample_category, hws::sample_category>(&hws::operator&))  // Python: lhs & rhs
+        .def("__or__", py::overload_cast<hws::sample_category, hws::sample_category>(&hws::operator|))  // Python: lhs | rhs
+        .def("__xor__", py::overload_cast<hws::sample_category, hws::sample_category>(&hws::operator^));  // Python: lhs ^ rhs
+}
diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp
new file mode 100644
index 0000000..d9af622
--- /dev/null
+++ b/bindings/system_hardware_sampler.cpp
@@ -0,0 +1,71 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/system_hardware_sampler.hpp"  // hws::system_hardware_sampler
+
+#include "hws/event.hpp"            // hws::event
+#include "hws/sample_category.hpp"  // hws::sample_category
+#include "hws/utility.hpp"          // hws::detail::durations_from_reference_time
+
+#include "fmt/format.h"         // fmt::format
+#include "pybind11/chrono.h"    // bind std::chrono types
+#include "pybind11/pybind11.h"  // py::module_, py::class_
+#include "pybind11/stl.h"       // bind STL types
+
+#include "relative_event.hpp"  // hws::detail::relative_event
+#include <string>              // std::string
+
+namespace py = pybind11;
+
+void init_system_hardware_sampler(py::module_ &m) {
+    // bind hws::system_hardware_sampler, which bundles all hardware samplers available on the system, as Python class "SystemHardwareSampler"
+    py::class_<hws::system_hardware_sampler>(m, "SystemHardwareSampler")
+        .def(py::init<>(), "construct a new system hardware sampler with the default sampling interval")
+        .def(py::init<hws::sample_category>(), "construct a new system hardware sampler with the default sampling interval sampling only the provided sample_category samples")
+        .def(py::init<std::chrono::milliseconds>(), "construct a new system hardware sampler with the specified sampling interval")
+        .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new system hardware sampler with the specified sampling interval sampling only the provided sample_category samples")
+        .def("start", &hws::system_hardware_sampler::start_sampling, "start hardware sampling for all available hardware samplers")
+        .def("stop", &hws::system_hardware_sampler::stop_sampling, "stop hardware sampling for all available hardware samplers")
+        .def("pause", &hws::system_hardware_sampler::pause_sampling, "pause hardware sampling for all available hardware samplers")
+        .def("resume", &hws::system_hardware_sampler::resume_sampling, "resume hardware sampling for all available hardware samplers")
+        .def("has_started", &hws::system_hardware_sampler::has_sampling_started, "check whether hardware sampling has already been started for all hardware samplers")
+        .def("is_sampling", &hws::system_hardware_sampler::is_sampling, "check whether the hardware sampling is currently active for all hardware samplers")
+        .def("has_stopped", &hws::system_hardware_sampler::has_sampling_stopped, "check whether hardware sampling has already been stopped for all hardware samplers")
+        .def("add_event", py::overload_cast<hws::event>(&hws::system_hardware_sampler::add_event), "add a new event to all hardware samplers")
+        .def("add_event", py::overload_cast<decltype(hws::event::time_point), decltype(hws::event::name)>(&hws::system_hardware_sampler::add_event), "add a new event using a time point and a name to all hardware samplers")
+        .def("add_event", py::overload_cast<decltype(hws::event::name)>(&hws::system_hardware_sampler::add_event), "add a new event using a name, the current time is used as time point to all hardware samplers")
+        .def("num_events", &hws::system_hardware_sampler::num_events, "get the number of events separately for each hardware sampler")
+        .def("get_events", &hws::system_hardware_sampler::get_events, "get all events separately for each hardware sampler")
+        .def("get_relative_events", [](const hws::system_hardware_sampler &self) {
+             std::vector<std::vector<hws::detail::relative_event>> relative_events{};
+             for (const std::vector<hws::event> &events : self.get_events()) {  // one event list per hardware sampler
+                 relative_events.emplace_back();
+                 for (const hws::event &e : events) {
+                     relative_events.back().emplace_back(hws::detail::duration_from_reference_time(e.time_point, events[0].time_point), e.name);  // relative to the first event of the same sampler
+                 }
+             }
+             return relative_events; }, "get all relative events separately for each hardware sampler")
+        .def("time_points", &hws::system_hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples separately for each hardware sampler")
+        .def("relative_time_points", [](const hws::system_hardware_sampler &self) {
+            const auto &time_points = self.sampling_time_points();  // query both collections once instead of once per loop iteration
+            const auto &events = self.get_events();
+            std::vector<std::vector<double>> relative_time_points{};
+            for (std::size_t s = 0; s < self.num_samplers(); ++s) {
+                relative_time_points.emplace_back(hws::detail::durations_from_reference_time(time_points[s], events.at(s).at(0).time_point));  // .at(): a sampler without any event raises an exception instead of reading out of bounds
+            }
+            return relative_time_points; }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)")
+        .def("sampling_interval", &hws::system_hardware_sampler::sampling_interval, "get the sampling interval separately for each hardware sampler (in ms)")
+        .def("num_samplers", &hws::system_hardware_sampler::num_samplers, "get the number of hardware samplers available for the whole system")
+        .def("samplers", [](hws::system_hardware_sampler &self) {
+            std::vector<hws::hardware_sampler*> out{};  // raw pointers obtained via .get(): the smart pointers held by self remain the owners
+            for (auto &ptr : self.samplers()) { out.push_back(ptr.get()); }
+            return out; }, py::return_value_policy::reference_internal, "get the hardware samplers available for the whole system")  // reference_internal: Python must not take ownership (pybind11's default for pointers) and self is kept alive
+        .def("sampler", [](hws::system_hardware_sampler &self, const std::size_t idx) { return self.sampler(idx).get(); }, py::return_value_policy::reference_internal, "get the i-th hardware sampler available for the whole system")
+        .def("dump_yaml", py::overload_cast<const std::string &>(&hws::system_hardware_sampler::dump_yaml, py::const_), "dump all hardware samples for all hardware samplers to the given YAML file")
+        .def("as_yaml_string", &hws::system_hardware_sampler::as_yaml_string, "return all hardware samples for all hardware samplers as YAML string")
+        .def("__repr__", [](const hws::system_hardware_sampler &self) { return fmt::format("<hws.SystemHardwareSampler with {} samplers>", self.num_samplers()); });  // the reported number is the sampler count
+}
diff --git a/bindings/version.cpp b/bindings/version.cpp
new file mode 100644
index 0000000..e5481d1
--- /dev/null
+++ b/bindings/version.cpp
@@ -0,0 +1,26 @@
+/**
+* @author Marcel Breyer
+* @copyright 2024-today All Rights Reserved
+* @license This file is released under the MIT license.
+*          See the LICENSE.md file in the project root for full license information.
+*/
+
+#include "hws/version.hpp"  // hws::version
+
+#include "pybind11/pybind11.h"   // py::module_
+
+namespace py = pybind11;
+
+// empty tag type: only serves as the C++ type behind the Python-side "version" class
+class version { };
+
+void init_version(py::module_ &m) {
+    // expose the version information of the hws library as static attributes of the Python class "version"
+    // read-only static properties (instead of plain attributes) are used to enforce that the values are read-only
+    py::class_<version> version_binding(m, "version");
+    version_binding.def_property_readonly_static("name", [](const py::object & /* self */) { return hws::version::name; }, "the name of the hws library");
+    version_binding.def_property_readonly_static("version", [](const py::object & /* self */) { return hws::version::version; }, "the used version of the hws library");
+    version_binding.def_property_readonly_static("major", [](const py::object & /* self */) { return hws::version::major; }, "the used major version of the hws library");
+    version_binding.def_property_readonly_static("minor", [](const py::object & /* self */) { return hws::version::minor; }, "the used minor version of the hws library");
+    version_binding.def_property_readonly_static("patch", [](const py::object & /* self */) { return hws::version::patch; }, "the used patch version of the hws library");
+}
diff --git a/cmake/hardware_samplingConfig.cmake.in b/cmake/hwsConfig.cmake.in
similarity index 55%
rename from cmake/hardware_samplingConfig.cmake.in
rename to cmake/hwsConfig.cmake.in
index 56ba42a..852e638 100644
--- a/cmake/hardware_samplingConfig.cmake.in
+++ b/cmake/hwsConfig.cmake.in
@@ -8,6 +8,12 @@
 
 include(CMakeFindDependencyMacro)
 
+# always try finding {fmt}
+# -> CMAKE_PREFIX_PATH necessary if built via FetchContent
+# -> doesn't hurt to be set every time
+list(APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/fmt")
+find_dependency(fmt REQUIRED)
+
 # sanity checks
-include("${CMAKE_CURRENT_LIST_DIR}/hardware_samplingTargets.cmake")
-check_required_components("hardware_sampling")
\ No newline at end of file
+include("${CMAKE_CURRENT_LIST_DIR}/hwsTargets.cmake")
+check_required_components("hws")
\ No newline at end of file
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
new file mode 100644
index 0000000..1623953
--- /dev/null
+++ b/docs/CMakeLists.txt
@@ -0,0 +1,56 @@
+## Authors: Marcel Breyer
+## Copyright (C): 2024-today All Rights Reserved
+## License: This file is released under the MIT license.
+##          See the LICENSE.md file in the project root for full license information.
+########################################################################################################################
+
+########################################################################################################################
+##                                     setup documentation generation with doxygen                                    ##
+########################################################################################################################
+## use installed doxygen
+find_package(Doxygen REQUIRED OPTIONAL_COMPONENTS dot)
+
+## configure doxygen
+set(DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/docs")
+set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${PROJECT_SOURCE_DIR}/README.md")
+set(DOXYGEN_FILE_PATTERNS "*.hpp;")
+set(DOXYGEN_STRIP_FROM_PATH "${PROJECT_SOURCE_DIR}")
+set(DOXYGEN_ABBREVIATE_BRIEF "")
+set(DOXYGEN_QUIET "YES")
+set(DOXYGEN_HTML_TIMESTAMP "YES")
+set(DOXYGEN_NUM_PROC_THREADS 0)
+set(DOXYGEN_WARN_NO_PARAMDOC "YES")
+set(DOXYGEN_SORT_MEMBER_DOCS "NO")
+set(DOXYGEN_INLINE_INHERITED_MEMB "YES")
+set(DOXYGEN_USE_MATHJAX "YES")
+set(DOXYGEN_EXCLUDE_SYMBOLS "*_HPP_")
+
+set(DOXYGEN_DOT_IMAGE_FORMAT "svg")
+set(DOXYGEN_INTERACTIVE_SVG "YES")
+set(DOXYGEN_INCLUDE_GRAPH "NO")
+set(DOXYGEN_EXTRACT_PRIVATE "YES")
+
+## enable the preprocessor and macro expansion, restricted to explicitly listed macros (EXPAND_ONLY_PREDEF)
+set(DOXYGEN_ENABLE_PREPROCESSING "YES")
+set(DOXYGEN_MACRO_EXPANSION "YES")
+set(DOXYGEN_EXPAND_ONLY_PREDEF "YES")
+set(DOXYGEN_EXPAND_AS_DEFINED "YES")  ## NOTE(review): Doxygen documents EXPAND_AS_DEFINED as a list of macro names, so "YES" is presumably read as a macro name -- confirm which macros should be listed here
+
+set(DOXYGEN_VERBATIM_VARS DOXYGEN_ALIASES)
+set(DOXYGEN_ALIASES
+        [[license="\par License^^\parblock^^"  ]]
+)
+
+## add doxygen as target
+doxygen_add_docs(
+        doc
+        "${PROJECT_SOURCE_DIR}/include;${PROJECT_SOURCE_DIR}/README.md;"
+        WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+        COMMENT "Generating API documentation with Doxygen."
+)
+
+## install targets for the documentation
+include(GNUInstallDirs)
+install(DIRECTORY "${PROJECT_SOURCE_DIR}/docs/html"
+        DESTINATION "${CMAKE_INSTALL_DOCDIR}"
+)
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index 6086f5b..1ffbc0b 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -8,9 +8,9 @@ cmake_minimum_required(VERSION 3.22)
 
 project(LibraryUsageExample LANGUAGES CXX)
 
-find_package(hardware_sampling REQUIRED)
+find_package(hws REQUIRED)
 
 add_executable(prog main.cpp)
 
-target_compile_features(prog PUBLIC cxx_std_20)
-target_link_libraries(prog PUBLIC hws::hardware_sampling)
\ No newline at end of file
+target_compile_features(prog PUBLIC cxx_std_17)
+target_link_libraries(prog PUBLIC hws::hws)
\ No newline at end of file
diff --git a/examples/cpp/main.cpp b/examples/cpp/main.cpp
index 166a967..63e4160 100644
--- a/examples/cpp/main.cpp
+++ b/examples/cpp/main.cpp
@@ -5,14 +5,14 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/core.hpp"
+#include "hws/core.hpp"
 
 #include <cstddef>  // std::size_t
 #include <numeric>  // std::iota
 #include <vector>   // std::vector
 
 int main() {
-    hws::cpu_hardware_sampler sampler{};
+    hws::system_hardware_sampler sampler{};
     // could also be, e.g.,
     // hws::gpu_nvidia_hardware_sampler sampler{};
     sampler.start_sampling();
diff --git a/examples/python/main.py b/examples/python/main.py
index 7f384ca..da0809f 100644
--- a/examples/python/main.py
+++ b/examples/python/main.py
@@ -8,12 +8,12 @@
 #          See the LICENSE.md file in the project root for full license information.                                   #
 ########################################################################################################################
 
-import HardwareSampling
+import HardwareSampling as hws
 import numpy as np
 
-sampler = HardwareSampling.CpuHardwareSampler()
+sampler = hws.SystemHardwareSampler()
 # could also be, e.g.,
-# sampler = HardwareSampling.GpuNvidiaHardwareSampler()
+# sampler = hws.GpuNvidiaHardwareSampler()
 sampler.start()
 
 sampler.add_event("init")
diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp
deleted file mode 100644
index 3c986a5..0000000
--- a/include/hardware_sampling/core.hpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * @file
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- *
- * @brief Core header containing all other necessary other headers.
- */
-
-#ifndef HARDWARE_SAMPLING_CORE_HPP_
-#define HARDWARE_SAMPLING_CORE_HPP_
-#pragma once
-
-#include "hardware_sampling/event.hpp"
-#include "hardware_sampling/hardware_sampler.hpp"
-
-#if defined(HWS_FOR_CPUS_ENABLED)
-    #include "hardware_sampling/cpu/cpu_samples.hpp"
-    #include "hardware_sampling/cpu/hardware_sampler.hpp"
-#endif
-
-#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_nvidia//nvml_samples.hpp"
-    #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"
-#endif
-
-#if defined(HWS_FOR_AMD_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_amd/hardware_sampler.hpp"
-    #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"
-#endif
-
-#if defined(HWS_FOR_INTEL_GPUS_ENABLED)
-    #include "hardware_sampling/gpu_intel/hardware_sampler.hpp"
-    #include "hardware_sampling/gpu_intel/level_zero_samples.hpp"
-#endif
-
-#endif  // HARDWARE_SAMPLING_CORE_HPP_
diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
deleted file mode 100644
index 8f5d120..0000000
--- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp
+++ /dev/null
@@ -1,251 +0,0 @@
-/**
- * @file
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- *
- * @brief Defines the samples used with ROCm SMI.
- */
-
-#ifndef HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
-#define HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
-#pragma once
-
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter
-
-#include <cstdint>   // std::uint64_t, std::int64_t, std::uint32_t
-#include <format>    // std::formatter
-#include <iosfwd>    // std::ostream forward declaration
-#include <optional>  // std::optional
-#include <string>    // std::string
-#include <vector>    // std::vector
-
-namespace hws {
-
-//*************************************************************************************************************************************//
-//                                                           general samples                                                           //
-//*************************************************************************************************************************************//
-
-/**
- * @brief Wrapper class for all general ROCm SMI hardware samples.
- */
-class rocm_smi_general_samples {
-    // befriend hardware sampler class
-    friend class gpu_amd_hardware_sampler;
-
-  public:
-    /**
-     * @brief Assemble the YAML string containing all available general hardware samples.
-     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
-     * @return the YAML string (`[[nodiscard]]`)
-     */
-    [[nodiscard]] std::string generate_yaml_string() const;
-
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)  // the name of the device
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level)          // the performance level: one of rsmi_dev_perf_level_t
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_gpu)  // the GPU compute utilization in percent
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_mem)  // the GPU memory utilization in percent
-};
-
-/**
- * @brief Output the general @p samples to the given output-stream @p out.
- * @details In contrast to `rocm_smi_general_samples::generate_yaml_string()`, outputs **all** general hardware samples, even if not supported by the current device (default initialized value).
- * @param[in,out] out the output-stream to write the general hardware samples to
- * @param[in] samples the ROCm SMI general samples
- * @return the output-stream
- */
-std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples);
-
-//*************************************************************************************************************************************//
-//                                                            clock samples                                                            //
-//*************************************************************************************************************************************//
-
-/**
- * @brief Wrapper class for all clock related ROCm SMI hardware samples.
- */
-class rocm_smi_clock_samples {
-    // befriend hardware sampler class
-    friend class gpu_amd_hardware_sampler;
-
-  public:
-    /**
-     * @brief Assemble the YAML string containing all available general hardware samples.
-     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
-     * @return the YAML string (`[[nodiscard]]`)
-     */
-    [[nodiscard]] std::string generate_yaml_string() const;
-
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_system_min)  // the minimum possible system clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_system_max)  // the maximum possible system clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_socket_min)  // the minimum possible socket clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_socket_max)  // the maximum possible socket clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_memory_min)  // the minimum possible memory clock frequency in Hz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_memory_max)  // the maximum possible memory clock frequency in Hz
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_system)            // the current system clock frequency in Hz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_socket)            // the current socket clock frequency in Hz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_memory)            // the current memory clock frequency in Hz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, overdrive_level)         // the GPU overdrive percentage
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_overdrive_level)  // the GPU memory overdrive percentage
-};
-
-/**
- * @brief Output the clock related @p samples to the given output-stream @p out.
- * @details In contrast to `rocm_smi_clock_samples::generate_yaml_string()`, outputs **all** clock related hardware samples, even if not supported by the current device (default initialized value).
- * @param[in,out] out the output-stream to write the clock related hardware samples to
- * @param[in] samples the ROCm SMI clock related samples
- * @return the output-stream
- */
-std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples);
-
-//*************************************************************************************************************************************//
-//                                                            power samples                                                            //
-//*************************************************************************************************************************************//
-
-/**
- * @brief Wrapper class for all power related ROCm SMI hardware samples.
- */
-class rocm_smi_power_samples {
-    // befriend hardware sampler class
-    friend class gpu_amd_hardware_sampler;
-
-  public:
-    /**
-     * @brief Assemble the YAML string containing all available general hardware samples.
-     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
-     * @return the YAML string (`[[nodiscard]]`)
-     */
-    [[nodiscard]] std::string generate_yaml_string() const;
-
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, power_default_cap)                    // the default power cap, may be different from power cap
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, power_cap)                            // if the GPU draws more power (μW) than the power cap, the GPU may throttle
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_type)                             // the type of the power management: either current power draw or average power draw
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, available_power_profiles)  // a list of the available power profiles
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_usage)                     // the current GPU socket power draw in μW
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_total_energy_consumption)  // the total power consumption since the last driver reload in μJ
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, power_profile)                     // the current active power profile; one of 'available_power_profiles'
-};
-
-/**
- * @brief Output the power related @p samples to the given output-stream @p out.
- * @details In contrast to `rocm_smi_power_samples::generate_yaml_string()`, outputs **all** power related hardware samples, even if not supported by the current device (default initialized value).
- * @param[in,out] out the output-stream to write the power related hardware samples to
- * @param[in] samples the ROCm SMI power related samples
- * @return the output-stream
- */
-std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples);
-
-//*************************************************************************************************************************************//
-//                                                            memory samples                                                           //
-//*************************************************************************************************************************************//
-
-/**
- * @brief Wrapper class for all memory related ROCm SMI hardware samples.
- */
-class rocm_smi_memory_samples {
-    // befriend hardware sampler class
-    friend class gpu_amd_hardware_sampler;
-
-  public:
-    /**
-     * @brief Assemble the YAML string containing all available general hardware samples.
-     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
-     * @return the YAML string (`[[nodiscard]]`)
-     */
-    [[nodiscard]] std::string generate_yaml_string() const;
-
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, memory_total)          // the total available memory in Byte
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, visible_memory_total)  // the total visible available memory in Byte, may be smaller than the total memory
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, min_num_pcie_lanes)    // the minimum number of used PCIe lanes
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, max_num_pcie_lanes)    // the maximum number of used PCIe lanes
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_used)         // the currently used memory in Byte
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, pcie_transfer_rate)  // the current PCIe transfer rate in T/s
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, num_pcie_lanes)      // the number of currently used PCIe lanes
-};
-
-/**
- * @brief Output the memory related @p samples to the given output-stream @p out.
- * @details In contrast to `rocm_smi_memory_samples::generate_yaml_string()`, outputs **all** memory related hardware samples, even if not supported by the current device (default initialized value).
- * @param[in,out] out the output-stream to write the memory related hardware samples to
- * @param[in] samples the ROCm SMI memory related samples
- * @return the output-stream
- */
-std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples);
-
-//*************************************************************************************************************************************//
-//                                                         temperature samples                                                         //
-//*************************************************************************************************************************************//
-
-/**
- * @brief Wrapper class for all temperature related ROCm SMI hardware samples.
- */
-class rocm_smi_temperature_samples {
-    // befriend hardware sampler class
-    friend class gpu_amd_hardware_sampler;
-
-  public:
-    /**
-     * @brief Assemble the YAML string containing all available general hardware samples.
-     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
-     * @return the YAML string (`[[nodiscard]]`)
-     */
-    [[nodiscard]] std::string generate_yaml_string() const;
-
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans)                // the number of fans (if any)
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, max_fan_speed)           // the maximum fan speed
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_edge_min)     // the minimum temperature on the GPU's edge temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_edge_max)     // the maximum temperature on the GPU's edge temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hotspot_min)  // the minimum temperature on the GPU's hotspot temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hotspot_max)  // the maximum temperature on the GPU's hotspot temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_memory_min)   // the minimum temperature on the GPU's memory temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_memory_max)   // the maximum temperature on the GPU's memory temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_0_min)    // the minimum temperature on the GPU's HBM0 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_0_max)    // the maximum temperature on the GPU's HBM0 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_1_min)    // the minimum temperature on the GPU's HBM1 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_1_max)    // the maximum temperature on the GPU's HBM1 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_2_min)    // the minimum temperature on the GPU's HBM2 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_2_max)    // the maximum temperature on the GPU's HBM2 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_3_min)    // the minimum temperature on the GPU's HBM3 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_3_max)    // the maximum temperature on the GPU's HBM3 temperature sensor in m°C
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, fan_speed)            // the current fan speed in %
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_edge)     // the current temperature on the GPU's edge temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hotspot)  // the current temperature on the GPU's hotspot temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_memory)   // the current temperature on the GPU's memory temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_0)    // the current temperature on the GPU's HBM0 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_1)    // the current temperature on the GPU's HBM1 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_2)    // the current temperature on the GPU's HBM2 temperature sensor in m°C
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_3)    // the current temperature on the GPU's HBM3 temperature sensor in m°C
-};
-
-/**
- * @brief Output the temperature related @p samples to the given output-stream @p out.
- * @details In contrast to `rocm_smi_temperature_samples::generate_yaml_string()`, outputs **all** temperature related hardware samples, even if not supported by the current device (default initialized value).
- * @param[in,out] out the output-stream to write the temperature related hardware samples to
- * @param[in] samples the ROCm SMI temperature related samples
- * @return the output-stream
- */
-std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples);
-
-}  // namespace hws
-
-template <>
-struct std::formatter<hws::rocm_smi_general_samples> : hws::detail::ostream_formatter { };
-
-template <>
-struct std::formatter<hws::rocm_smi_clock_samples> : hws::detail::ostream_formatter { };
-
-template <>
-struct std::formatter<hws::rocm_smi_power_samples> : hws::detail::ostream_formatter { };
-
-template <>
-struct std::formatter<hws::rocm_smi_memory_samples> : hws::detail::ostream_formatter { };
-
-template <>
-struct std::formatter<hws::rocm_smi_temperature_samples> : hws::detail::ostream_formatter { };
-
-#endif  // HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp
deleted file mode 100644
index f4f8577..0000000
--- a/include/hardware_sampling/gpu_nvidia/utility.hpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * @file
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- *
- * @brief Implements utility functionality for the NVIDIA GPU sampler.
- */
-
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
-#pragma once
-
-#include "nvml.h"  // NVML runtime functions
-
-#include <format>     // std::format
-#include <stdexcept>  // std::runtime_error
-
-namespace hws::detail {
-
-/**
- * @def HWS_NVML_ERROR_CHECK
- * @brief Defines the `HWS_NVML_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise.
- * @details Throws an exception if an NVML call returns with an error. Additionally outputs a more concrete error string.
- */
-#if defined(HWS_ERROR_CHECKS_ENABLED)
-    #define HWS_NVML_ERROR_CHECK(nvml_func)                                                                                                                        \
-        {                                                                                                                                                          \
-            const nvmlReturn_t errc = nvml_func;                                                                                                                   \
-            if (errc != NVML_SUCCESS) {                                                                                                                            \
-                throw std::runtime_error{ std::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast<int>(errc)) }; \
-            }                                                                                                                                                      \
-        }
-#else
-    #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func;
-#endif
-
-}  // namespace hws::detail
-
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_
diff --git a/include/hws/core.hpp b/include/hws/core.hpp
new file mode 100644
index 0000000..8c7a474
--- /dev/null
+++ b/include/hws/core.hpp
@@ -0,0 +1,41 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Core header including all other necessary headers.
+ */
+
+#ifndef HWS_CORE_HPP_
+#define HWS_CORE_HPP_
+#pragma once
+
+#include "hws/event.hpp"
+#include "hws/hardware_sampler.hpp"
+#include "hws/sample_category.hpp"
+#include "hws/system_hardware_sampler.hpp"
+#include "hws/version.hpp"
+
+#if defined(HWS_FOR_CPUS_ENABLED)
+    #include "hws/cpu/cpu_samples.hpp"
+    #include "hws/cpu/hardware_sampler.hpp"
+#endif
+
+#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
+    #include "hws/gpu_nvidia/hardware_sampler.hpp"
+    #include "hws/gpu_nvidia/nvml_samples.hpp"
+#endif
+
+#if defined(HWS_FOR_AMD_GPUS_ENABLED)
+    #include "hws/gpu_amd/hardware_sampler.hpp"
+    #include "hws/gpu_amd/rocm_smi_samples.hpp"
+#endif
+
+#if defined(HWS_FOR_INTEL_GPUS_ENABLED)
+    #include "hws/gpu_intel/hardware_sampler.hpp"
+    #include "hws/gpu_intel/level_zero_samples.hpp"
+#endif
+
+#endif  // HWS_CORE_HPP_
diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hws/cpu/cpu_samples.hpp
similarity index 74%
rename from include/hardware_sampling/cpu/cpu_samples.hpp
rename to include/hws/cpu/cpu_samples.hpp
index da08f84..bcea2d4 100644
--- a/include/hardware_sampling/cpu/cpu_samples.hpp
+++ b/include/hws/cpu/cpu_samples.hpp
@@ -8,13 +8,14 @@
  * @brief Defines the samples used with turbostat, lscpu, and free.
  */
 
-#ifndef HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_
-#define HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_
+#ifndef HWS_CPU_CPU_SAMPLES_HPP_
+#define HWS_CPU_CPU_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter
+#include "hws/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
-#include <format>         // std::formatter
 #include <iosfwd>         // std::ostream forward declaration
 #include <optional>       // std::optional
 #include <string>         // std::string
@@ -35,30 +36,37 @@ class cpu_general_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any general hardware sample is present.
+     * @return `true` if any general hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)        // the CPU architecture (e.g., x86_64)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)          // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)          // the total number of cores of the CPU(s)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_threads)        // the number of threads of the CPU(s) including potential hyper-threads
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, threads_per_core)   // the number of hyper-threads per core
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, cores_per_socket)   // the number of physical cores per socket
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_sockets)        // the number of sockets
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, numa_nodes)         // the number of NUMA nodes
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)           // the vendor ID (e.g. GenuineIntel)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)           // the vendor ID (e.g., GenuineIntel)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                // the name of the CPU
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, flags)  // potential CPU flags (e.g., sse4_1, avx, avx, etc)
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, busy_percent)  // the percent the CPU was busy doing work
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ipc)           // the instructions-per-cycle count
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, irq)     // the number of interrupts
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, smi)     // the number of system management interrupts
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, poll)    // the number of times the CPU was in the polling state
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, poll_percent)  // the percent of the CPU was in the polling state
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, compute_utilization)  // the percent the CPU was busy doing work
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ipc)                  // the instructions-per-cycle count
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, irq)            // the number of interrupts
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, smi)            // the number of system management interrupts
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, poll)           // the number of times the CPU was in the polling state
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, poll_percent)         // the percent of time the CPU was in the polling state
 };
 
 /**
@@ -82,20 +90,26 @@ class cpu_clock_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any clock related hardware sample is present.
+     * @return `true` if any clock related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, frequency_boost)  // true if frequency boosting is enabled
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, min_frequency)  // the minimum possible CPU frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, max_frequency)  // the maximum possible CPU frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, auto_boosted_clock_enabled)  // true if frequency boosting is enabled
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min)       // the minimum possible CPU frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max)       // the maximum possible CPU frequency in MHz
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_frequency)           // the average CPU frequency in MHz including idle cores
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_frequency)  // the average CPU frequency in MHz excluding idle cores
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter)          // the time stamp counter
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_frequency)                   // the average CPU frequency in MHz including idle cores
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_clock_frequency)  // the average CPU frequency in MHz excluding idle cores
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter)                // the time stamp counter
 };
 
 /**
@@ -119,18 +133,27 @@ class cpu_power_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any power related hardware sample is present.
+     * @return `true` if any power related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_watt)                   // the currently consumed power of the package of the CPU in W
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_watt)                      // the currently consumed power of the core part of the CPU in W
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ram_watt)                       // the currently consumed power of the RAM part of the CPU in W
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_rapl_throttle_percent)  // the percent of time the package throttled due to RAPL limiters
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, dram_rapl_throttle_percent)     // the percent of time the DRAM throttled due to RAPL limiters
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type)  // the type of the power readings: always "instant/current"
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage)                     // the currently consumed power of the package of the CPU in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption)  // the total energy consumption in J
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_watt)                       // the currently consumed power of the core part of the CPU in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ram_watt)                        // the currently consumed power of the RAM part of the CPU in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_rapl_throttle_percent)   // the percent of time the package throttled due to RAPL limiters
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, dram_rapl_throttle_percent)      // the percent of time the DRAM throttled due to RAPL limiters
 };
 
 /**
@@ -154,17 +177,23 @@ class cpu_memory_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any memory related hardware sample is present.
+     * @return `true` if any memory related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l1d_cache)                 // the size of the L1 data cache
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l1i_cache)                 // the size of the L1 instruction cache
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l2_cache)                  // the size of the L2 cache
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l3_cache)                  // the size of the L2 cache
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L1d)            // the size of the L1 data cache
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L1i)            // the size of the L1 instruction cache
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L2)             // the size of the L2 cache
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L3)             // the size of the L3 cache
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long long, memory_total)       // the total available memory in Byte
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long long, swap_memory_total)  // the total available swap memory in Byte
 
@@ -195,16 +224,22 @@ class cpu_temperature_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any temperature related hardware sample is present.
+     * @return `true` if any temperature related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature)             // the current temperature of the whole package in °C
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, core_temperature)  // the current temperature of the core part of the CPU in °C
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_throttle_percent)   // the percent of time the CPU has throttled
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_temperature)     // the current temperature of the whole package in °C
 };
 
 /**
@@ -228,9 +263,15 @@ class cpu_gfx_samples {
     friend class cpu_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any gfx related hardware sample is present.
+     * @return `true` if any gfx related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -266,9 +307,15 @@ class cpu_idle_states_samples {
     using map_type = std::unordered_map<std::string, std::vector<double>>;
 
   public:
+    /**
+     * @brief Checks whether any idle state related hardware sample is present.
+     * @return `true` if any idle state related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
@@ -292,25 +339,29 @@ std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &sampl
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
-struct std::formatter<hws::cpu_general_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_general_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_clock_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_clock_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_power_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_power_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_memory_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_memory_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_temperature_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_temperature_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_gfx_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_gfx_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::cpu_idle_states_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_idle_states_samples> : fmt::ostream_formatter { };
+
+/// @endcond
 
-#endif  // HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_
+#endif  // HWS_CPU_CPU_SAMPLES_HPP_
diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hws/cpu/hardware_sampler.hpp
similarity index 81%
rename from include/hardware_sampling/cpu/hardware_sampler.hpp
rename to include/hws/cpu/hardware_sampler.hpp
index b86771e..d1b4102 100644
--- a/include/hardware_sampling/cpu/hardware_sampler.hpp
+++ b/include/hws/cpu/hardware_sampler.hpp
@@ -8,16 +8,17 @@
  * @brief Defines a hardware sampler for CPUs using the turbostat, lscpu, and free utilities (requires root).
  */
 
-#ifndef HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_CPU_HARDWARE_SAMPLER_HPP_
+#define HWS_CPU_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
-#include "hardware_sampling/hardware_sampler.hpp"  // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"           // hws::detail::ostream_formatter
+#include "hws/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
+#include "hws/hardware_sampler.hpp"  // hws::hardware_sampler
+#include "hws/sample_category.hpp"   // hws::sample_category
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
 #include <chrono>  // std::chrono::milliseconds, std::chrono_literals namespace
-#include <format>  // std::formatter
 #include <iosfwd>  // std::ostream forward declaration
 
 namespace hws {
@@ -32,13 +33,15 @@ class cpu_hardware_sampler : public hardware_sampler {
   public:
     /**
      * @brief Construct a new CPU hardware sampler with the default sampling interval.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    cpu_hardware_sampler();
+    explicit cpu_hardware_sampler(sample_category category = sample_category::all);
     /**
      * @brief Construct a new CPU hardware sampler with the @p sampling_interval.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit cpu_hardware_sampler(std::chrono::milliseconds sampling_interval);
+    explicit cpu_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member).
@@ -104,21 +107,21 @@ class cpu_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const cpu_idle_states_samples &idle_state_samples() const noexcept { return idle_state_samples_; }
 
-  private:
     /**
-     * @copydoc hws::hardware_sampler::sampling_loop
+     * @copydoc hws::hardware_sampler::device_identification
      */
-    void sampling_loop() final;
+    [[nodiscard]] std::string device_identification() const final;
 
     /**
-     * @copydoc hws::hardware_sampler::device_identification
+     * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
      */
-    std::string device_identification() const final;
+    [[nodiscard]] std::string samples_only_as_yaml_string() const final;
 
+  private:
     /**
-     * @copydoc hws::hardware_sampler::generate_yaml_string
+     * @copydoc hws::hardware_sampler::sampling_loop
      */
-    std::string generate_yaml_string() const final;
+    void sampling_loop() final;
 
     /// The general CPU samples.
     cpu_general_samples general_samples_{};
@@ -147,7 +150,11 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler)
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
-struct std::formatter<hws::cpu_hardware_sampler> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::cpu_hardware_sampler> : fmt::ostream_formatter { };
+
+/// @endcond
 
-#endif  // HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_CPU_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/cpu/utility.hpp b/include/hws/cpu/utility.hpp
similarity index 87%
rename from include/hardware_sampling/cpu/utility.hpp
rename to include/hws/cpu/utility.hpp
index d203e0b..9efd008 100644
--- a/include/hardware_sampling/cpu/utility.hpp
+++ b/include/hws/cpu/utility.hpp
@@ -8,11 +8,12 @@
  * @brief Implements utility functionality for the CPU sampler.
  */
 
-#ifndef HARDWARE_SAMPLING_CPU_UTILITY_HPP_
-#define HARDWARE_SAMPLING_CPU_UTILITY_HPP_
+#ifndef HWS_CPU_UTILITY_HPP_
+#define HWS_CPU_UTILITY_HPP_
 #pragma once
 
-#include <format>       // std::format
+#include "fmt/format.h"  // fmt::format
+
 #include <stdexcept>    // std::runtime_error
 #include <string>       // std::string
 #include <string_view>  // std::string_view
@@ -29,7 +30,7 @@ namespace hws::detail {
         {                                                                                                              \
             const int errc = subprocess_func;                                                                          \
             if (errc != 0) {                                                                                           \
-                throw std::runtime_error{ std::format("Error calling subprocess function \"{}\"", #subprocess_func) }; \
+                throw std::runtime_error{ fmt::format("Error calling subprocess function \"{}\"", #subprocess_func) }; \
             }                                                                                                          \
         }
 #else
@@ -43,6 +44,6 @@ namespace hws::detail {
  */
 [[nodiscard]] std::string run_subprocess(std::string_view cmd_line);
 
-}  // namespace hws
+}  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_CPU_UTILITY_HPP_
+#endif  // HWS_CPU_UTILITY_HPP_
diff --git a/include/hardware_sampling/event.hpp b/include/hws/event.hpp
similarity index 51%
rename from include/hardware_sampling/event.hpp
rename to include/hws/event.hpp
index 4375813..7252a75 100644
--- a/include/hardware_sampling/event.hpp
+++ b/include/hws/event.hpp
@@ -8,16 +8,16 @@
  * @brief Defines an event type.
  */
 
-#ifndef HARDWARE_SAMPLING_EVENT_HPP_
-#define HARDWARE_SAMPLING_EVENT_HPP_
+#ifndef HWS_EVENT_HPP_
+#define HWS_EVENT_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::ostream_formatter
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
-#include <chrono>  // std::chrono::steady_clock::time_point
-#include <format>  // std::formatter
-#include <iosfwd>  // std::ostream forward declaration
-#include <string>  // std::string
+#include <chrono>   // std::chrono::steady_clock::time_point
+#include <iosfwd>   // std::ostream forward declaration
+#include <string>   // std::string
+#include <utility>  // std::move
 
 namespace hws {
 
@@ -25,9 +25,17 @@ namespace hws {
  * @brief A struct encapsulating a single event.
  */
 struct event {
+    /**
+     * @brief Construct a new event given a time point and name.
+     * @param[in] time_point_p the time when the event occurred
+     * @param[in] name_p the name of the event
+     */
+    event(const std::chrono::steady_clock::time_point time_point_p, std::string name_p) :
+        time_point{ time_point_p },
+        name{ std::move(name_p) } { }
+
     /// The time point this event occurred at.
     std::chrono::steady_clock::time_point time_point;
-
     /// The name of this event.
     std::string name;
 };
@@ -42,7 +50,11 @@ std::ostream &operator<<(std::ostream &out, const event &e);
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
-struct std::formatter<hws::event> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::event> : fmt::ostream_formatter { };
+
+/// @endcond
 
-#endif  // HARDWARE_SAMPLING_EVENT_HPP_
+#endif  // HWS_EVENT_HPP_
diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hws/gpu_amd/hardware_sampler.hpp
similarity index 78%
rename from include/hardware_sampling/gpu_amd/hardware_sampler.hpp
rename to include/hws/gpu_amd/hardware_sampler.hpp
index 55ab3a9..668cc9a 100644
--- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp
+++ b/include/hws/gpu_amd/hardware_sampler.hpp
@@ -8,19 +8,20 @@
  * @brief Defines a hardware sampler for AMD GPUs using AMD's ROCm SMI library.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_
+#define HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"                   // hws::detail::ostream_formatter
+#include "hws/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
+#include "hws/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hws/sample_category.hpp"           // hws::sample_category
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
 #include <atomic>   // std::atomic
 #include <chrono>   // std::chrono::milliseconds, std::chrono_literals namespace
 #include <cstddef>  // std::size_t
 #include <cstdint>  // std::uint32_t
-#include <format>   // std::formatter
 #include <iosfwd>   // std::ostream forward declaration
 
 namespace hws {
@@ -36,27 +37,31 @@ class gpu_amd_hardware_sampler : public hardware_sampler {
     /**
      * @brief Construct a new AMD GPU hardware sampler for the default device with the default sampling interval.
      * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_amd_hardware_sampler();
+    explicit gpu_amd_hardware_sampler(sample_category category = sample_category::all);
     /**
      * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the default sampling interval.
      * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment.
      * @param[in] device_id the ID of the device to sample
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_amd_hardware_sampler(std::size_t device_id);
+    explicit gpu_amd_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all);
     /**
      * @brief Construct a new AMD GPU hardware sampler for the default device with the @p sampling_interval.
      * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_amd_hardware_sampler(std::chrono::milliseconds sampling_interval);
+    explicit gpu_amd_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
     /**
      * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the @p sampling_interval.
      * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment.
      * @param[in] device_id the ID of the device to sample
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_amd_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval);
+    gpu_amd_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member).
@@ -111,21 +116,21 @@ class gpu_amd_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const rocm_smi_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; }
 
-  private:
     /**
-     * @copydoc hws::hardware_sampler::sampling_loop
+     * @copydoc hws::hardware_sampler::device_identification
      */
-    void sampling_loop() final;
+    [[nodiscard]] std::string device_identification() const final;
 
     /**
-     * @copydoc hws::hardware_sampler::device_identification
+     * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
      */
-    std::string device_identification() const final;
+    [[nodiscard]] std::string samples_only_as_yaml_string() const final;
 
+  private:
     /**
-     * @copydoc hws::hardware_sampler::generate_yaml_string
+     * @copydoc hws::hardware_sampler::sampling_loop
      */
-    std::string generate_yaml_string() const final;
+    void sampling_loop() final;
 
     /// The ID of the device to sample.
     std::uint32_t device_id_{};
@@ -158,7 +163,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
-struct std::formatter<hws::gpu_amd_hardware_sampler> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::gpu_amd_hardware_sampler> : fmt::ostream_formatter { };
+
+/// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_
diff --git a/include/hws/gpu_amd/rocm_smi_samples.hpp b/include/hws/gpu_amd/rocm_smi_samples.hpp
new file mode 100644
index 0000000..8ace761
--- /dev/null
+++ b/include/hws/gpu_amd/rocm_smi_samples.hpp
@@ -0,0 +1,294 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines the samples used with ROCm SMI.
+ */
+
+#ifndef HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
+#define HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
+#pragma once
+
+#include "hws/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
+
+#include <cstdint>   // std::uint64_t, std::int64_t, std::uint32_t
+#include <iosfwd>    // std::ostream forward declaration
+#include <optional>  // std::optional
+#include <string>    // std::string
+#include <vector>    // std::vector
+
+namespace hws {
+
+//*************************************************************************************************************************************//
+//                                                           general samples                                                           //
+//*************************************************************************************************************************************//
+
+/**
+ * @brief Wrapper class for all general ROCm SMI hardware samples.
+ */
+class rocm_smi_general_samples {
+    // befriend hardware sampler class
+    friend class gpu_amd_hardware_sampler;
+
+  public:
+    /**
+     * @brief Checks whether any general hardware sample is present.
+     * @return `true` if any general hardware sample is present, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_samples() const;
+    /**
+     * @brief Assemble the YAML string containing all available general hardware samples.
+     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
+     * @return the YAML string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string generate_yaml_string() const;
+
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)  // the architecture name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)    // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)     // the vendor ID
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)          // the name of the device
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization)  // the GPU compute utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization)   // the GPU memory utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, performance_level)      // the performance level: one of rsmi_dev_perf_level_t
+};
+
+/**
+ * @brief Output the general @p samples to the given output-stream @p out.
+ * @details In contrast to `rocm_smi_general_samples::generate_yaml_string()`, outputs **all** general hardware samples, even if not supported by the current device (default initialized value).
+ * @param[in,out] out the output-stream to write the general hardware samples to
+ * @param[in] samples the ROCm SMI general samples
+ * @return the output-stream
+ */
+std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples);
+
+//*************************************************************************************************************************************//
+//                                                            clock samples                                                            //
+//*************************************************************************************************************************************//
+
+/**
+ * @brief Wrapper class for all clock related ROCm SMI hardware samples.
+ */
+class rocm_smi_clock_samples {
+    // befriend hardware sampler class
+    friend class gpu_amd_hardware_sampler;
+
+  public:
+    /**
+     * @brief Checks whether any clock related hardware sample is present.
+     * @return `true` if any clock related hardware sample is present, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_samples() const;
+    /**
+     * @brief Assemble the YAML string containing all available clock related hardware samples.
+     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
+     * @return the YAML string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string generate_yaml_string() const;
+
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min)                              // the minimum possible system clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max)                              // the maximum possible system clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min)                       // the minimum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max)                       // the maximum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, socket_clock_frequency_min)                       // the minimum possible socket clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, socket_clock_frequency_max)                       // the maximum possible socket clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_clock_frequencies)         // the available clock frequencies in MHz (slowest to fastest)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_memory_clock_frequencies)  // the available memory clock frequencies in MHz (slowest to fastest)
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)                // the current system clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)         // the current memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, socket_clock_frequency)         // the current socket clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, overdrive_level)         // the GPU overdrive percentage
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_overdrive_level)  // the GPU memory overdrive percentage
+};
+
+/**
+ * @brief Output the clock related @p samples to the given output-stream @p out.
+ * @details In contrast to `rocm_smi_clock_samples::generate_yaml_string()`, outputs **all** clock related hardware samples, even if not supported by the current device (default initialized value).
+ * @param[in,out] out the output-stream to write the clock related hardware samples to
+ * @param[in] samples the ROCm SMI clock related samples
+ * @return the output-stream
+ */
+std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples);
+
+//*************************************************************************************************************************************//
+//                                                            power samples                                                            //
+//*************************************************************************************************************************************//
+
+/**
+ * @brief Wrapper class for all power related ROCm SMI hardware samples.
+ */
+class rocm_smi_power_samples {
+    // befriend hardware sampler class
+    friend class gpu_amd_hardware_sampler;
+
+  public:
+    /**
+     * @brief Checks whether any power related hardware sample is present.
+     * @return `true` if any power related hardware sample is present, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_samples() const;
+    /**
+     * @brief Assemble the YAML string containing all available power related hardware samples.
+     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
+     * @return the YAML string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string generate_yaml_string() const;
+
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_management_limit)                      // the default power cap (W), may be different from power cap
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit)                        // if the GPU draws more power (W) than the power cap, the GPU may throttle
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type)                 // the type of the power readings: either current power draw or average power draw
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, available_power_profiles)  // a list of the available power profiles
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage)                     // the current GPU socket power draw in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption)  // the total energy consumption since the last driver reload in J
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, power_profile)              // the current active power profile; one of 'available_power_profiles'
+};
+
+/**
+ * @brief Output the power related @p samples to the given output-stream @p out.
+ * @details In contrast to `rocm_smi_power_samples::generate_yaml_string()`, outputs **all** power related hardware samples, even if not supported by the current device (default initialized value).
+ * @param[in,out] out the output-stream to write the power related hardware samples to
+ * @param[in] samples the ROCm SMI power related samples
+ * @return the output-stream
+ */
+std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples);
+
+//*************************************************************************************************************************************//
+//                                                            memory samples                                                           //
+//*************************************************************************************************************************************//
+
+/**
+ * @brief Wrapper class for all memory related ROCm SMI hardware samples.
+ */
+class rocm_smi_memory_samples {
+    // befriend hardware sampler class
+    friend class gpu_amd_hardware_sampler;
+
+  public:
+    /**
+     * @brief Checks whether any memory related hardware sample is present.
+     * @return `true` if any memory related hardware sample is present, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_samples() const;
+    /**
+     * @brief Assemble the YAML string containing all available memory related hardware samples.
+     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
+     * @return the YAML string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string generate_yaml_string() const;
+
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, memory_total)                 // the total available memory in Byte
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, visible_memory_total)         // the total visible available memory in Byte, may be smaller than the total memory
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_pcie_lanes_min)           // the minimum number of used PCIe lanes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_pcie_lanes_max)           // the maximum number of used PCIe lanes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, pcie_link_transfer_rate_min)  // the minimum PCIe link transfer rate in MT/s
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, pcie_link_transfer_rate_max)  // the maximum PCIe link transfer rate in MT/s
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_used)              // the currently used memory in Byte
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_free)              // the currently free memory in Byte
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, num_pcie_lanes)           // the number of currently used PCIe lanes
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, pcie_link_transfer_rate)  // the current PCIe transfer rate in MT/s
+};
+
+/**
+ * @brief Output the memory related @p samples to the given output-stream @p out.
+ * @details In contrast to `rocm_smi_memory_samples::generate_yaml_string()`, outputs **all** memory related hardware samples, even if not supported by the current device (default initialized value).
+ * @param[in,out] out the output-stream to write the memory related hardware samples to
+ * @param[in] samples the ROCm SMI memory related samples
+ * @return the output-stream
+ */
+std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples);
+
+//*************************************************************************************************************************************//
+//                                                         temperature samples                                                         //
+//*************************************************************************************************************************************//
+
+/**
+ * @brief Wrapper class for all temperature related ROCm SMI hardware samples.
+ */
+class rocm_smi_temperature_samples {
+    // befriend hardware sampler class
+    friend class gpu_amd_hardware_sampler;
+
+  public:
+    /**
+     * @brief Checks whether any temperature related hardware sample is present.
+     * @return `true` if any temperature related hardware sample is present, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_samples() const;
+    /**
+     * @brief Assemble the YAML string containing all available temperature related hardware samples.
+     * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
+     * @return the YAML string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string generate_yaml_string() const;
+
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans)          // the number of fans (if any)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, fan_speed_max)     // the maximum fan speed in RPM
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_min)          // the minimum temperature on the GPU's edge temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max)          // the maximum temperature on the GPU's edge temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_min)   // the minimum temperature on the GPU's memory temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max)   // the maximum temperature on the GPU's memory temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hotspot_temperature_min)  // the minimum temperature on the GPU's hotspot temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hotspot_temperature_max)  // the maximum temperature on the GPU's hotspot temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_0_temperature_min)    // the minimum temperature on the GPU's HBM0 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_0_temperature_max)    // the maximum temperature on the GPU's HBM0 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_1_temperature_min)    // the minimum temperature on the GPU's HBM1 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_1_temperature_max)    // the maximum temperature on the GPU's HBM1 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_2_temperature_min)    // the minimum temperature on the GPU's HBM2 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_2_temperature_max)    // the maximum temperature on the GPU's HBM2 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_3_temperature_min)    // the minimum temperature on the GPU's HBM3 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_3_temperature_max)    // the maximum temperature on the GPU's HBM3 temperature sensor in °C
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage)  // the current fan speed in %
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature)           // the current temperature on the GPU's edge temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hotspot_temperature)   // the current temperature on the GPU's hotspot temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_temperature)    // the current temperature on the GPU's memory temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_0_temperature)     // the current temperature on the GPU's HBM0 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_1_temperature)     // the current temperature on the GPU's HBM1 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_2_temperature)     // the current temperature on the GPU's HBM2 temperature sensor in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_3_temperature)     // the current temperature on the GPU's HBM3 temperature sensor in °C
+};
+
+/**
+ * @brief Output the temperature related @p samples to the given output-stream @p out.
+ * @details In contrast to `rocm_smi_temperature_samples::generate_yaml_string()`, outputs **all** temperature related hardware samples, even if not supported by the current device (default initialized value).
+ * @param[in,out] out the output-stream to write the temperature related hardware samples to
+ * @param[in] samples the ROCm SMI temperature related samples
+ * @return the output-stream
+ */
+std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples);
+
+}  // namespace hws
+
+/// @cond Doxygen_suppress
+
+template <>
+struct fmt::formatter<hws::rocm_smi_general_samples> : fmt::ostream_formatter { };
+
+template <>
+struct fmt::formatter<hws::rocm_smi_clock_samples> : fmt::ostream_formatter { };
+
+template <>
+struct fmt::formatter<hws::rocm_smi_power_samples> : fmt::ostream_formatter { };
+
+template <>
+struct fmt::formatter<hws::rocm_smi_memory_samples> : fmt::ostream_formatter { };
+
+template <>
+struct fmt::formatter<hws::rocm_smi_temperature_samples> : fmt::ostream_formatter { };
+
+/// @endcond
+
+#endif  // HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hws/gpu_amd/utility.hpp
similarity index 56%
rename from include/hardware_sampling/gpu_amd/utility.hpp
rename to include/hws/gpu_amd/utility.hpp
index 5d039c7..4889976 100644
--- a/include/hardware_sampling/gpu_amd/utility.hpp
+++ b/include/hws/gpu_amd/utility.hpp
@@ -8,22 +8,28 @@
  * @brief Implements utility functionality for the AMD GPU sampler.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_
-#define HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_
+#ifndef HWS_GPU_AMD_UTILITY_HPP_
+#define HWS_GPU_AMD_UTILITY_HPP_
 #pragma once
 
+#include "fmt/format.h"         // fmt::format
 #include "rocm_smi/rocm_smi.h"  // ROCm SMI runtime functions
 
-#include <format>     // std::format
 #include <stdexcept>  // std::runtime_error
+#include <string>     // std::string
 
-namespace hws {
+namespace hws::detail {
 
 /**
  * @def HWS_ROCM_SMI_ERROR_CHECK
  * @brief Defines the `HWS_ROCM_SMI_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise.
  * @details Throws an exception if a ROCm SMI call returns with an error. Additionally outputs a more concrete error string if possible.
  */
+/**
+ * @def HWS_HIP_ERROR_CHECK
+ * @brief Defines the `HWS_HIP_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise.
+ * @details Throws an exception if a HIP call returns with an error. Additionally outputs a more concrete error string.
+ */
 #if defined(HWS_ERROR_CHECKS_ENABLED)
     #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func)                                                                                                \
         {                                                                                                                                          \
@@ -32,16 +38,36 @@ namespace hws {
                 const char *error_string;                                                                                                          \
                 const rsmi_status_t ret = rsmi_status_string(errc, &error_string);                                                                 \
                 if (ret == RSMI_STATUS_SUCCESS) {                                                                                                  \
-                    throw std::runtime_error{ std::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, error_string) };           \
+                    throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, error_string) };           \
                 } else {                                                                                                                           \
-                    throw std::runtime_error{ std::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, static_cast<int>(errc)) }; \
+                    throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, static_cast<int>(errc)) }; \
                 }                                                                                                                                  \
             }                                                                                                                                      \
         }
+
+    #define HWS_HIP_ERROR_CHECK(hip_func)                                                                                             \
+        {                                                                                                                             \
+            const hipError_t errc = hip_func;                                                                                         \
+            if (errc != hipSuccess) {                                                                                                 \
+                throw std::runtime_error{ fmt::format("Error in HIP function call \"{}\": {}", #hip_func, hipGetErrorString(errc)) }; \
+            }                                                                                                                         \
+        }
+
 #else
     #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) rocm_smi_func;
+    #define HWS_HIP_ERROR_CHECK(hip_func)                \
+        {                                                \
+            [[maybe_unused]] hipError_t errc = hip_func; \
+        }
 #endif
 
-}  // namespace hws
+/**
+ * @brief Convert the performance level value (`rsmi_dev_perf_level_t`) to a string.
+ * @param[in] perf_level the performance level to convert to a string
+ * @return the string representation of @p perf_level (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level);
+
+}  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_
+#endif  // HWS_GPU_AMD_UTILITY_HPP_
diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hws/gpu_intel/hardware_sampler.hpp
similarity index 76%
rename from include/hardware_sampling/gpu_intel/hardware_sampler.hpp
rename to include/hws/gpu_intel/hardware_sampler.hpp
index 442be04..db068fe 100644
--- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp
+++ b/include/hws/gpu_intel/hardware_sampler.hpp
@@ -8,18 +8,21 @@
  * @brief Defines a hardware sampler for Intel GPUs using Intel's Level Zero.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_
+#define HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
-#include "hardware_sampling/gpu_intel/level_zero_samples.hpp"        // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"                    // hws::hardware_sampler
+#include "hws/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
+#include "hws/gpu_intel/level_zero_samples.hpp"        // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
+#include "hws/hardware_sampler.hpp"                    // hws::hardware_sampler
+#include "hws/sample_category.hpp"                     // hws::sample_category
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
 #include <atomic>   // std::atomic
-#include <chrono>   // std::chrono::{steady_clock, milliseconds}, std::chrono_literals namespace
+#include <chrono>   // std::chrono::milliseconds, std::chrono_literals namespace
 #include <cstddef>  // std::size_t
-#include <format>   // std::formatter
+#include <iosfwd>   // std::ostream forward declaration
 #include <string>   // std::string
 
 namespace hws {
@@ -35,27 +38,31 @@ class gpu_intel_hardware_sampler : public hardware_sampler {
     /**
      * @brief Construct a new Intel GPU hardware sampler for the default device with the default sampling interval.
      * @details If this is the first Intel GPU sampler, initializes the Level Zero environment.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_intel_hardware_sampler();
+    explicit gpu_intel_hardware_sampler(sample_category category = sample_category::all);
     /**
      * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the default sampling interval.
      * @details If this is the first Intel GPU sampler, initializes the Level Zero environment.
      * @param[in] device_id the ID of the device to sample
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_intel_hardware_sampler(std::size_t device_id);
+    explicit gpu_intel_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all);
     /**
      * @brief Construct a new Intel GPU hardware sampler for the default device with the @p sampling_interval.
      * @details If this is the first Intel GPU sampler, initializes the Level Zero environment.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_intel_hardware_sampler(std::chrono::milliseconds sampling_interval);
+    explicit gpu_intel_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
     /**
      * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the @p sampling_interval.
      * @details If this is the first Intel GPU sampler, initializes the Level Zero environment.
      * @param[in] device_id the ID of the device to sample
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_intel_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval);
+    gpu_intel_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member).
@@ -109,21 +116,21 @@ class gpu_intel_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const level_zero_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; }
 
-  private:
     /**
-     * @copydoc hws::hardware_sampler::sampling_loop
+     * @copydoc hws::hardware_sampler::device_identification
      */
-    void sampling_loop() final;
+    [[nodiscard]] std::string device_identification() const final;
 
     /**
-     * @copydoc hws::hardware_sampler::device_identification
+     * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
      */
-    std::string device_identification() const final;
+    [[nodiscard]] std::string samples_only_as_yaml_string() const final;
 
+  private:
     /**
-     * @copydoc hws::hardware_sampler::generate_yaml_string
+     * @copydoc hws::hardware_sampler::sampling_loop
      */
-    std::string generate_yaml_string() const final;
+    void sampling_loop() final;
 
     /// The device handle for the device to sample.
     detail::level_zero_device_handle device_;
@@ -156,7 +163,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
-struct std::formatter<hws::gpu_intel_hardware_sampler> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::gpu_intel_hardware_sampler> : fmt::ostream_formatter { };
+
+/// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp b/include/hws/gpu_intel/level_zero_device_handle.hpp
similarity index 85%
rename from include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp
rename to include/hws/gpu_intel/level_zero_device_handle.hpp
index f84d8a5..c05f630 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp
+++ b/include/hws/gpu_intel/level_zero_device_handle.hpp
@@ -8,8 +8,8 @@
  * @brief Defines a pImpl class for a Level Zero device handle.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
+#ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
+#define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
 #pragma once
 
 #include <cstddef>    // std::size_t
@@ -40,7 +40,7 @@ class level_zero_device_handle {
 
     /**
      * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t.
-     * @throws hardware_sampling_exception if `*this` has been default constructed
+     * @throws std::runtime_error if `*this` has been default constructed
      * @return the device handle (`[[nodiscard]]`)
      */
     [[nodiscard]] level_zero_device_handle_impl &get_impl() {
@@ -52,7 +52,7 @@ class level_zero_device_handle {
 
     /**
      * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t.
-     * @throws hardware_sampling_exception if `*this` has been default constructed
+     * @throws std::runtime_error if `*this` has been default constructed
      * @return the device handle (`[[nodiscard]]`)
      */
     [[nodiscard]] const level_zero_device_handle_impl &get_impl() const {
@@ -69,4 +69,4 @@ class level_zero_device_handle {
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
+#endif  // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_
diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp b/include/hws/gpu_intel/level_zero_device_handle_impl.hpp
similarity index 78%
rename from include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp
rename to include/hws/gpu_intel/level_zero_device_handle_impl.hpp
index 640cdcc..1c3b269 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp
+++ b/include/hws/gpu_intel/level_zero_device_handle_impl.hpp
@@ -8,18 +8,18 @@
  * @brief Implements a pImpl class for a Level Zero device handle.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
+#ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
+#define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
-#include "hardware_sampling/gpu_intel/utility.hpp"                   // HWS_LEVEL_ZERO_ERROR_CHECK
+#include "hws/gpu_intel/level_zero_device_handle.hpp"  // hws::detail::level_zero_device_handle
+#include "hws/gpu_intel/utility.hpp"                   // HWS_LEVEL_ZERO_ERROR_CHECK
 
+#include "fmt/format.h"         // fmt::format
 #include "level_zero/ze_api.h"  // Level Zero runtime functions
 
 #include <cstddef>    // std::size_t
 #include <cstdint>    // std::uint32_t
-#include <format>     // std::format
 #include <memory>     // std::make_shared
 #include <stdexcept>  // std::runtime_error
 #include <vector>     // std::vector
@@ -38,28 +38,28 @@ struct level_zero_device_handle::level_zero_device_handle_impl {
     explicit level_zero_device_handle_impl(const std::size_t device_id) {
         // discover the number of drivers
         std::uint32_t driver_count{ 0 };
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr))
 
         // check if only the single GPU driver has been found
         if (driver_count > 1) {
-            throw std::runtime_error{ std::format("Found too many GPU drivers ({})!", driver_count) };
+            throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) };
         }
 
         // get the GPU driver
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver))
 
         // get all GPUs for the current driver
         std::uint32_t device_count{ 0 };
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr))
 
         // check if enough GPUs have been found
         if (driver_count <= device_id) {
-            throw std::runtime_error{ std::format("Found only {} GPUs, but GPU with the ID was requested!", device_count, device_id) };
+            throw std::runtime_error{ fmt::format("Found only {} GPUs, but GPU with the ID {} was requested!", device_count, device_id) };
         }
 
         // get the GPUs
         std::vector<ze_device_handle_t> all_devices(device_count);
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, all_devices.data()));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, all_devices.data()))
 
         // save the requested device
         device = all_devices[device_id];
@@ -76,4 +76,4 @@ inline level_zero_device_handle::level_zero_device_handle(const std::size_t devi
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
+#endif  // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_
diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hws/gpu_intel/level_zero_samples.hpp
similarity index 55%
rename from include/hardware_sampling/gpu_intel/level_zero_samples.hpp
rename to include/hws/gpu_intel/level_zero_samples.hpp
index 7d0f713..dec6ec5 100644
--- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp
+++ b/include/hws/gpu_intel/level_zero_samples.hpp
@@ -8,14 +8,15 @@
  * @brief Defines the samples used with Level Zero.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
+#ifndef HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
+#define HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter
+#include "hws/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
 
-#include <cstdint>        // std::uint64_t, std::int32_t
-#include <format>         // std::format
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
+
+#include <cstdint>        // std::uint64_t, std::int64_t, std::int32_t
 #include <iosfwd>         // std::ostream forward declaration
 #include <optional>       // std::optional
 #include <string>         // std::string
@@ -36,14 +37,23 @@ class level_zero_general_samples {
     friend class gpu_intel_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any general hardware sample is present.
+     * @return `true` if any general hardware sample is present, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)            // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)             // the vendor ID
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)                  // the model name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<std::string>, flags)    // potential GPU flags (e.g. integrated device)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode)          // the enabled standby mode (power saving or never)
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu)  // the number of threads per EU unit
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, eu_simd_width)       // the physical EU unit SIMD width
@@ -70,26 +80,34 @@ class level_zero_clock_samples {
     friend class gpu_intel_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any clock related hardware sample is present.
+     * @return `true` if any clock related hardware sample is present, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_gpu_min)                      // the minimum possible GPU clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_gpu_max)                      // the maximum possible GPU clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_clocks_gpu)  // the available GPU clock frequencies in MHz (slowest to fastest)
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_mem_min)                      // the minimum possible memory clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_mem_max)                      // the maximum possible memory clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_clocks_mem)  // the available memory clock frequencies in MHz (slowest to fastest)
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, tdp_frequency_limit_gpu)  // the current maximum allowed GPU frequency based on the TDP limit in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_gpu)                // the current GPU frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, throttle_reason_gpu)         // the current GPU frequency throttle reason
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, tdp_frequency_limit_mem)  // the current maximum allowed memory frequency based on the TDP limit in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_mem)                // the current memory frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, throttle_reason_mem)         // the current memory frequency throttle reason
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min)                              // the minimum possible GPU clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max)                              // the maximum possible GPU clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min)                       // the minimum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max)                       // the maximum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_clock_frequencies)         // the available GPU clock frequencies in MHz (slowest to fastest)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_memory_clock_frequencies)  // the available memory clock frequencies in MHz (slowest to fastest)
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)                     // the current GPU frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)              // the current memory frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, throttle_reason)               // the current GPU frequency throttle reason as bitmask
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason_string)         // the current GPU frequency throttle reason as string
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, memory_throttle_reason)        // the current memory frequency throttle reason as bitmask
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, memory_throttle_reason_string)  // the current memory frequency throttle reason as string
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, frequency_limit_tdp)                 // the current maximum allowed GPU frequency based on the TDP limit in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_frequency_limit_tdp)          // the current maximum allowed memory frequency based on the TDP limit in MHz
 };
 
 /**
@@ -113,17 +131,25 @@ class level_zero_power_samples {
     friend class gpu_intel_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any power related hardware sample is present.
+     * @return `true` if any power related hardware sample is present, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, energy_threshold_enabled)  // true if the energy threshold is enabled
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, energy_threshold)        // the energy threshold in J
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit)         // the actually enforced power limit (W), may be different from power management limit if external limiters are set
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type)  // the type of the power readings
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode)          // true if power management limits are enabled
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_total_energy_consumption)  // the total power consumption since the last driver reload in mJ
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage)                     // the current power draw of the GPU in W (calculated from power_total_energy_consumption)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption)  // the total power consumption since the last driver reload in J
 };
 
 /**
@@ -154,26 +180,33 @@ class level_zero_memory_samples {
     using map_type = std::unordered_map<std::string, T>;
 
   public:
+    /**
+     * @brief Checks whether any memory related hardware sample is present.
+     * @return `true` if any memory related hardware sample is present, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::uint64_t>, memory_total)              // the total memory size of the different memory modules in Bytes
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::uint64_t>, allocatable_memory_total)  // the total allocatable memory size of the different memory modules in Bytes
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, pcie_link_max_speed)                  // the maximum PCIe bandwidth in bytes/sec
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, pcie_max_width)                       // the PCIe lane width
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, max_pcie_link_generation)             // the PCIe generation
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::int32_t>, bus_width)                  // the bus width of the different memory modules
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::int32_t>, num_channels)               // the number of memory channels of the different memory modules
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::string>, location)                    // the location of the different memory modules (system or device)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::uint64_t>, memory_total)          // the total memory size of the different memory modules in Bytes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::uint64_t>, visible_memory_total)  // the total allocatable memory size of the different memory modules in Bytes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::string>, memory_location)         // the location of the different memory modules (system or device)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, num_pcie_lanes_max)               // the maximum PCIe lane width
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, pcie_link_generation_max)         // the maximum PCIe generation
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, pcie_link_speed_max)              // the maximum PCIe bandwidth in MBPS
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::int32_t>, memory_bus_width)       // the bus width of the different memory modules
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::int32_t>, memory_num_channels)    // the number of memory channels of the different memory modules
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::vector<std::uint64_t>>, memory_free)  // the currently free memory of the different memory modules in Bytes
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, pcie_link_speed)                   // the current PCIe bandwidth in bytes/sec
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, pcie_link_width)                   // the current PCIe lane width
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::vector<std::uint64_t>>, memory_used)  // the currently used memory of the different memory modules in Bytes
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, num_pcie_lanes)                    // the current PCIe lane width
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, pcie_link_generation)              // the current PCIe generation
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, pcie_link_speed)                   // the current PCIe bandwidth in bytes/sec
 };
 
 /**
@@ -196,25 +229,31 @@ class level_zero_temperature_samples {
     // befriend hardware sampler class
     friend class gpu_intel_hardware_sampler;
 
+  public:
     /**
-     * @brief The map type used if the number of potential Level Zero domains is unknown at compile time.
-     * @tparam T the mapped type
+     * @brief Checks whether any temperature related hardware sample is present.
+     * @return `true` if any temperature related hardware sample is present, otherwise `false` (`[[nodiscard]]`)
      */
-    template <typename T>
-    using map_type = std::unordered_map<std::string, T>;
-
-  public:
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<double>, temperature_max)  // the maximum temperature for the sensor in °C
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, temperature_psu)            // the temperature of the PSU in °C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type<std::vector<double>>, temperature)  // the current temperature for the sensor in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans)         // the number of fans
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, fan_speed_max)     // the maximum fan speed the user can set in RPM
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max)         // the maximum GPU temperature in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max)  // the maximum memory temperature in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, global_temperature_max)  // the maximum global temperature in °C
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage)  // the current intended fan speed in %
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature)           // the temperature of the GPU in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_temperature)    // the temperature of the memory in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, global_temperature)    // the global temperature in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, psu_temperature)       // the temperature of the PSU in °C
 };
 
 /**
@@ -228,19 +267,23 @@ std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
-struct std::formatter<hws::level_zero_general_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_general_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::level_zero_clock_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_clock_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::level_zero_power_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_power_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::level_zero_memory_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_memory_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::level_zero_temperature_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::level_zero_temperature_samples> : fmt::ostream_formatter { };
+
+/// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
+#endif  // HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_
diff --git a/include/hardware_sampling/gpu_intel/utility.hpp b/include/hws/gpu_intel/utility.hpp
similarity index 67%
rename from include/hardware_sampling/gpu_intel/utility.hpp
rename to include/hws/gpu_intel/utility.hpp
index 810901f..76e15a1 100644
--- a/include/hardware_sampling/gpu_intel/utility.hpp
+++ b/include/hws/gpu_intel/utility.hpp
@@ -8,17 +8,18 @@
  * @brief Implements utility functionality for the Intel GPU sampler.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_
-#define HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_
+#ifndef HWS_GPU_INTEL_UTILITY_HPP_
+#define HWS_GPU_INTEL_UTILITY_HPP_
 #pragma once
 
+#include "fmt/format.h"          // fmt::format
 #include "level_zero/ze_api.h"   // Level Zero runtime functions
 #include "level_zero/zes_api.h"  // Level Zero runtime functions
 
-#include <format>       // std::format
 #include <stdexcept>    // std::runtime_error
 #include <string>       // std::string
 #include <string_view>  // std::string_view
+#include <vector>       // std::vector
 
 namespace hws::detail {
 
@@ -35,17 +36,31 @@ namespace hws::detail {
  * @details Throws an exception if a Level Zero call returns with an error. Additionally outputs a more concrete custom error string.
  */
 #if defined(HWS_ERROR_CHECKS_ENABLED)
-    #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func)                                                                                            \
-        {                                                                                                                                          \
-            const ze_result_t errc = level_zero_func;                                                                                              \
-            if (errc != ZE_RESULT_SUCCESS) {                                                                                                       \
-                throw std::runtime_error{ std::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, to_result_string(errc)) }; \
-            }                                                                                                                                      \
+    #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func)                                                                                                           \
+        {                                                                                                                                                         \
+            const ze_result_t errc = level_zero_func;                                                                                                             \
+            if (errc != ZE_RESULT_SUCCESS) {                                                                                                                      \
+                throw std::runtime_error{ fmt::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, ::hws::detail::to_result_string(errc)) }; \
+            }                                                                                                                                                     \
         }
 #else
     #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) level_zero_func;
 #endif
 
+/**
+ * @brief Convert the @p flags to a vector of strings.
+ * @param[in] flags the flags to convert to strings
+ * @return a vector containing all flags as strings (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::vector<std::string> property_flags_to_vector(ze_device_property_flags_t flags);
+
+/**
+ * @brief Convert the throttle reason bitmask to a string representation. If the provided bitmask represents multiple reasons, they are separated by "|".
+ * @param[in] reasons the bitmask to convert to a string
+ * @return all throttle reasons (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::string throttle_reason_to_string(zes_freq_throttle_reason_flags_t reasons);
+
 /**
  * @brief Convert a Level Zero memory type to a string representation.
  * @param[in] mem_type the Level Zero memory type
@@ -60,13 +75,6 @@ namespace hws::detail {
  */
 [[nodiscard]] std::string memory_location_to_name(zes_mem_loc_t mem_loc);
 
-/**
- * @brief Convert a Level Zero temperature sensor type to a string representation.
- * @param[in] sensor_type the Level Zero temperature sensor type
- * @return the string representation (`[[nodiscard]]`)
- */
-[[nodiscard]] std::string temperature_sensor_type_to_name(zes_temp_sensors_t sensor_type);
-
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_
+#endif  // HWS_GPU_INTEL_UTILITY_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hws/gpu_nvidia/hardware_sampler.hpp
similarity index 78%
rename from include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
rename to include/hws/gpu_nvidia/hardware_sampler.hpp
index de22f3f..59a5e31 100644
--- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp
+++ b/include/hws/gpu_nvidia/hardware_sampler.hpp
@@ -8,19 +8,20 @@
  * @brief Defines a hardware sampler for NVIDIA GPUs using NVIDIA's Management Library (NVML).
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
+#define HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp"  // hws::nvml_device_handle
-#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"        // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
-#include "hardware_sampling/hardware_sampler.hpp"               // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"                        // hws::detail::ostream_formatter
+#include "hws/gpu_nvidia/nvml_device_handle.hpp"  // hws::nvml_device_handle
+#include "hws/gpu_nvidia/nvml_samples.hpp"        // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
+#include "hws/hardware_sampler.hpp"               // hws::hardware_sampler
+#include "hws/sample_category.hpp"                // hws::sample_category
+
+#include "fmt/format.h"  // fmt::formatter, fmt::ostream_formatter
 
 #include <atomic>   // std::atomic
 #include <chrono>   // std::chrono::milliseconds, std::chrono_literals namespace
 #include <cstddef>  // std::size_t
-#include <format>   // std::formatter
 #include <iosfwd>   // std::ostream forward declaration
 #include <string>   // std::string
 
@@ -37,27 +38,31 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler {
     /**
      * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval.
      * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_nvidia_hardware_sampler();
+    explicit gpu_nvidia_hardware_sampler(sample_category category = sample_category::all);
     /**
      * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the default sampling interval.
      * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment.
      * @param[in] device_id the ID of the device to sample
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_nvidia_hardware_sampler(std::size_t device_id);
+    explicit gpu_nvidia_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all);
     /**
      * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the @p sampling_interval.
      * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    explicit gpu_nvidia_hardware_sampler(std::chrono::milliseconds sampling_interval);
+    explicit gpu_nvidia_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
     /**
      * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the @p sampling_interval.
      * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment.
      * @param[in] device_id the ID of the device to sample
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
      */
-    gpu_nvidia_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval);
+    gpu_nvidia_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member).
@@ -112,21 +117,21 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler {
      */
     [[nodiscard]] const nvml_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; }
 
-  private:
     /**
-     * @copydoc hws::hardware_sampler::sampling_loop
+     * @copydoc hws::hardware_sampler::device_identification
      */
-    void sampling_loop() final;
+    [[nodiscard]] std::string device_identification() const final;
 
     /**
-     * @copydoc hws::hardware_sampler::device_identification
+     * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const
      */
-    std::string device_identification() const final;
+    [[nodiscard]] std::string samples_only_as_yaml_string() const final;
 
+  private:
     /**
-     * @copydoc hws::hardware_sampler::generate_yaml_string
+     * @copydoc hws::hardware_sampler::sampling_loop
      */
-    std::string generate_yaml_string() const final;
+    void sampling_loop() final;
 
     /// The device handle for the device to sample.
     detail::nvml_device_handle device_{};
@@ -159,7 +164,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
-struct std::formatter<hws::gpu_nvidia_hardware_sampler> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::gpu_nvidia_hardware_sampler> : fmt::ostream_formatter { };
+
+/// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp b/include/hws/gpu_nvidia/nvml_device_handle.hpp
similarity index 85%
rename from include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp
rename to include/hws/gpu_nvidia/nvml_device_handle.hpp
index f52fb84..eb3da33 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp
+++ b/include/hws/gpu_nvidia/nvml_device_handle.hpp
@@ -8,8 +8,8 @@
  * @brief Defines a pImpl class for an NVML device handle.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
+#ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
+#define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
 #pragma once
 
 #include <cstddef>    // std::size_t
@@ -40,7 +40,7 @@ class nvml_device_handle {
 
     /**
      * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t.
-     * @throws hardware_sampling_exception if `*this` has been default constructed
+     * @throws std::runtime_error if `*this` has been default constructed
      * @return the device handle (`[[nodiscard]]`)
      */
     [[nodiscard]] nvml_device_handle_impl &get_impl() {
@@ -52,7 +52,7 @@ class nvml_device_handle {
 
     /**
      * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t.
-     * @throws hardware_sampling_exception if `*this` has been default constructed
+     * @throws std::runtime_error if `*this` has been default constructed
      * @return the device handle (`[[nodiscard]]`)
      */
     [[nodiscard]] const nvml_device_handle_impl &get_impl() const {
@@ -69,4 +69,4 @@ class nvml_device_handle {
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
+#endif  // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp b/include/hws/gpu_nvidia/nvml_device_handle_impl.hpp
similarity index 72%
rename from include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp
rename to include/hws/gpu_nvidia/nvml_device_handle_impl.hpp
index 9247f29..7656599 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp
+++ b/include/hws/gpu_nvidia/nvml_device_handle_impl.hpp
@@ -8,12 +8,12 @@
  * @brief Implements a pImpl class for an NVML device handle.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
+#ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
+#define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
 #pragma once
 
-#include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp"  // hws::detail::nvml_device_handle
-#include "hardware_sampling/gpu_nvidia/utility.hpp"             // HWS_NVML_ERROR_CHECK
+#include "hws/gpu_nvidia/nvml_device_handle.hpp"  // hws::detail::nvml_device_handle
+#include "hws/gpu_nvidia/utility.hpp"             // HWS_NVML_ERROR_CHECK
 
 #include "nvml.h"  // nvmlDevice_t
 
@@ -32,7 +32,7 @@ struct nvml_device_handle::nvml_device_handle_impl {
      * @param[in] device_id the device to get the handle for
      */
     explicit nvml_device_handle_impl(const std::size_t device_id) {
-        HWS_NVML_ERROR_CHECK(nvmlDeviceGetHandleByIndex(static_cast<int>(device_id), &device));
+        HWS_NVML_ERROR_CHECK(nvmlDeviceGetHandleByIndex(static_cast<int>(device_id), &device))
     }
 
     /// The wrapped NVML device handle.
@@ -44,4 +44,4 @@ inline nvml_device_handle::nvml_device_handle(const std::size_t device_id) :
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
+#endif  // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_
diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hws/gpu_nvidia/nvml_samples.hpp
similarity index 53%
rename from include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
rename to include/hws/gpu_nvidia/nvml_samples.hpp
index 29b1d5a..0ddd6ae 100644
--- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp
+++ b/include/hws/gpu_nvidia/nvml_samples.hpp
@@ -8,14 +8,16 @@
  * @brief Defines the samples used with NVML.
  */
 
-#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_
-#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_
+#ifndef HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_
+#define HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_
 #pragma once
 
-#include "hardware_sampling/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter
+#include "hws/utility.hpp"  // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER
+
+#include "fmt/ostream.h"  // fmt::formatter, fmt::ostream_formatter
 
-#include <format>    // std::formatter
 #include <iosfwd>    // std::ostream forward declaration
+#include <map>       // std::map
 #include <optional>  // std::optional
 #include <string>    // std::string
 #include <vector>    // std::vector
@@ -34,20 +36,29 @@ class nvml_general_samples {
     friend class gpu_nvidia_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any general hardware sample is present.
+     * @return `true` if any general hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)        // the name of the device
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode)   // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)  // the number of CUDA cores
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture)  // the architecture name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order)    // the byte order (e.g., little/big endian)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores)    // the number of CUDA cores
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id)     // the vendor ID
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name)          // the name of the device
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode)     // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state)         // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_gpu)  // the GPU compute utilization in percent
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_mem)  // the GPU memory utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, compute_utilization)  // the GPU compute utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, memory_utilization)   // the GPU memory utilization in percent
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level)             // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance
 };
 
 /**
@@ -70,26 +81,38 @@ class nvml_clock_samples {
     // befriend hardware sampler class
     friend class gpu_nvidia_hardware_sampler;
 
+    /// The map type used to map the available clock frequencies to a specific memory frequency.
+    using map_type = std::map<double, std::vector<double>>;
+
   public:
+    /**
+     * @brief Checks whether any clock related hardware sample is present.
+     * @return `true` if any clock related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, adaptive_clock_status)  // true if clock boosting is currently enabled
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_graph_min)        // the minimum possible graphics clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_graph_max)        // the maximum possible graphics clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_sm_max)           // the maximum possible SM clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_mem_min)          // the minimum possible memory clock frequency in MHz
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_mem_max)          // the maximum possible memory clock frequency in MHz
-
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_graph)                  // the current graphics clock frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_sm)                     // the current SM clock frequency in Mhz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_mem)                    // the current memory clock frequency in MHz
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, clock_throttle_reason)  // the reason the GPU clock throttled (bitmask)
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clocks)                  // true if the clocks are currently auto boosted
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, auto_boosted_clock_enabled)                         // true if clock boosting is currently enabled
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min)                              // the minimum possible graphics clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max)                              // the maximum possible graphics clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min)                       // the minimum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max)                       // the maximum possible memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, sm_clock_frequency_max)                           // the maximum possible SM clock frequency in MHz
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, available_clock_frequencies)                    // the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<double>, available_memory_clock_frequencies)  // the available memory clock frequencies in MHz (slowest to fastest)
+
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency)              // the current graphics clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency)       // the current memory clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, sm_clock_frequency)           // the current SM clock frequency in MHz
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, throttle_reason)  // the reason the GPU clock throttled (as bitmask)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason_string)  // the reason the GPU clock throttled (as string)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clock)             // true if the clocks are currently auto boosted
 };
 
 /**
@@ -113,20 +136,28 @@ class nvml_power_samples {
     friend class gpu_nvidia_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any power related hardware sample is present.
+     * @return `true` if any power related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode)           // true if power management algorithms are supported and active
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, power_management_limit)  // if the GPU draws more power (mW) than the power management limit, the GPU may throttle
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, power_enforced_limit)    // the actually enforced power limit, may be different from power management limit if external limiters are set
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_management_limit)              // if the GPU draws more power (W) than the power management limit, the GPU may throttle
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit)                // the actually enforced power limit (W), may be different from power management limit if external limiters are set
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type)         // the type of the power readings: either current power draw or average power draw
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode)                 // true if power management algorithms are supported and active
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector<int>, available_power_profiles)  // a list of the available power profiles
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, power_state)                                    // the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, power_usage)                           // the current power draw of the GPU and its related circuity (e.g., memory) in mW
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, power_total_energy_consumption)  // the total power consumption since the last driver reload in mJ
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage)                     // the current power draw of the GPU and its related circuitry (e.g., memory) in W
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption)  // the total energy consumption since the last driver reload in J
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, power_profile)                      // the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power; 32 indicates unknown
 };
 
 /**
@@ -150,23 +181,30 @@ class nvml_memory_samples {
     friend class gpu_nvidia_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any memory related hardware sample is present.
+     * @return `true` if any memory related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
+     *          Returns an empty string if `has_samples()` returns `false`.
      * @return the YAML string (`[[nodiscard]]`)
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long, memory_total)             // the total available memory in Byte
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_max_speed)       // the maximum PCIe link speed in MBPS
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_pcie_lanes_max)        // the maximum number of PCIe lanes
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_generation_max)  // the maximum PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc.)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_speed_max)       // the maximum PCIe link speed in MBPS
     HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, memory_bus_width)          // the memory bus with in Bit
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, max_pcie_link_generation)  // the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_free)     // the currently free memory in Byte
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_used)     // the currently used memory in Byte
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_speed)       // the current PCIe link speed in MBPS
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_width)       // the current PCIe link width (e.g., x16, x8, x4, etc)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_free)     // the currently free memory in Byte
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, num_pcie_lanes)        // the current PCIe link width (e.g., x16, x8, x4, etc)
     HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_generation)  // the current PCIe link generation (may change during runtime to save energy)
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_speed)       // the current PCIe link speed in MBPS
 };
 
 /**
@@ -190,6 +228,11 @@ class nvml_temperature_samples {
     friend class gpu_nvidia_hardware_sampler;
 
   public:
+    /**
+     * @brief Checks whether any temperature related hardware sample is present.
+     * @return `true` if any temperature related hardware sample is present, otherwise `false`.
+     */
+    [[nodiscard]] bool has_samples() const;
     /**
      * @brief Assemble the YAML string containing all available general hardware samples.
      * @details Hardware samples that are not supported by the current device are omitted in the YAML output.
@@ -197,14 +240,14 @@ class nvml_temperature_samples {
      */
     [[nodiscard]] std::string generate_yaml_string() const;
 
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_fans)                       // the number of fans (if any)
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, min_fan_speed)                  // the minimum fan speed the user can set in %
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, max_fan_speed)                  // the maximum fan speed the user can set in %
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, temperature_threshold_gpu_max)  // the maximum graphics temperature threshold in °C
-    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, temperature_threshold_mem_max)  // the maximum memory temperature threshold in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_fans)          // the number of fans (if any)
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, fan_speed_min)     // the minimum fan speed the user can set in %
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, fan_speed_max)     // the maximum fan speed the user can set in %
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max)         // the maximum graphics temperature threshold in °C
+    HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max)  // the maximum memory temperature threshold in °C
 
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, fan_speed)        // the current intended fan speed in %
-    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, temperature_gpu)  // the current GPU temperature in °C
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage)  // the current intended fan speed in %
+    HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature)           // the current GPU temperature in °C
 };
 
 /**
@@ -218,19 +261,23 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp
 
 }  // namespace hws
 
+/// @cond Doxygen_suppress
+
 template <>
-struct std::formatter<hws::nvml_general_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_general_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::nvml_clock_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_clock_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::nvml_power_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_power_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::nvml_memory_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_memory_samples> : fmt::ostream_formatter { };
 
 template <>
-struct std::formatter<hws::nvml_temperature_samples> : hws::detail::ostream_formatter { };
+struct fmt::formatter<hws::nvml_temperature_samples> : fmt::ostream_formatter { };
+
+/// @endcond
 
-#endif  // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_
+#endif  // HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_
diff --git a/include/hws/gpu_nvidia/utility.hpp b/include/hws/gpu_nvidia/utility.hpp
new file mode 100644
index 0000000..c405386
--- /dev/null
+++ b/include/hws/gpu_nvidia/utility.hpp
@@ -0,0 +1,64 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Implements utility functionality for the NVIDIA GPU sampler.
+ */
+
+#ifndef HWS_GPU_NVIDIA_UTILITY_HPP_
+#define HWS_GPU_NVIDIA_UTILITY_HPP_
+#pragma once
+
+#include "cuda_runtime_api.h"  // CUDA runtime functions
+#include "fmt/format.h"        // fmt::format
+#include "nvml.h"              // NVML runtime functions
+
+#include <stdexcept>  // std::runtime_error
+#include <string>     // std::string
+
+namespace hws::detail {
+
+/**
+ * @def HWS_NVML_ERROR_CHECK
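+ * @brief Wraps an NVML function call. The returned error code is only checked if `HWS_ERROR_CHECKS_ENABLED` is defined, otherwise the call is executed unchecked.
+ * @details If checks are enabled, throws a `std::runtime_error` if the NVML call returns with an error. The exception message contains the failing call, the NVML error string, and the numeric error code.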
+ */
+/**
+ * @def HWS_CUDA_ERROR_CHECK
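+ * @brief Wraps a CUDA function call. The returned error code is only checked if `HWS_ERROR_CHECKS_ENABLED` is defined, otherwise the call is executed unchecked.
+ * @details If checks are enabled, throws a `std::runtime_error` if the CUDA call returns with an error. The exception message contains the failing call, the CUDA error name, and the CUDA error string.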
+ */
+#if defined(HWS_ERROR_CHECKS_ENABLED)
+    #define HWS_NVML_ERROR_CHECK(nvml_func)                                                                                                                        \
+        {                                                                                                                                                          \
+            const nvmlReturn_t errc = nvml_func;                                                                                                                   \
+            if (errc != NVML_SUCCESS) {                                                                                                                            \
+                throw std::runtime_error{ fmt::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast<int>(errc)) }; \
+            }                                                                                                                                                      \
+        }
+
+    #define HWS_CUDA_ERROR_CHECK(cuda_func)                                                                                                                           \
+        {                                                                                                                                                             \
+            const cudaError_t errc = cuda_func;                                                                                                                       \
+            if (errc != cudaSuccess) {                                                                                                                                \
+                throw std::runtime_error{ fmt::format("Error in CUDA function call \"{}\": {} ({})", #cuda_func, cudaGetErrorName(errc), cudaGetErrorString(errc)) }; \
+            }                                                                                                                                                         \
+        }
+#else
+    #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func;
+    #define HWS_CUDA_ERROR_CHECK(cuda_func) cuda_func;
+#endif
+
+/**
+ * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|".
+ * @param[in] clocks_event_reasons the bitmask to convert to a string
+ * @return all event throttle reasons (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::string throttle_event_reason_to_string(unsigned long long clocks_event_reasons);
+
+}  // namespace hws::detail
+
+#endif  // HWS_GPU_NVIDIA_UTILITY_HPP_
diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hws/hardware_sampler.hpp
similarity index 76%
rename from include/hardware_sampling/hardware_sampler.hpp
rename to include/hws/hardware_sampler.hpp
index ce7c6fb..326eb7e 100644
--- a/include/hardware_sampling/hardware_sampler.hpp
+++ b/include/hws/hardware_sampler.hpp
@@ -8,11 +8,12 @@
  * @brief Defines the base class for all hardware samplers.
  */
 
-#ifndef HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_
-#define HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_
+#ifndef HWS_HARDWARE_SAMPLER_HPP_
+#define HWS_HARDWARE_SAMPLER_HPP_
 #pragma once
 
-#include "hardware_sampling/event.hpp"  // hws::event
+#include "hws/event.hpp"            // hws::event
+#include "hws/sample_category.hpp"  // hws::sample_category
 
 #include <atomic>      // std::atomic
 #include <chrono>      // std::chrono::{system_clock::time_point, steady_clock::time_point, milliseconds}
@@ -32,8 +33,10 @@ class hardware_sampler {
     /**
      * @brief Construct a new hardware sampler with the provided @p sampling_interval.
      * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling
+     * @throws std::invalid_argument if the @p sampling_interval is zero
      */
-    explicit hardware_sampler(std::chrono::milliseconds sampling_interval);
+    hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category);
 
     /**
      * @brief Delete the copy-constructor (already implicitly deleted due to the std::atomic member).
@@ -86,7 +89,7 @@ class hardware_sampler {
      */
     [[nodiscard]] bool has_sampling_started() const noexcept;
     /**
-     * @brief Check whether this hardware sampler has currently sampling.
+     * @brief Check whether this hardware sampler is currently sampling.
      * @return `true` if the hardware sampler is currently sampling, `false` otherwise (`[[nodiscard]]`)
      */
     [[nodiscard]] bool is_sampling() const noexcept;
@@ -120,15 +123,16 @@ class hardware_sampler {
     [[nodiscard]] std::size_t num_events() const noexcept { return events_.size(); }
 
     /**
-     * @brief Return the number of recorded events.
-     * @return the number of events (`[[nodiscard]]`)
+     * @brief Return a vector of all recorded events.
+     * @return the events (`[[nodiscard]]`)
      */
     [[nodiscard]] const std::vector<event> &get_events() const noexcept { return events_; }
 
     /**
-     * @brief Return the number of recorded events.
+     * @brief Return the event at index @p idx.
+     * @param[in] idx the event to return
      * @throws std::out_of_range the the @p idx is out of bounce
-     * @return the number of events (`[[nodiscard]]`)
+     * @return the event at index @p idx (`[[nodiscard]]`)
      */
     [[nodiscard]] event get_event(std::size_t idx) const;
 
@@ -148,21 +152,15 @@ class hardware_sampler {
      * @brief Dump the hardware samples to the YAML file with @p filename.
      * @param[in] filename the YAML file to append the hardware samples to
      */
-    void dump_yaml(const char *filename);
+    void dump_yaml(const char *filename) const;
     /**
-     * @copydoc hws::hardware_sampler::dump_yaml(const char *)
+     * @copydoc hws::hardware_sampler::dump_yaml(const char *) const
      */
-    void dump_yaml(const std::string &filename);
+    void dump_yaml(const std::string &filename) const;
     /**
-     * @copydoc hws::hardware_sampler::dump_yaml(const char *)
+     * @copydoc hws::hardware_sampler::dump_yaml(const char *) const
      */
-    void dump_yaml(const std::filesystem::path &filename);
-
-  protected:
-    /**
-     * @brief Getter the hardware samples. Called in another std::thread.
-     */
-    virtual void sampling_loop() = 0;
+    void dump_yaml(const std::filesystem::path &filename) const;
 
     /**
      * @brief Return the unique device identification. Can be used as unique key in the YAML string.
@@ -171,18 +169,35 @@ class hardware_sampler {
     [[nodiscard]] virtual std::string device_identification() const = 0;
 
     /**
-     * @brief Assemble the YAML string containing all hardware samples.
-     * @param[in] start_time_point the reference time point the hardware samples occurred relative to
+     * @brief Return the hardware samples as well as events and time points as YAML string.
+     * @return the YAML content as string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string as_yaml_string() const;
+    /**
+     * @brief Return only the hardware samples as YAML string.
      * @throws std::runtime_error if sampling is still running
-     * @return the YAML string (`[[nodiscard]]`)
+     * @return the YAML content as string (`[[nodiscard]]`)
      */
-    [[nodiscard]] virtual std::string generate_yaml_string() const = 0;
+    [[nodiscard]] virtual std::string samples_only_as_yaml_string() const = 0;
+
+  protected:
+    /**
+     * @brief Gather the hardware samples. Called in another std::thread.
+     */
+    virtual void sampling_loop() = 0;
 
     /**
      * @brief Add a new time point to this hardware sampler. Called during the sampling loop.
      * @param time_point the new time point to add
      */
-    void add_time_point(const std::chrono::steady_clock::time_point time_point) { time_points_.push_back(time_point); }
+    void add_time_point(std::chrono::steady_clock::time_point time_point);
+
+    /**
+     * @brief Check whether the @p category is currently enabled for hardware sampling or not.
+     * @param[in] category the sample_category to check
+     * @return Returns `true` if @p category is enabled for sampling, otherwise `false` (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool sample_category_enabled(sample_category category) const noexcept;
 
   private:
     /// A boolean flag indicating whether the sampling has already started.
@@ -206,8 +221,11 @@ class hardware_sampler {
 
     /// The sampling interval of this hardware sampler.
     const std::chrono::milliseconds sampling_interval_{};
+
+    /// The bitmask of sample categories to use.
+    const sample_category sample_category_{};
 };
 
 }  // namespace hws
 
-#endif  // HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_
+#endif  // HWS_HARDWARE_SAMPLER_HPP_
diff --git a/include/hws/sample_category.hpp b/include/hws/sample_category.hpp
new file mode 100644
index 0000000..0ec500b
--- /dev/null
+++ b/include/hws/sample_category.hpp
@@ -0,0 +1,117 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines an enum class with all sample categories to be able to only selectively enable some samples.
+ */
+
+#ifndef HWS_SAMPLE_CATEGORY_HPP_
+#define HWS_SAMPLE_CATEGORY_HPP_
+#pragma once
+
+namespace hws {
+
+/**
+ * @brief Enum class as bitfield containing the possible sample categories.
+ * @details The sample categories "gfx" and "idle_state" are only used in the cpu_hardware_sampler.
+ *          Additionally, the "all" sample_category is available to easily enable all hardware samples (default).
+ */
+enum class sample_category : int {
+    // clang-format off
+    /// General hardware samples like architecture, names, or utilization.
+    general     = 0b00000001,
+    /// Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons.
+    clock       = 0b00000010,
+    /// Power-related hardware samples like current power draw or total energy consumption.
+    power       = 0b00000100,
+    /// Memory-related hardware samples like memory usage or PCIe information.
+    memory      = 0b00001000,
+    /// Temperature-related hardware samples like maximum and current temperatures.
+    temperature = 0b00010000,
+    /// Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler.
+    gfx         = 0b00100000,
+    /// Idle-state-related hardware samples. Only used in the cpu_hardware_sampler.
+    idle_state  = 0b01000000,
+    /// Shortcut to enable all available hardware samples (default).
+    all         = 0b01111111
+    // clang-format on
+};
+
+/**
+ * @brief Compute the bitwise not of @p sc.
+ * @param[in] sc the sample_category to apply the bitwise not to
+ * @return the bitwise not result (`[[nodiscard]]`)
+ */
+[[nodiscard]] constexpr sample_category operator~(const sample_category sc) noexcept {
+    return static_cast<sample_category>(~static_cast<int>(sc));
+}
+
+/**
+ * @brief Compute the bitwise and between @p lhs and @p rhs and return a new sample_category.
+ * @param[in] lhs the first sample_category
+ * @param[in] rhs the second sample_category
+ * @return the bitwise and result (`[[nodiscard]]`)
+ */
+[[nodiscard]] constexpr sample_category operator&(const sample_category lhs, const sample_category rhs) noexcept {
+    return static_cast<sample_category>(static_cast<int>(lhs) & static_cast<int>(rhs));
+}
+
+/**
+ * @brief Compute the bitwise or between @p lhs and @p rhs and return a new sample_category.
+ * @param[in] lhs the first sample_category
+ * @param[in] rhs the second sample_category
+ * @return the bitwise or result (`[[nodiscard]]`)
+ */
+[[nodiscard]] constexpr sample_category operator|(const sample_category lhs, const sample_category rhs) noexcept {
+    return static_cast<sample_category>(static_cast<int>(lhs) | static_cast<int>(rhs));
+}
+
+/**
+ * @brief Compute the bitwise xor between @p lhs and @p rhs and return a new sample_category.
+ * @param[in] lhs the first sample_category
+ * @param[in] rhs the second sample_category
+ * @return the bitwise xor result (`[[nodiscard]]`)
+ */
+[[nodiscard]] constexpr sample_category operator^(const sample_category lhs, const sample_category rhs) noexcept {
+    return static_cast<sample_category>(static_cast<int>(lhs) ^ static_cast<int>(rhs));
+}
+
+/**
+ * @brief Compute the bitwise compound and between @p lhs and @p rhs and return the result in @p lhs.
+ * @param[in,out] lhs the first sample_category
+ * @param[in] rhs the second sample_category
+ * @return a reference to @p lhs containing the bitwise and result
+ */
+constexpr sample_category &operator&=(sample_category &lhs, const sample_category rhs) noexcept {
+    lhs = lhs & rhs;
+    return lhs;
+}
+
+/**
+ * @brief Compute the bitwise compound or between @p lhs and @p rhs and return the result in @p lhs.
+ * @param[in,out] lhs the first sample_category
+ * @param[in] rhs the second sample_category
+ * @return a reference to @p lhs containing the bitwise or result
+ */
+constexpr sample_category &operator|=(sample_category &lhs, const sample_category rhs) noexcept {
+    lhs = lhs | rhs;
+    return lhs;
+}
+
+/**
+ * @brief Compute the bitwise compound xor between @p lhs and @p rhs and return the result in @p lhs.
+ * @param[in,out] lhs the first sample_category
+ * @param[in] rhs the second sample_category
+ * @return a reference to @p lhs containing the bitwise xor result
+ */
+constexpr sample_category &operator^=(sample_category &lhs, const sample_category rhs) noexcept {
+    lhs = lhs ^ rhs;
+    return lhs;
+}
+
+}  // namespace hws
+
+#endif  // HWS_SAMPLE_CATEGORY_HPP_
diff --git a/include/hws/system_hardware_sampler.hpp b/include/hws/system_hardware_sampler.hpp
new file mode 100644
index 0000000..42924ac
--- /dev/null
+++ b/include/hws/system_hardware_sampler.hpp
@@ -0,0 +1,197 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Defines a hardware sampler for the whole system, i.e., automatically creates CPU and GPU hardware samples if the respective sampler and hardware are available.
+ */
+
+#ifndef HWS_SYSTEM_HARDWARE_SAMPLER_HPP_
+#define HWS_SYSTEM_HARDWARE_SAMPLER_HPP_
+
+#include "hws/event.hpp"             // hws::event
+#include "hws/hardware_sampler.hpp"  // hws::hardware_sampler
+#include "hws/sample_category.hpp"   // hws::sample_category
+
+#include <chrono>      // std::chrono::{milliseconds, steady_clock::time_point}
+#include <cstddef>     // std::size_t
+#include <filesystem>  // std::filesystem::path
+#include <memory>      // std::unique_ptr
+#include <string>      // std::string
+#include <vector>      // std::vector
+
+namespace hws {
+
+/**
+ * @brief A hardware sampler for the whole system.
+ * @details Enables hardware samplers for which hardware is available and the CMake configuration found the respective dependencies.
+ */
+class system_hardware_sampler {
+  public:
+    /**
+     * @brief Construct hardware samplers with the default sampling interval.
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
+     */
+    explicit system_hardware_sampler(sample_category category = sample_category::all);
+    /**
+     * @brief Construct hardware samplers with the provided @p sampling_interval.
+     * @param[in] sampling_interval the used sampling interval
+     * @param[in] category the sample categories that are enabled for hardware sampling (default: all)
+     */
+    explicit system_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all);
+
+    /**
+     * @brief Delete the copy-constructor.
+     */
+    system_hardware_sampler(const system_hardware_sampler &) = delete;
+    /**
+     * @brief Delete the move-constructor.
+     */
+    system_hardware_sampler(system_hardware_sampler &&) noexcept = delete;
+    /**
+     * @brief Delete the copy-assignment operator.
+     */
+    system_hardware_sampler &operator=(const system_hardware_sampler &) = delete;
+    /**
+     * @brief Delete the move-assignment operator.
+     */
+    system_hardware_sampler &operator=(system_hardware_sampler &&) noexcept = delete;
+
+    /**
+     * @brief Explicitly use the default destructor.
+     */
+    ~system_hardware_sampler() = default;
+
+    /**
+     * @brief Start hardware sampling for all wrapped hardware samplers.
+     */
+    void start_sampling();
+    /**
+     * @brief Stop hardware sampling for all wrapped hardware samplers.
+     */
+    void stop_sampling();
+    /**
+     * @brief Pause hardware sampling for all wrapped hardware samplers.
+     */
+    void pause_sampling();
+    /**
+     * @brief Resume hardware sampling for all wrapped hardware samplers.
+     */
+    void resume_sampling();
+
+    /**
+     * @brief Check whether the hardware samplers have already started sampling.
+     * @return `true` if **all** hardware samplers have already started sampling, `false` otherwise (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_sampling_started() const noexcept;
+    /**
+     * @brief Check whether the hardware samplers are currently sampling.
+     * @return `true` if **all** hardware samplers are currently sampling, `false` otherwise (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool is_sampling() const noexcept;
+    /**
+     * @brief Check whether the hardware samplers have already stopped sampling.
+     * @return `true` if **all** hardware samplers have already stopped sampling, `false` otherwise (`[[nodiscard]]`)
+     */
+    [[nodiscard]] bool has_sampling_stopped() const noexcept;
+
+    /**
+     * @brief Add a new event to all hardware samplers.
+     * @param[in] e the event
+     */
+    void add_event(event e);
+    /**
+     * @brief Add a new event to all hardware samplers.
+     * @param[in] time_point the time point when the event occurred
+     * @param[in] name the name of the event
+     */
+    void add_event(decltype(event::time_point) time_point, decltype(event::name) name);
+    /**
+     * @brief Add a new event to all hardware samplers. The time_point will be the current time.
+     * @param[in] name the name of the event
+     */
+    void add_event(decltype(event::name) name);
+
+    /**
+     * @brief Return the number of recorded events separately for each hardware sampler.
+     * @return the number of events per hardware sampler (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::vector<std::size_t> num_events() const;
+    /**
+     * @brief Return the recorded events separately for each hardware sampler.
+     * @return the events per hardware sampler (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::vector<std::vector<event>> get_events() const;
+    /**
+     * @brief Return the time points of the samples separately for each hardware sampler.
+     * @return the time points per hardware sampler (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::vector<std::vector<std::chrono::steady_clock::time_point>> sampling_time_points() const;
+    /**
+     * @brief Return the sampling interval separately for each hardware sampler.
+     * @return the sampling interval in milliseconds per hardware sampler (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::vector<std::chrono::milliseconds> sampling_interval() const;
+
+    /**
+     * @brief The number of hardware samplers available for the whole system.
+     * @return the number of hardware samplers (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::size_t num_samplers() const noexcept;
+    /**
+     * @brief The hardware samplers available for the whole system.
+     * @return all available hardware samplers (`[[nodiscard]]`)
+     */
+    [[nodiscard]] const std::vector<std::unique_ptr<hardware_sampler>> &samplers() const noexcept;
+    /**
+     * @copydoc hws::system_hardware_sampler::samplers() const
+     */
+    [[nodiscard]] std::vector<std::unique_ptr<hardware_sampler>> &samplers() noexcept;
+    /**
+     * @brief Return the hardware sampler at index @p idx.
+     * @param[in] idx the index of the hardware sampler
+     * @throws std::out_of_range if @p idx is out-of-range
+     * @return the hardware sampler at index @p idx (`[[nodiscard]]`)
+     */
+    [[nodiscard]] const std::unique_ptr<hardware_sampler> &sampler(std::size_t idx) const;
+    /**
+     * @copydoc hws::system_hardware_sampler::sampler(std::size_t idx) const
+     */
+    [[nodiscard]] std::unique_ptr<hardware_sampler> &sampler(std::size_t idx);
+
+    /**
+     * @brief Dump the hardware samples of all hardware samplers to the YAML file with @p filename.
+     * @param[in] filename the YAML file to append the hardware samples to
+     */
+    void dump_yaml(const char *filename) const;
+    /**
+     * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) const
+     */
+    void dump_yaml(const std::string &filename) const;
+    /**
+     * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) const
+     */
+    void dump_yaml(const std::filesystem::path &filename) const;
+
+    /**
+     * @brief Return the hardware samples as YAML string.
+     * @return the YAML content as string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string as_yaml_string() const;
+    /**
+     * @brief Return only the hardware samples as YAML string.
+     * @throws std::runtime_error if sampling is still running
+     * @return the YAML content as string (`[[nodiscard]]`)
+     */
+    [[nodiscard]] std::string samples_only_as_yaml_string() const;
+
+  private:
+    /// The different hardware samplers for the current system.
+    std::vector<std::unique_ptr<hardware_sampler>> samplers_;
+};
+
+}  // namespace hws
+
+#endif  // HWS_SYSTEM_HARDWARE_SAMPLER_HPP_
diff --git a/include/hardware_sampling/utility.hpp b/include/hws/utility.hpp
similarity index 56%
rename from include/hardware_sampling/utility.hpp
rename to include/hws/utility.hpp
index e66d6c8..db37390 100644
--- a/include/hardware_sampling/utility.hpp
+++ b/include/hws/utility.hpp
@@ -8,22 +8,23 @@
  * @brief Utility functions for the hardware sampling.
  */
 
-#ifndef HARDWARE_SAMPLING_UTILITY_HPP_
-#define HARDWARE_SAMPLING_UTILITY_HPP_
+#ifndef HWS_UTILITY_HPP_
+#define HWS_UTILITY_HPP_
 #pragma once
 
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+
 #include <charconv>      // std::from_chars
-#include <chrono>        // std::chrono::{milliseconds, duration_cast}
+#include <chrono>        // std::chrono::duration
+#include <cmath>         // std::trunc
 #include <cstddef>       // std::size_t
-#include <format>        // std::format, std::formatter, std::basic_format_context, std::format_to
-#include <iterator>      // std::back_inserter, std::next, std::prev
 #include <optional>      // std::optional
-#include <sstream>       // std::basic_stringstream
 #include <stdexcept>     // std::runtime_error
 #include <string>        // std::string, std::stof, std::stod, std::stold
-#include <string_view>   // std::string_view, std::basic_string_view
+#include <string_view>   // std::string_view
 #include <system_error>  // std::errc
-#include <type_traits>   // std::is_same_v, std::remove_cvref_t
+#include <type_traits>   // std::is_same_v, std::is_floating_point_v, std::remove_cv_t, std::remove_reference_t, std::true_type, std::false_type
 #include <vector>        // std::vector
 
 namespace hws::detail {
@@ -53,55 +54,48 @@ namespace hws::detail {
   private:                                                                                            \
     std::optional<std::vector<sample_type>> sample_name##_{};
 
+/*****************************************************************************************************/
+/**                                          type_traits                                            **/
+/*****************************************************************************************************/
+
 /**
- * @brief Convert all time points to their duration passed since the @p reference time point.
- * @tparam Duration the duration type to return
- * @tparam TimePoint the type if the time points
- * @param[in] time_points the time points
- * @param[in] reference the reference time point
- * @return the duration passed since the @p reference time point (`[[nodiscard]]`)
+ * @brief Remove the reference and the topmost cv-qualifiers from type @p T.
  */
-template <typename Duration = std::chrono::milliseconds, typename TimePoint>
-[[nodiscard]] inline std::vector<Duration> durations_from_reference_time(const std::vector<TimePoint> &time_points, const TimePoint &reference) {
-    std::vector<Duration> durations(time_points.size());
+template <typename T>
+using remove_cvref_t = std::remove_cv_t<std::remove_reference_t<T>>;
 
-    for (std::size_t i = 0; i < durations.size(); ++i) {
-        durations[i] = std::chrono::duration_cast<Duration>(time_points[i] - reference);
-    }
+/**
+ * @brief The case if the type @p T isn't a std::vector.
+ * @tparam T the type to check
+ */
+template <typename T>
+struct is_vector : std::false_type { };
 
-    return durations;
-}
+/**
+ * @brief The case if the type @p T is a std::vector.
+ * @tparam T the type to check
+ */
+template <typename T>
+struct is_vector<std::vector<T>> : std::true_type { };
 
 /**
- * @brief Convert all time points to their duration since the epoch start.
- * @tparam TimePoint the type of the time points
- * @param[in] time_points the time points
- * @return the duration passed since the respective @p TimePoint epoch start (`[[nodiscard]]`)
+ * @brief Evaluates to `true` if @p T is a std::vector, otherwise `false`.
+ * @tparam T the type to check
  */
-template <typename TimePoint>
-[[nodiscard]] inline std::vector<typename TimePoint::duration> time_points_to_epoch(const std::vector<TimePoint> &time_points) {
-    std::vector<typename TimePoint::duration> times(time_points.size());
+template <typename T>
+constexpr bool is_vector_v = is_vector<T>::value;
 
-    for (std::size_t i = 0; i < times.size(); ++i) {
-        times[i] = time_points[i].time_since_epoch();
-    }
-    return times;
-}
+/*****************************************************************************************************/
+/**                                      string manipulation                                        **/
+/*****************************************************************************************************/
 
 /**
- * @brief Return the value encapsulated by the std::optional @p opt if it contains a value, otherwise a default constructed @p T is returned.
- * @tparam T the type of the value stored in the std::optional
- * @param[in] opt the std::optional to check
- * @return the value of the std::optional or a default constructed @p T (`[[nodiscard]]`)
+ * @brief Checks whether the string @p sv starts with the substring @p start.
+ * @param[in] sv the full string
+ * @param[in] start the substring
+ * @return `true` if @p sv starts with @p start, otherwise `false` (`[[nodiscard]]`)
  */
-template <typename T>
-[[nodiscard]] inline T value_or_default(const std::optional<T> &opt) {
-    if (opt.has_value()) {
-        return opt.value();
-    } else {
-        return T{};
-    }
-}
+[[nodiscard]] bool starts_with(std::string_view sv, std::string_view start) noexcept;
 
 /**
  * @brief Trim the @p str, i.e., remove all leading and trailing whitespace characters.
@@ -117,6 +111,14 @@ template <typename T>
  */
 [[nodiscard]] std::string to_lower_case(std::string_view str);
 
+/**
+ * @brief Split the @p str at the delimiters @p delim.
+ * @param[in] str the string to split
+ * @param[in] delim the used delimiter
+ * @return a vector containing all split tokens (`[[nodiscard]]`)
+ */
+[[nodiscard]] std::vector<std::string_view> split(std::string_view str, char delim = ' ');
+
 /**
  * @brief Convert the @p str to a value of type @p T.
  * @tparam T the type to convert the string to
@@ -125,10 +127,10 @@ template <typename T>
  */
 template <typename T>
 [[nodiscard]] inline T convert_to(const std::string_view str) {
-    if constexpr (std::is_same_v<std::remove_cvref_t<T>, std::string>) {
+    if constexpr (std::is_same_v<detail::remove_cvref_t<T>, std::string>) {
         // convert string_view to string
         return std::string{ trim(str) };
-    } else if constexpr (std::is_same_v<std::remove_cvref_t<T>, bool>) {
+    } else if constexpr (std::is_same_v<detail::remove_cvref_t<T>, bool>) {
         const std::string lower_case_str = to_lower_case(trim(str));
         // the string true
         if (lower_case_str == "true") {
@@ -140,17 +142,17 @@ template <typename T>
         }
         // convert a number to its "long long" value and convert it to a bool: 0 -> false, otherwise true
         return static_cast<bool>(convert_to<long long>(str));
-    } else if constexpr (std::is_same_v<std::remove_cvref_t<T>, char>) {
+    } else if constexpr (std::is_same_v<detail::remove_cvref_t<T>, char>) {
         const std::string_view trimmed = trim(str);
         // since we expect a character, after trimming the string must only contain exactly one character
         if (trimmed.size() != 1) {
-            throw std::runtime_error{ std::format("Can't convert '{}' to a value of type char!", str) };
+            throw std::runtime_error{ fmt::format("Can't convert '{}' to a value of type char!", str) };
         }
         return trimmed.front();
-    } else if constexpr (std::is_floating_point_v<std::remove_cvref_t<T>>) {
-        if constexpr (std::is_same_v<std::remove_cvref_t<T>, float>) {
+    } else if constexpr (std::is_floating_point_v<detail::remove_cvref_t<T>>) {
+        if constexpr (std::is_same_v<detail::remove_cvref_t<T>, float>) {
             return std::stof(std::string{ str });
-        } else if constexpr (std::is_same_v<std::remove_cvref_t<T>, double>) {
+        } else if constexpr (std::is_same_v<detail::remove_cvref_t<T>, double>) {
             return std::stod(std::string{ str });
         } else {
             return std::stold(std::string{ str });
@@ -163,7 +165,7 @@ template <typename T>
         T val;
         auto res = std::from_chars(trimmed_str.data(), trimmed_str.data() + trimmed_str.size(), val);
         if (res.ec != std::errc{}) {
-            throw std::runtime_error{ std::format("Can't convert '{}' to a value of type T!", str) };
+            throw std::runtime_error{ fmt::format("Can't convert '{}' to a value of type T!", str) };
         }
         return val;
     }
@@ -196,53 +198,113 @@ template <typename T>
 }
 
 /**
- * @brief Split the @p str at the delimiters @p delim.
- * @param[in] str the string to split
- * @param[in] delim the used delimiter
- * @return a vector containing all split tokens (`[[nodiscard]]`)
+ * @brief Convert all entries in the map to a single dict-like string.
+ * @details The resulting string is of form "{KEY, VALUE}" or "{KEY, [VALUES]}".
+ * @tparam MapType the type of the map
+ * @param[in] map the map to convert to a string
+ * @return the result string (`[[nodiscard]]`)
  */
-[[nodiscard]] std::vector<std::string_view> split(std::string_view str, char delim = ' ');
+template <typename MapType>
+[[nodiscard]] inline std::string map_entry_to_string(const std::optional<MapType> &map) {
+    if (map.has_value()) {
+        std::vector<std::string> entries{};
+        for (const auto &[key, value] : map.value()) {
+            if constexpr (is_vector_v<detail::remove_cvref_t<decltype(value)>>) {
+                entries.push_back(fmt::format("{{{}, [{}]}}", key, fmt::join(value, ", ")));
+            } else {
+                entries.push_back(fmt::format("{{{}, {}}}", key, value));
+            }
+        }
+        return fmt::format("{}", fmt::join(entries, ", "));
+    }
+    return "";
+}
 
 /**
- * @brief A std::formatter child class allowing to format custom types using an `operator<<` overload.
- * @tparam CharT the character type
+ * @brief Quote all @p values and return a vector of strings.
+ * @details Example: calling this function with `{ 1, 2, 3, 4 }` would return a vector of strings containing `{ "1", "2", "3", "4" }`.
+ * @tparam T the type of the values to quote
+ * @param[in] values the values to quote
+ * @return the quoted values (`[[nodiscard]]`)
  */
-template <typename CharT>
-struct basic_ostream_formatter : std::formatter<std::basic_string_view<CharT>, CharT> {
-    template <typename T, typename OutputIt>
-    OutputIt format(const T &value, std::basic_format_context<OutputIt, CharT> &ctx) const {
-        std::basic_stringstream<CharT> ss;
-        ss << value;
-        return std::formatter<std::basic_string_view<CharT>, CharT>::format(ss.view(), ctx);
+template <typename T>
+[[nodiscard]] inline std::vector<std::string> quote(const std::vector<T> &values) {
+    std::vector<std::string> quoted{};
+    quoted.reserve(values.size());
+
+    // quote all values
+    for (const T &val : values) {
+        quoted.push_back(fmt::format("\"{}\"", val));
     }
-};
 
-/// Type alias for a basic_ostream_formatter using a normal char.
-using ostream_formatter = basic_ostream_formatter<char>;
+    return quoted;
+}
+
+/*****************************************************************************************************/
+/**                                      other free functions                                       **/
+/*****************************************************************************************************/
 
 /**
- * @brief Join all values in @p c to a single string using @p delim as delimiter.
- * @tparam Container the type of the container
- * @param[in] c the container for what the values should be joined
- * @param[in] delim the delimiter used in joining the values
- * @return the joined string (`[[nodiscard]]`)
+ * @brief Convert the time point to the duration in seconds (as double, truncated to three decimal places) that has passed since the @p reference time point.
+ * @tparam TimePoint the type of the time point
+ * @param[in] time_point the time point
+ * @param[in] reference the reference time point
+ * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`)
  */
-template <typename Container>
-[[nodiscard]] inline std::string join(const Container &c, const std::string_view delim) {
-    if (c.empty()) {
-        return "";
-    } else if (c.size() == 1) {
-        return std::format("{}", *c.cbegin());
+template <typename TimePoint>
+[[nodiscard]] inline double duration_from_reference_time(const TimePoint &time_point, const TimePoint &reference) {
+    return std::trunc(std::chrono::duration<double>(time_point - reference).count() * 1000.0) / 1000.0;
+}
+
+/**
+ * @brief Convert all time points to the durations in seconds (as double, truncated to three decimal places) that have passed since the @p reference time point.
+ * @tparam TimePoint the type of the time points
+ * @param[in] time_points the time points
+ * @param[in] reference the reference time point
+ * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`)
+ */
+template <typename TimePoint>
+[[nodiscard]] inline std::vector<double> durations_from_reference_time(const std::vector<TimePoint> &time_points, const TimePoint &reference) {
+    std::vector<double> durations(time_points.size());
+
+    for (std::size_t i = 0; i < durations.size(); ++i) {
+        durations[i] = duration_from_reference_time(time_points[i], reference);
+    }
+
+    return durations;
+}
+
+/**
+ * @brief Convert all time points to their duration since the epoch start.
+ * @tparam TimePoint the type of the time points
+ * @param[in] time_points the time points
+ * @return the duration passed since the respective @p TimePoint epoch start (`[[nodiscard]]`)
+ */
+template <typename TimePoint>
+[[nodiscard]] inline std::vector<typename TimePoint::duration> time_points_to_epoch(const std::vector<TimePoint> &time_points) {
+    std::vector<typename TimePoint::duration> times(time_points.size());
+
+    for (std::size_t i = 0; i < times.size(); ++i) {
+        times[i] = time_points[i].time_since_epoch();
+    }
+    return times;
+}
+
+/**
+ * @brief Return the value encapsulated by the std::optional @p opt if it contains a value, otherwise a default constructed @p T is returned.
+ * @tparam T the type of the value stored in the std::optional
+ * @param[in] opt the std::optional to check
+ * @return the value of the std::optional or a default constructed @p T (`[[nodiscard]]`)
+ */
+template <typename T>
+[[nodiscard]] inline T value_or_default(const std::optional<T> &opt) {
+    if (opt.has_value()) {
+        return opt.value();
     } else {
-        std::string out{};
-        for (auto it = c.cbegin(); it != std::prev(c.cend()); it = std::next(it)) {
-            std::format_to(std::back_inserter(out), "{}{}", *it, delim);
-        }
-        std::format_to(std::back_inserter(out), "{}", *std::prev(c.end()));
-        return out;
+        return T{};
     }
 }
 
 }  // namespace hws::detail
 
-#endif  // HARDWARE_SAMPLING_UTILITY_HPP_
+#endif  // HWS_UTILITY_HPP_
diff --git a/include/hws/version.hpp.in b/include/hws/version.hpp.in
new file mode 100644
index 0000000..225072f
--- /dev/null
+++ b/include/hws/version.hpp.in
@@ -0,0 +1,51 @@
+/**
+ * @file
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ *
+ * @brief Version information for the hardware sampling.
+ */
+
+#ifndef HWS_VERSION_HPP_
+#define HWS_VERSION_HPP_
+#pragma once
+
+#include <string_view>  // std::string_view
+
+namespace hws::version {
+
+/**
+ * @brief The name of the library.
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+constexpr std::string_view name = "@PROJECT_NAME@";
+
+/**
+ * @brief The current version of the library in the form: "major.minor.patch".
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+constexpr std::string_view version = "@PROJECT_VERSION@";
+
+/**
+ * @brief The current major version of the library.
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+constexpr int major = @PROJECT_VERSION_MAJOR@;
+
+/**
+ * @brief The current minor version of the library.
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+constexpr int minor = @PROJECT_VERSION_MINOR@;
+
+/**
+ * @brief The current patch version of the library.
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step.
+ */
+constexpr int patch = @PROJECT_VERSION_PATCH@;
+
+}  // namespace hws::version
+
+#endif  // HWS_VERSION_HPP_
diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp
deleted file mode 100644
index 1eb5a14..0000000
--- a/src/hardware_sampling/cpu/hardware_sampler.cpp
+++ /dev/null
@@ -1,458 +0,0 @@
-/**
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- */
-
-#include "hardware_sampling/cpu/hardware_sampler.hpp"
-
-#include "hardware_sampling/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
-#include "hardware_sampling/cpu/utility.hpp"       // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess
-#include "hardware_sampling/hardware_sampler.hpp"  // hws::tracking::hardware_sampler
-#include "hardware_sampling/utility.hpp"           // hws::detail::{split, split_as, trim, convert_to, ostream_formatter, join}
-
-#include <cassert>        // assert
-#include <chrono>         // std::chrono::{steady_clock, milliseconds}
-#include <cstddef>        // std::size_t
-#include <exception>      // std::exception, std::terminate
-#include <format>         // std::format
-#include <ios>            // std::ios_base
-#include <iostream>       // std::cerr, std::endl
-#include <optional>       // std::make_optional
-#include <ostream>        // std::ostream
-#include <regex>          // std::regex, std::regex::extended, std::regex_match, std::regex_replace
-#include <stdexcept>      // std::runtime_error
-#include <string>         // std::string
-#include <string_view>    // std::string_view
-#include <thread>         // std::this_thread
-#include <unordered_map>  // std::unordered_map
-#include <vector>         // std::vector
-
-namespace hws {
-
-cpu_hardware_sampler::cpu_hardware_sampler() :
-    cpu_hardware_sampler{ HWS_SAMPLING_INTERVAL } { }
-
-cpu_hardware_sampler::cpu_hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    hardware_sampler{ sampling_interval } { }
-
-cpu_hardware_sampler::~cpu_hardware_sampler() {
-    try {
-        // if this hardware sampler is still sampling, stop it
-        if (this->has_sampling_started() && !this->has_sampling_stopped()) {
-            this->stop_sampling();
-        }
-    } catch (const std::exception &e) {
-        std::cerr << e.what() << std::endl;
-        std::terminate();
-    }
-}
-
-void cpu_hardware_sampler::sampling_loop() {
-    //
-    // add samples where we only have to retrieve the value once
-    //
-
-    this->add_time_point(std::chrono::steady_clock::now());
-
-#if defined(HWS_VIA_LSCPU_ENABLED)
-    {
-        const std::string lscpu_output = detail::run_subprocess("lscpu");
-        const std::vector<std::string_view> lscpu_lines = detail::split(detail::trim(lscpu_output), '\n');
-
-        for (std::string_view line : lscpu_lines) {
-            line = detail::trim(line);
-            // extract the value
-            std::string_view value{ line };
-            value.remove_prefix(value.find_first_of(":") + 1);
-            value = detail::trim(value);
-
-            // check the lines if the start with an entry that we want to sample
-            if (line.starts_with("Architecture")) {
-                general_samples_.architecture_ = detail::convert_to<decltype(general_samples_.architecture_)::value_type>(value);
-            } else if (line.starts_with("Byte Order")) {
-                general_samples_.byte_order_ = detail::convert_to<decltype(general_samples_.byte_order_)::value_type>(value);
-            } else if (line.starts_with("CPU(s)")) {
-                general_samples_.num_threads_ = detail::convert_to<decltype(general_samples_.num_threads_)::value_type>(value);
-            } else if (line.starts_with("Thread(s) per core")) {
-                general_samples_.threads_per_core_ = detail::convert_to<decltype(general_samples_.threads_per_core_)::value_type>(value);
-            } else if (line.starts_with("Core(s) per socket")) {
-                general_samples_.cores_per_socket_ = detail::convert_to<decltype(general_samples_.cores_per_socket_)::value_type>(value);
-            } else if (line.starts_with("Socket(s)")) {
-                general_samples_.num_sockets_ = detail::convert_to<decltype(general_samples_.num_sockets_)::value_type>(value);
-            } else if (line.starts_with("NUMA node(s)")) {
-                general_samples_.numa_nodes_ = detail::convert_to<decltype(general_samples_.numa_nodes_)::value_type>(value);
-            } else if (line.starts_with("Vendor ID")) {
-                general_samples_.vendor_id_ = detail::convert_to<decltype(general_samples_.vendor_id_)::value_type>(value);
-            } else if (line.starts_with("Model name")) {
-                general_samples_.name_ = detail::convert_to<decltype(general_samples_.name_)::value_type>(value);
-            } else if (line.starts_with("Flags")) {
-                general_samples_.flags_ = detail::split_as<decltype(general_samples_.flags_)::value_type::value_type>(value, ' ');
-            } else if (line.starts_with("Frequency boost")) {
-                clock_samples_.frequency_boost_ = value == "enabled";
-            } else if (line.starts_with("CPU max MHz")) {
-                clock_samples_.max_frequency_ = detail::convert_to<decltype(clock_samples_.max_frequency_)::value_type>(value);
-            } else if (line.starts_with("CPU min MHz")) {
-                clock_samples_.min_frequency_ = detail::convert_to<decltype(clock_samples_.min_frequency_)::value_type>(value);
-            } else if (line.starts_with("L1d cache")) {
-                memory_samples_.l1d_cache_ = detail::convert_to<decltype(memory_samples_.l1d_cache_)::value_type>(value);
-            } else if (line.starts_with("L1i cache")) {
-                memory_samples_.l1i_cache_ = detail::convert_to<decltype(memory_samples_.l1i_cache_)::value_type>(value);
-            } else if (line.starts_with("L2 cache")) {
-                memory_samples_.l2_cache_ = detail::convert_to<decltype(memory_samples_.l2_cache_)::value_type>(value);
-            } else if (line.starts_with("L3 cache")) {
-                memory_samples_.l3_cache_ = detail::convert_to<decltype(memory_samples_.l3_cache_)::value_type>(value);
-            }
-        }
-    }
-#endif
-
-#if defined(HWS_VIA_FREE_ENABLED)
-    const std::regex whitespace_replace_reg{ "[ ]+", std::regex::extended };
-    {
-        std::string free_output = detail::run_subprocess("free -b");
-        free_output = std::regex_replace(free_output, whitespace_replace_reg, " ");
-        const std::vector<std::string_view> free_lines = detail::split(detail::trim(free_output), '\n');
-        assert((free_lines.size() >= 3) && "Must read more than three lines, but fewer were read!");
-
-        // read memory information
-        const std::vector<std::string_view> memory_data = detail::split(free_lines[1], ' ');
-        memory_samples_.memory_total_ = detail::convert_to<decltype(memory_samples_.memory_total_)::value_type>(memory_data[1]);
-        memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ detail::convert_to<decltype(memory_samples_.memory_used_)::value_type::value_type>(memory_data[2]) };
-        memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{ detail::convert_to<decltype(memory_samples_.memory_free_)::value_type::value_type>(memory_data[3]) };
-
-        // read swap information
-        const std::vector<std::string_view> swap_data = detail::split(free_lines[2], ' ');
-        memory_samples_.swap_memory_total_ = detail::convert_to<decltype(memory_samples_.swap_memory_total_)::value_type>(swap_data[1]);
-        memory_samples_.swap_memory_used_ = decltype(memory_samples_.swap_memory_used_)::value_type{ detail::convert_to<decltype(memory_samples_.swap_memory_used_)::value_type::value_type>(swap_data[2]) };
-        memory_samples_.swap_memory_free_ = decltype(memory_samples_.swap_memory_free_)::value_type{ detail::convert_to<decltype(memory_samples_.swap_memory_free_)::value_type::value_type>(swap_data[3]) };
-    }
-#endif
-
-#if defined(HWS_VIA_TURBOSTAT_ENABLED)
-
-    // -n, --num_iterations     number of the measurement iterations
-    // -i, --interval           sampling interval in seconds (decimal number)
-    // -S, --Summary            limits output to 1-line per interval
-    // -q, --quiet              skip decoding system configuration header
-
-    // get header information
-    #if defined(HWS_VIA_TURBOSTAT_ROOT)
-    // run with sudo
-    const std::string_view turbostat_command_line = "sudo turbostat -n 1 -i 0.001 -S -q";
-    #else
-    // run without sudo
-    const std::string_view turbostat_command_line = "turbostat -n 1 -i 0.001 -S -q";
-    #endif
-
-    {
-        // run turbostat
-        const std::string turbostat_output = detail::run_subprocess(turbostat_command_line);
-
-        // retrieve the turbostat data
-        const std::vector<std::string_view> data = detail::split(detail::trim(turbostat_output), '\n');
-        assert((data.size() >= 2) && "Must read at least two lines!");
-        const std::vector<std::string_view> header = detail::split(data[0], '\t');
-        const std::vector<std::string_view> values = detail::split(data[1], '\t');
-
-        for (std::size_t i = 0; i < header.size(); ++i) {
-            if (header[i] == "Avg_MHz") {
-                using vector_type = decltype(clock_samples_.average_frequency_)::value_type;
-                clock_samples_.average_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Busy%") {
-                using vector_type = decltype(general_samples_.busy_percent_)::value_type;
-                general_samples_.busy_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Bzy_MHz") {
-                using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type;
-                clock_samples_.average_non_idle_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "TSC_MHz") {
-                using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
-                clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "IPC") {
-                using vector_type = decltype(general_samples_.ipc_)::value_type;
-                general_samples_.ipc_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "IRQ") {
-                using vector_type = decltype(general_samples_.irq_)::value_type;
-                general_samples_.irq_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "SMI") {
-                using vector_type = decltype(general_samples_.smi_)::value_type;
-                general_samples_.smi_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "POLL") {
-                using vector_type = decltype(general_samples_.poll_)::value_type;
-                general_samples_.poll_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "POLL%") {
-                using vector_type = decltype(general_samples_.poll_percent_)::value_type;
-                general_samples_.poll_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "CoreTmp") {
-                using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
-                temperature_samples_.core_temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "CoreThr") {
-                using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
-                temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "PkgTmp") {
-                using vector_type = decltype(temperature_samples_.package_temperature_)::value_type;
-                temperature_samples_.package_temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFX%rc6") {
-                using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
-                gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFXMHz") {
-                using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
-                gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFXAMHz") {
-                using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
-                gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Totl%C0") {
-                using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
-                idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Any%C0") {
-                using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
-                idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFX%C0") {
-                using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
-                gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "CPUGFX%") {
-                using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
-                gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "CPU%LPI") {
-                using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
-                idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "SYS%LPI") {
-                using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
-                idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "Pkg%LPI") {
-                using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
-                idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "PkgWatt") {
-                using vector_type = decltype(power_samples_.package_watt_)::value_type;
-                power_samples_.package_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "CorWatt") {
-                using vector_type = decltype(power_samples_.core_watt_)::value_type;
-                power_samples_.core_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "GFXWatt") {
-                using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
-                gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "RAMWatt") {
-                using vector_type = decltype(power_samples_.ram_watt_)::value_type;
-                power_samples_.ram_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "PKG_%") {
-                using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
-                power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else if (header[i] == "RAM_%") {
-                using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
-                power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-            } else {
-                // test against regex
-                const std::string header_str{ header[i] };
-                const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended };
-                if (std::regex_match(header_str, reg)) {
-                    // first time this branch is reached -> create optional value
-                    if (!idle_state_samples_.idle_states_.has_value()) {
-                        idle_state_samples_.idle_states_ = std::make_optional<typename cpu_idle_states_samples::map_type>();
-                    }
-
-                    using vector_type = cpu_idle_states_samples::map_type::mapped_type;
-                    idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
-                }
-            }
-        }
-    }
-#endif
-
-    //
-    // loop until stop_sampling() is called
-    //
-
-    while (!this->has_sampling_stopped()) {
-        // only sample values if the sampler currently isn't paused
-        if (this->is_sampling()) {
-            // add current time point
-            this->add_time_point(std::chrono::steady_clock::now());
-
-#if defined(HWS_VIA_FREE_ENABLED)
-            {
-                // run free
-                std::string free_output = detail::run_subprocess("free -b");
-                free_output = std::regex_replace(free_output, whitespace_replace_reg, " ");
-                const std::vector<std::string_view> free_lines = detail::split(detail::trim(free_output), '\n');
-                assert((free_lines.size() >= 3) && "Must read more than three lines, but fewer were read!");
-
-                // read memory information
-                const std::vector<std::string_view> memory_data = detail::split(free_lines[1], ' ');
-                memory_samples_.memory_used_->push_back(detail::convert_to<decltype(memory_samples_.memory_used_)::value_type::value_type>(memory_data[2]));
-                memory_samples_.memory_free_->push_back(detail::convert_to<decltype(memory_samples_.memory_free_)::value_type::value_type>(memory_data[3]));
-
-                // read swap information
-                const std::vector<std::string_view> swap_data = detail::split(free_lines[2], ' ');
-                memory_samples_.swap_memory_used_->push_back(detail::convert_to<decltype(memory_samples_.swap_memory_used_)::value_type::value_type>(swap_data[2]));
-                memory_samples_.swap_memory_free_->push_back(detail::convert_to<decltype(memory_samples_.swap_memory_free_)::value_type::value_type>(swap_data[3]));
-            }
-#endif
-
-#if defined(HWS_VIA_TURBOSTAT_ENABLED)
-            {
-                // run turbostat
-                const std::string turbostat_output = detail::run_subprocess(turbostat_command_line);
-
-                // retrieve the turbostat data
-                const std::vector<std::string_view> data = detail::split(detail::trim(turbostat_output), '\n');
-                assert((data.size() >= 2) && "Must read at least two lines!");
-                const std::vector<std::string_view> header = detail::split(data[0], '\t');
-                const std::vector<std::string_view> values = detail::split(data[1], '\t');
-
-                // add values to the respective sample entries
-                for (std::size_t i = 0; i < header.size(); ++i) {
-                    if (header[i] == "Avg_MHz") {
-                        using vector_type = decltype(clock_samples_.average_frequency_)::value_type;
-                        clock_samples_.average_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Busy%") {
-                        using vector_type = decltype(general_samples_.busy_percent_)::value_type;
-                        general_samples_.busy_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Bzy_MHz") {
-                        using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type;
-                        clock_samples_.average_non_idle_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "TSC_MHz") {
-                        using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
-                        clock_samples_.time_stamp_counter_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "IPC") {
-                        using vector_type = decltype(general_samples_.ipc_)::value_type;
-                        general_samples_.ipc_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "IRQ") {
-                        using vector_type = decltype(general_samples_.irq_)::value_type;
-                        general_samples_.irq_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "SMI") {
-                        using vector_type = decltype(general_samples_.smi_)::value_type;
-                        general_samples_.smi_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "POLL") {
-                        using vector_type = decltype(general_samples_.poll_)::value_type;
-                        general_samples_.poll_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "POLL%") {
-                        using vector_type = decltype(general_samples_.poll_percent_)::value_type;
-                        general_samples_.poll_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "CoreTmp") {
-                        using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
-                        temperature_samples_.core_temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "CoreThr") {
-                        using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
-                        temperature_samples_.core_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "PkgTmp") {
-                        using vector_type = decltype(temperature_samples_.package_temperature_)::value_type;
-                        temperature_samples_.package_temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFX%rc6") {
-                        using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
-                        gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFXMHz") {
-                        using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
-                        gfx_samples_.gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFXAMHz") {
-                        using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
-                        gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Totl%C0") {
-                        using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
-                        idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Any%C0") {
-                        using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
-                        idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFX%C0") {
-                        using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
-                        gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "CPUGFX%") {
-                        using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
-                        gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "CPU%LPI") {
-                        using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
-                        idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "SYS%LPI") {
-                        using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
-                        idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "Pkg%LPI") {
-                        using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
-                        idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "PkgWatt") {
-                        using vector_type = decltype(power_samples_.package_watt_)::value_type;
-                        power_samples_.package_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "CorWatt") {
-                        using vector_type = decltype(power_samples_.core_watt_)::value_type;
-                        power_samples_.core_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "GFXWatt") {
-                        using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
-                        gfx_samples_.gfx_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "RAMWatt") {
-                        using vector_type = decltype(power_samples_.ram_watt_)::value_type;
-                        power_samples_.ram_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "PKG_%") {
-                        using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
-                        power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else if (header[i] == "RAM_%") {
-                        using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
-                        power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                    } else {
-                        const std::string header_str{ header[i] };
-                        if (idle_state_samples_.idle_states_.value().contains(header_str)) {
-                            using vector_type = cpu_idle_states_samples::map_type::mapped_type;
-                            idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
-                        }
-                    }
-                }
-            }
-#endif
-        }
-
-        // wait for the sampling interval to pass to retrieve the next sample
-        std::this_thread::sleep_for(this->sampling_interval());
-    }
-}
-
-std::string cpu_hardware_sampler::device_identification() const {
-    return "cpu_device";
-}
-
-std::string cpu_hardware_sampler::generate_yaml_string() const {
-    // check whether it's safe to generate the YAML entry
-    if (this->is_sampling()) {
-        throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
-    }
-
-    return std::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}",
-                       general_samples_.generate_yaml_string(),
-                       clock_samples_.generate_yaml_string(),
-                       power_samples_.generate_yaml_string(),
-                       memory_samples_.generate_yaml_string(),
-                       temperature_samples_.generate_yaml_string(),
-                       gfx_samples_.generate_yaml_string(),
-                       idle_state_samples_.generate_yaml_string());
-}
-
-std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler) {
-    if (sampler.is_sampling()) {
-        out.setstate(std::ios_base::failbit);
-        return out;
-    } else {
-        return out << std::format("sampling interval: {}\n"
-                                  "time points: [{}]\n\n"
-                                  "general samples:\n{}\n\n"
-                                  "clock samples:\n{}\n\n"
-                                  "power samples:\n{}\n\n"
-                                  "memory samples:\n{}\n\n"
-                                  "temperature samples:\n{}\n\n"
-                                  "gfx samples:\n{}\n\n"
-                                  "idle state samples:\n{}",
-                                  sampler.sampling_interval(),
-                                  detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
-                                  sampler.general_samples(),
-                                  sampler.clock_samples(),
-                                  sampler.power_samples(),
-                                  sampler.memory_samples(),
-                                  sampler.temperature_samples(),
-                                  sampler.gfx_samples(),
-                                  sampler.idle_state_samples());
-    }
-}
-
-}  // namespace hws
diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
deleted file mode 100644
index 3f926af..0000000
--- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp
+++ /dev/null
@@ -1,671 +0,0 @@
-/**
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- */
-
-#include "hardware_sampling/gpu_amd/hardware_sampler.hpp"
-
-#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
-#include "hardware_sampling/gpu_amd/utility.hpp"           // HWS_ROCM_SMI_ERROR_CHECK
-#include "hardware_sampling/hardware_sampler.hpp"          // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"                   // hws::detail::{time_points_to_epoch, join}
-
-#include "rocm_smi/rocm_smi.h"  // ROCm SMI runtime functions
-
-#include <chrono>     // std::chrono::{steady_clock, duration_cast, milliseconds}
-#include <cstddef>    // std::size_t
-#include <cstdint>    // std::uint32_t, std::uint64_t
-#include <exception>  // std::exception, std::terminate
-#include <format>     // std::format
-#include <ios>        // std::ios_base
-#include <iostream>   // std::cerr, std::endl
-#include <optional>   // std::optional
-#include <ostream>    // std::ostream
-#include <stdexcept>  // std::runtime_error
-#include <string>     // std::string
-#include <thread>     // std::this_thread
-#include <utility>    // std::move
-#include <vector>     // std::vector
-
-namespace hws {
-
-gpu_amd_hardware_sampler::gpu_amd_hardware_sampler() :
-    gpu_amd_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { }
-
-gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id) :
-    gpu_amd_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { }
-
-gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    gpu_amd_hardware_sampler{ 0, sampling_interval } { }
-
-gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) :
-    hardware_sampler{ sampling_interval },
-    device_id_{ static_cast<std::uint32_t>(device_id) } {
-    // make sure that rsmi_init is only called once for all instances
-    if (instances_++ == 0) {
-        HWS_ROCM_SMI_ERROR_CHECK(rsmi_init(std::uint64_t{ 0 }));
-        // notify that initialization has been finished
-        init_finished_ = true;
-    } else {
-        // wait until init has been finished!
-        while (!init_finished_) { }
-    }
-}
-
-gpu_amd_hardware_sampler::~gpu_amd_hardware_sampler() {
-    try {
-        // if this hardware sampler is still sampling, stop it
-        if (this->has_sampling_started() && !this->has_sampling_stopped()) {
-            this->stop_sampling();
-        }
-
-        // the last instance must shut down the ROCm SMI runtime
-        // make sure that rsmi_shut_down is only called once
-        if (--instances_ == 0) {
-            HWS_ROCM_SMI_ERROR_CHECK(rsmi_shut_down());
-            // reset init_finished flag
-            init_finished_ = false;
-        }
-    } catch (const std::exception &e) {
-        std::cerr << e.what() << std::endl;
-        std::terminate();
-    }
-}
-
-void gpu_amd_hardware_sampler::sampling_loop() {
-    //
-    // add samples where we only have to retrieve the value once
-    //
-
-    this->add_time_point(std::chrono::steady_clock::now());
-
-    // retrieve initial general information
-    {
-        // fixed information -> only retrieved once
-        std::string name(static_cast<std::string::size_type>(1024), '\0');
-        if (rsmi_dev_name_get(device_id_, name.data(), name.size()) == RSMI_STATUS_SUCCESS) {
-            general_samples_.name_ = name.substr(0, name.find_first_of('\0'));
-        }
-
-        // queried samples -> retrieved every iteration if available
-        rsmi_dev_perf_level_t pstate{};
-        if (rsmi_dev_perf_level_get(device_id_, &pstate) == RSMI_STATUS_SUCCESS) {
-            general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate) };
-        }
-
-        decltype(general_samples_.utilization_gpu_)::value_type::value_type utilization_gpu{};
-        if (rsmi_dev_busy_percent_get(device_id_, &utilization_gpu) == RSMI_STATUS_SUCCESS) {
-            general_samples_.utilization_gpu_ = decltype(general_samples_.utilization_gpu_)::value_type{ utilization_gpu };
-        }
-
-        decltype(general_samples_.utilization_mem_)::value_type::value_type utilization_mem{};
-        if (rsmi_dev_memory_busy_percent_get(device_id_, &utilization_mem) == RSMI_STATUS_SUCCESS) {
-            general_samples_.utilization_mem_ = decltype(general_samples_.utilization_mem_)::value_type{ utilization_mem };
-        }
-    }
-
-    // retrieve initial clock related information
-    {
-        rsmi_frequencies_t frequency_info{};
-        if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.clock_system_min_ = frequency_info.frequency[0];
-            clock_samples_.clock_system_max_ = frequency_info.frequency[frequency_info.num_supported - 1];
-            // queried samples -> retrieved every iteration if available
-            clock_samples_.clock_system_ = decltype(clock_samples_.clock_system_)::value_type{};
-            if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                clock_samples_.clock_system_->push_back(frequency_info.frequency[frequency_info.current]);
-            } else {
-                clock_samples_.clock_system_->push_back(0);
-            }
-        }
-
-        if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.clock_socket_min_ = frequency_info.frequency[0];
-            clock_samples_.clock_socket_max_ = frequency_info.frequency[frequency_info.num_supported - 1];
-            // queried samples -> retrieved every iteration if available
-            clock_samples_.clock_socket_ = decltype(clock_samples_.clock_socket_)::value_type{};
-            if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                clock_samples_.clock_socket_->push_back(frequency_info.frequency[frequency_info.current]);
-            } else {
-                clock_samples_.clock_socket_->push_back(0);
-            }
-        }
-
-        if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.clock_memory_min_ = frequency_info.frequency[0];
-            clock_samples_.clock_memory_max_ = frequency_info.frequency[frequency_info.num_supported - 1];
-            // queried samples -> retrieved every iteration if available
-            clock_samples_.clock_memory_ = decltype(clock_samples_.clock_memory_)::value_type{};
-            if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                clock_samples_.clock_memory_->push_back(frequency_info.frequency[frequency_info.current]);
-            } else {
-                clock_samples_.clock_memory_->push_back(0);
-            }
-        }
-
-        // queried samples -> retrieved every iteration if available
-        decltype(clock_samples_.overdrive_level_)::value_type::value_type overdrive_level{};
-        if (rsmi_dev_overdrive_level_get(device_id_, &overdrive_level) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.overdrive_level_ = decltype(clock_samples_.overdrive_level_)::value_type{ overdrive_level };
-        }
-
-        decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type memory_overdrive_level{};
-        if (rsmi_dev_mem_overdrive_level_get(device_id_, &memory_overdrive_level) == RSMI_STATUS_SUCCESS) {
-            clock_samples_.memory_overdrive_level_ = decltype(clock_samples_.memory_overdrive_level_)::value_type{ memory_overdrive_level };
-        }
-    }
-
-    // retrieve initial power related information
-    {
-        decltype(power_samples_.power_default_cap_)::value_type power_default_cap{};
-        if (rsmi_dev_power_cap_default_get(device_id_, &power_default_cap) == RSMI_STATUS_SUCCESS) {
-            power_samples_.power_default_cap_ = power_default_cap;
-        }
-
-        decltype(power_samples_.power_cap_)::value_type power_cap{};
-        if (rsmi_dev_power_cap_get(device_id_, std::uint32_t{ 0 }, &power_cap) == RSMI_STATUS_SUCCESS) {
-            power_samples_.power_cap_ = power_cap;
-        }
-
-        {
-            decltype(power_samples_.power_usage_)::value_type::value_type power_usage{};
-            RSMI_POWER_TYPE power_type{};
-            if (rsmi_dev_power_get(device_id_, &power_usage, &power_type) == RSMI_STATUS_SUCCESS) {
-                switch (power_type) {
-                    case RSMI_POWER_TYPE::RSMI_AVERAGE_POWER:
-                        power_samples_.power_type_ = "average";
-                        break;
-                    case RSMI_POWER_TYPE::RSMI_CURRENT_POWER:
-                        power_samples_.power_type_ = "current/instant";
-                        break;
-                    case RSMI_POWER_TYPE::RSMI_INVALID_POWER:
-                        power_samples_.power_type_ = "invalid/undetected";
-                        break;
-                }
-                // queried samples -> retrieved every iteration if available
-                power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ power_usage };
-            }
-        }
-
-        rsmi_power_profile_status_t power_profile{};
-        if (rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile) == RSMI_STATUS_SUCCESS) {
-            decltype(power_samples_.available_power_profiles_)::value_type available_power_profiles{};
-            // go through all possible power profiles
-            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_CUSTOM_MASK) != std::uint64_t{ 0 }) {
-                available_power_profiles.emplace_back("CUSTOM");
-            }
-            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_VIDEO_MASK) != std::uint64_t{ 0 }) {
-                available_power_profiles.emplace_back("VIDEO");
-            }
-            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_POWER_SAVING_MASK) != std::uint64_t{ 0 }) {
-                available_power_profiles.emplace_back("POWER_SAVING");
-            }
-            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_COMPUTE_MASK) != std::uint64_t{ 0 }) {
-                available_power_profiles.emplace_back("COMPUTE");
-            }
-            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_VR_MASK) != std::uint64_t{ 0 }) {
-                available_power_profiles.emplace_back("VR");
-            }
-            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK) != std::uint64_t{ 0 }) {
-                available_power_profiles.emplace_back("3D_FULL_SCREEN");
-            }
-            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT) != std::uint64_t{ 0 }) {
-                available_power_profiles.emplace_back("BOOTUP_DEFAULT");
-            }
-            power_samples_.available_power_profiles_ = std::move(available_power_profiles);
-
-            // queried samples -> retrieved every iteration if available
-            switch (power_profile.current) {
-                case RSMI_PWR_PROF_PRST_CUSTOM_MASK:
-                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "CUSTOM" };
-                    break;
-                case RSMI_PWR_PROF_PRST_VIDEO_MASK:
-                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "VIDEO" };
-                    break;
-                case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK:
-                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "POWER_SAVING" };
-                    break;
-                case RSMI_PWR_PROF_PRST_COMPUTE_MASK:
-                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "COMPUTE" };
-                    break;
-                case RSMI_PWR_PROF_PRST_VR_MASK:
-                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "VR" };
-                    break;
-                case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK:
-                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "3D_FULL_SCREEN" };
-                    break;
-                case RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT:
-                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "BOOTUP_DEFAULT" };
-                    break;
-                case RSMI_PWR_PROF_PRST_INVALID:
-                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "INVALID" };
-                    break;
-            }
-        }
-
-        // queried samples -> retrieved every iteration if available
-        [[maybe_unused]] std::uint64_t timestamp{};
-        float resolution{};
-        decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type power_total_energy_consumption{};
-        if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {  // TODO: returns the same value for all invocations
-            const double scaled_value = static_cast<double>(power_total_energy_consumption) * static_cast<double>(resolution);
-            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast<decltype(power_total_energy_consumption)>(scaled_value) };
-        }
-    }
-
-    // retrieve initial memory related information
-    {
-        decltype(memory_samples_.memory_total_)::value_type memory_total{};
-        if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_total) == RSMI_STATUS_SUCCESS) {
-            memory_samples_.memory_total_ = memory_total;
-        }
-
-        decltype(memory_samples_.visible_memory_total_)::value_type visible_memory_total{};
-        if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VIS_VRAM, &visible_memory_total) == RSMI_STATUS_SUCCESS) {
-            memory_samples_.visible_memory_total_ = visible_memory_total;
-        }
-
-        rsmi_pcie_bandwidth_t bandwidth_info{};
-        if (rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info) == RSMI_STATUS_SUCCESS) {
-            memory_samples_.min_num_pcie_lanes_ = bandwidth_info.lanes[0];
-            memory_samples_.max_num_pcie_lanes_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1];
-            // queried samples -> retrieved every iteration if available
-            memory_samples_.pcie_transfer_rate_ = decltype(memory_samples_.pcie_transfer_rate_)::value_type{};
-            memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{};
-            if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) {
-                memory_samples_.pcie_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current]);
-                memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]);
-            } else {
-                // the current index is (somehow) wrong
-                memory_samples_.pcie_transfer_rate_->push_back(0);
-                memory_samples_.num_pcie_lanes_->push_back(0);
-            }
-        }
-
-        // queried samples -> retrieved every iteration if available
-        decltype(memory_samples_.memory_used_)::value_type::value_type memory_used{};
-        if (rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_used) == RSMI_STATUS_SUCCESS) {
-            memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_used };
-        }
-    }
-
-    // retrieve fixed temperature related information
-    {
-        std::uint32_t fan_id{ 0 };
-        decltype(temperature_samples_.fan_speed_)::value_type::value_type fan_speed{};
-        while (rsmi_dev_fan_speed_get(device_id_, fan_id, &fan_speed) == RSMI_STATUS_SUCCESS) {
-            if (fan_id == 0) {
-                // queried samples -> retrieved every iteration if available
-                temperature_samples_.fan_speed_ = decltype(temperature_samples_.fan_speed_)::value_type{ fan_speed };
-            }
-            ++fan_id;
-        }
-        temperature_samples_.num_fans_ = fan_id;
-
-        decltype(temperature_samples_.max_fan_speed_)::value_type max_fan_speed{};
-        if (rsmi_dev_fan_speed_max_get(device_id_, std::uint32_t{ 0 }, &max_fan_speed) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.max_fan_speed_ = max_fan_speed;
-        }
-
-        decltype(temperature_samples_.temperature_edge_min_)::value_type temperature_edge_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MIN, &temperature_edge_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_edge_min_ = temperature_edge_min;
-        }
-
-        decltype(temperature_samples_.temperature_edge_max_)::value_type temperature_edge_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temperature_edge_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_edge_max_ = temperature_edge_min;
-        }
-
-        decltype(temperature_samples_.temperature_hotspot_min_)::value_type temperature_hotspot_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MIN, &temperature_hotspot_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hotspot_min_ = temperature_hotspot_min;
-        }
-
-        decltype(temperature_samples_.temperature_hotspot_max_)::value_type temperature_hotspot_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MAX, &temperature_hotspot_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hotspot_max_ = temperature_hotspot_max;
-        }
-
-        decltype(temperature_samples_.temperature_memory_min_)::value_type temperature_memory_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MIN, &temperature_memory_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_memory_min_ = temperature_memory_min;
-        }
-
-        decltype(temperature_samples_.temperature_memory_max_)::value_type temperature_memory_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MAX, &temperature_memory_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_memory_max_ = temperature_memory_max;
-        }
-
-        decltype(temperature_samples_.temperature_hbm_0_min_)::value_type temperature_hbm_0_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MIN, &temperature_hbm_0_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_0_min_ = temperature_hbm_0_min;
-        }
-
-        decltype(temperature_samples_.temperature_hbm_0_max_)::value_type temperature_hbm_0_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MAX, &temperature_hbm_0_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_0_max_ = temperature_hbm_0_max;
-        }
-
-        decltype(temperature_samples_.temperature_hbm_1_min_)::value_type temperature_hbm_1_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MIN, &temperature_hbm_1_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_1_min_ = temperature_hbm_1_min;
-        }
-
-        decltype(temperature_samples_.temperature_hbm_1_max_)::value_type temperature_hbm_1_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MAX, &temperature_hbm_1_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_1_max_ = temperature_hbm_1_max;
-        }
-
-        decltype(temperature_samples_.temperature_hbm_2_min_)::value_type temperature_hbm_2_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MIN, &temperature_hbm_2_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_2_min_ = temperature_hbm_2_min;
-        }
-
-        decltype(temperature_samples_.temperature_hbm_2_max_)::value_type temperature_hbm_2_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MAX, &temperature_hbm_2_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_2_max_ = temperature_hbm_2_max;
-        }
-
-        decltype(temperature_samples_.temperature_hbm_3_min_)::value_type temperature_hbm_3_min{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MIN, &temperature_hbm_3_min) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_3_min_ = temperature_hbm_3_min;
-        }
-
-        decltype(temperature_samples_.temperature_hbm_3_max_)::value_type temperature_hbm_3_max{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MAX, &temperature_hbm_3_max) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_3_max_ = temperature_hbm_3_max;
-        }
-
-        // queried samples -> retrieved every iteration if available
-        decltype(temperature_samples_.temperature_edge_)::value_type::value_type temperature_edge{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temperature_edge) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_edge_ = decltype(temperature_samples_.temperature_edge_)::value_type{ temperature_edge };
-        }
-
-        decltype(temperature_samples_.temperature_hotspot_)::value_type::value_type temperature_hotspot{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &temperature_hotspot) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hotspot_ = decltype(temperature_samples_.temperature_hotspot_)::value_type{ temperature_hotspot };
-        }
-
-        decltype(temperature_samples_.temperature_memory_)::value_type::value_type temperature_memory{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &temperature_memory) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_memory_ = decltype(temperature_samples_.temperature_memory_)::value_type{ temperature_memory };
-        }
-
-        decltype(temperature_samples_.temperature_hbm_0_)::value_type::value_type temperature_hbm_0{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &temperature_hbm_0) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_0_ = decltype(temperature_samples_.temperature_hbm_0_)::value_type{ temperature_hbm_0 };
-        }
-
-        decltype(temperature_samples_.temperature_hbm_1_)::value_type::value_type temperature_hbm_1{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &temperature_hbm_1) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_1_ = decltype(temperature_samples_.temperature_hbm_1_)::value_type{ temperature_hbm_1 };
-        }
-
-        decltype(temperature_samples_.temperature_hbm_2_)::value_type::value_type temperature_hbm_2{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &temperature_hbm_2) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_2_ = decltype(temperature_samples_.temperature_hbm_2_)::value_type{ temperature_hbm_2 };
-        }
-
-        decltype(temperature_samples_.temperature_hbm_3_)::value_type::value_type temperature_hbm_3{};
-        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &temperature_hbm_3) == RSMI_STATUS_SUCCESS) {
-            temperature_samples_.temperature_hbm_3_ = decltype(temperature_samples_.temperature_hbm_3_)::value_type{ temperature_hbm_3 };
-        }
-    }
-
-    //
-    // loop until stop_sampling() is called
-    //
-
-    while (!this->has_sampling_stopped()) {
-        // only sample values if the sampler currently isn't paused
-        if (this->is_sampling()) {
-            // add current time point
-            this->add_time_point(std::chrono::steady_clock::now());
-
-            // retrieve general samples
-            {
-                if (general_samples_.performance_level_.has_value()) {
-                    rsmi_dev_perf_level_t pstate{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate));
-                    general_samples_.performance_level_->push_back(static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate));
-                }
-
-                if (general_samples_.utilization_gpu_.has_value()) {
-                    decltype(general_samples_.utilization_gpu_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value));
-                    general_samples_.utilization_gpu_->push_back(value);
-                }
-
-                if (general_samples_.utilization_mem_.has_value()) {
-                    decltype(general_samples_.utilization_mem_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value));
-                    general_samples_.utilization_mem_->push_back(value);
-                }
-            }
-
-            // retrieve clock related samples
-            {
-                if (clock_samples_.clock_system_.has_value()) {
-                    rsmi_frequencies_t frequency_info{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info));
-                    if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        clock_samples_.clock_system_->push_back(frequency_info.frequency[frequency_info.current]);
-                    } else {
-                        // the current index is (somehow) wrong
-                        clock_samples_.clock_system_->push_back(0);
-                    }
-                }
-
-                if (clock_samples_.clock_socket_.has_value()) {
-                    rsmi_frequencies_t frequency_info{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info));
-                    if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        clock_samples_.clock_socket_->push_back(frequency_info.frequency[frequency_info.current]);
-                    } else {
-                        // the current index is (somehow) wrong
-                        clock_samples_.clock_socket_->push_back(0);
-                    }
-                }
-
-                if (clock_samples_.clock_memory_.has_value()) {
-                    rsmi_frequencies_t frequency_info{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info));
-                    if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        clock_samples_.clock_memory_->push_back(frequency_info.frequency[frequency_info.current]);
-                    } else {
-                        // the current index is (somehow) wrong
-                        clock_samples_.clock_memory_->push_back(0);
-                    }
-                }
-
-                if (clock_samples_.overdrive_level_.has_value()) {
-                    decltype(clock_samples_.overdrive_level_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_overdrive_level_get(device_id_, &value));
-                    clock_samples_.overdrive_level_->push_back(value);
-                }
-
-                if (clock_samples_.memory_overdrive_level_.has_value()) {
-                    decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_mem_overdrive_level_get(device_id_, &value));
-                    clock_samples_.memory_overdrive_level_->push_back(value);
-                }
-            }
-
-            // retrieve power related samples
-            {
-                if (power_samples_.power_usage_.has_value()) {
-                    [[maybe_unused]] RSMI_POWER_TYPE power_type{};
-                    decltype(power_samples_.power_usage_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type));
-                    power_samples_.power_usage_->push_back(value);
-                }
-
-                if (power_samples_.power_total_energy_consumption_.has_value()) {
-                    [[maybe_unused]] std::uint64_t timestamp{};
-                    float resolution{};
-                    decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp));  // TODO: returns the same value for all invocations
-                    const double scaled_value = static_cast<double>(value) * static_cast<double>(resolution);
-                    power_samples_.power_total_energy_consumption_->push_back(static_cast<decltype(value)>(scaled_value));
-                }
-
-                if (power_samples_.power_profile_.has_value()) {
-                    rsmi_power_profile_status_t power_profile{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile));
-                    switch (power_profile.current) {
-                        case RSMI_PWR_PROF_PRST_CUSTOM_MASK:
-                            power_samples_.power_profile_->emplace_back("CUSTOM");
-                            break;
-                        case RSMI_PWR_PROF_PRST_VIDEO_MASK:
-                            power_samples_.power_profile_->emplace_back("VIDEO");
-                            break;
-                        case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK:
-                            power_samples_.power_profile_->emplace_back("POWER_SAVING");
-                            break;
-                        case RSMI_PWR_PROF_PRST_COMPUTE_MASK:
-                            power_samples_.power_profile_->emplace_back("COMPUTE");
-                            break;
-                        case RSMI_PWR_PROF_PRST_VR_MASK:
-                            power_samples_.power_profile_->emplace_back("VR");
-                            break;
-                        case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK:
-                            power_samples_.power_profile_->emplace_back("3D_FULL_SCREEN");
-                            break;
-                        case RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT:
-                            power_samples_.power_profile_->emplace_back("BOOTUP_DEFAULT");
-                            break;
-                        case RSMI_PWR_PROF_PRST_INVALID:
-                            power_samples_.power_profile_->emplace_back("INVALID");
-                            break;
-                    }
-                }
-            }
-
-            // retrieve memory related samples
-            {
-                if (memory_samples_.memory_used_.has_value()) {
-                    decltype(memory_samples_.memory_used_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value));
-                    memory_samples_.memory_used_->push_back(value);
-                }
-
-                if (memory_samples_.pcie_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) {
-                    rsmi_pcie_bandwidth_t bandwidth_info{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info));
-                    if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) {
-                        memory_samples_.pcie_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current]);
-                        memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]);
-                    } else {
-                        // the current index is (somehow) wrong
-                        memory_samples_.pcie_transfer_rate_->push_back(0);
-                        memory_samples_.num_pcie_lanes_->push_back(0);
-                    }
-                }
-            }
-
-            // retrieve temperature related samples
-            {
-                if (temperature_samples_.fan_speed_.has_value()) {
-                    decltype(temperature_samples_.fan_speed_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value));
-                    temperature_samples_.fan_speed_->push_back(value);
-                }
-
-                if (temperature_samples_.temperature_edge_.has_value()) {
-                    decltype(temperature_samples_.temperature_edge_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_edge_->push_back(value);
-                }
-
-                if (temperature_samples_.temperature_hotspot_.has_value()) {
-                    decltype(temperature_samples_.temperature_hotspot_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hotspot_->push_back(value);
-                }
-
-                if (temperature_samples_.temperature_memory_.has_value()) {
-                    decltype(temperature_samples_.temperature_memory_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_memory_->push_back(value);
-                }
-
-                if (temperature_samples_.temperature_hbm_0_.has_value()) {
-                    decltype(temperature_samples_.temperature_hbm_0_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hbm_0_->push_back(value);
-                }
-
-                if (temperature_samples_.temperature_hbm_1_.has_value()) {
-                    decltype(temperature_samples_.temperature_hbm_1_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hbm_1_->push_back(value);
-                }
-
-                if (temperature_samples_.temperature_hbm_2_.has_value()) {
-                    decltype(temperature_samples_.temperature_hbm_2_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hbm_2_->push_back(value);
-                }
-
-                if (temperature_samples_.temperature_hbm_3_.has_value()) {
-                    decltype(temperature_samples_.temperature_hbm_3_)::value_type::value_type value{};
-                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value));
-                    temperature_samples_.temperature_hbm_3_->push_back(value);
-                }
-            }
-        }
-
-        // wait for the sampling interval to pass to retrieve the next sample
-        std::this_thread::sleep_for(this->sampling_interval());
-    }
-}
-
-std::string gpu_amd_hardware_sampler::device_identification() const {
-    return std::format("gpu_amd_device_{}", device_id_);
-}
-
-std::string gpu_amd_hardware_sampler::generate_yaml_string() const {
-    // check whether it's safe to generate the YAML entry
-    if (this->is_sampling()) {
-        throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
-    }
-
-    return std::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}",
-                       general_samples_.generate_yaml_string(),
-                       clock_samples_.generate_yaml_string(),
-                       power_samples_.generate_yaml_string(),
-                       memory_samples_.generate_yaml_string(),
-                       temperature_samples_.generate_yaml_string());
-}
-
-std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &sampler) {
-    if (sampler.is_sampling()) {
-        out.setstate(std::ios_base::failbit);
-        return out;
-    } else {
-        return out << std::format("sampling interval: {}\n"
-                                  "time points: [{}]\n\n"
-                                  "general samples:\n{}\n\n"
-                                  "clock samples:\n{}\n\n"
-                                  "power samples:\n{}\n\n"
-                                  "memory samples:\n{}\n\n"
-                                  "temperature samples:\n{}",
-                                  sampler.sampling_interval(),
-                                  detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
-                                  sampler.general_samples(),
-                                  sampler.clock_samples(),
-                                  sampler.power_samples(),
-                                  sampler.memory_samples(),
-                                  sampler.temperature_samples());
-    }
-}
-
-}  // namespace hws
diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
deleted file mode 100644
index 52a1ae8..0000000
--- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp
+++ /dev/null
@@ -1,609 +0,0 @@
-/**
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- */
-
-#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp"
-
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join}
-
-#include "rocm_smi/rocm_smi.h"  // RSMI_MAX_FAN_SPEED
-
-#include <format>   // std::format
-#include <ostream>  // std::ostream
-#include <string>   // std::string
-
-namespace hws {
-
-//*************************************************************************************************************************************//
-//                                                           general samples                                                           //
-//*************************************************************************************************************************************//
-
-std::string rocm_smi_general_samples::generate_yaml_string() const {
-    std::string str{ "general:\n" };
-
-    // device name
-    if (this->name_.has_value()) {
-        str += std::format("  name:\n"
-                           "    unit: \"string\"\n"
-                           "    values: \"{}\"\n",
-                           this->name_.value());
-    }
-
-    // performance state
-    if (this->performance_level_.has_value()) {
-        str += std::format("  performance_state:\n"
-                           "    unit: \"int - see rsmi_dev_perf_level_t\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->performance_level_.value(), ", "));
-    }
-    // device compute utilization
-    if (this->utilization_gpu_.has_value()) {
-        str += std::format("  utilization_gpu:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->utilization_gpu_.value(), ", "));
-    }
-    // device memory utilization
-    if (this->utilization_mem_.has_value()) {
-        str += std::format("  utilization_mem:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->utilization_mem_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) {
-    return out << std::format("name [string]: {}\n"
-                              "performance_level [int]: [{}]\n"
-                              "utilization_gpu [%]: [{}]\n"
-                              "utilization_mem [%]: [{}]",
-                              detail::value_or_default(samples.get_name()),
-                              detail::join(detail::value_or_default(samples.get_performance_level()), ", "),
-                              detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_utilization_mem()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                            clock samples                                                            //
-//*************************************************************************************************************************************//
-
-std::string rocm_smi_clock_samples::generate_yaml_string() const {
-    std::string str{ "clock:\n" };
-
-    // socket clock min frequencies
-    if (this->clock_socket_min_.has_value()) {
-        str += std::format("  clock_socket_min:\n"
-                           "    unit: \"Hz\"\n"
-                           "    values: {}\n",
-                           this->clock_socket_min_.value());
-    }
-    // socket clock max frequencies
-    if (this->clock_socket_max_.has_value()) {
-        str += std::format("  clock_socket_max:\n"
-                           "    unit: \"Hz\"\n"
-                           "    values: {}\n",
-                           this->clock_socket_max_.value());
-    }
-
-    // memory clock min frequencies
-    if (this->clock_memory_min_.has_value()) {
-        str += std::format("  clock_memory_min:\n"
-                           "    unit: \"Hz\"\n"
-                           "    values: {}\n",
-                           this->clock_memory_min_.value());
-    }
-    // memory clock max frequencies
-    if (this->clock_memory_max_.has_value()) {
-        str += std::format("  clock_memory_max:\n"
-                           "    unit: \"Hz\"\n"
-                           "    values: {}\n",
-                           this->clock_memory_max_.value());
-    }
-
-    // system clock min frequencies
-    if (this->clock_system_min_.has_value()) {
-        str += std::format("  clock_gpu_min:\n"
-                           "    unit: \"Hz\"\n"
-                           "    values: {}\n",
-                           this->clock_system_min_.value());
-    }
-    // system clock max frequencies
-    if (this->clock_system_max_.has_value()) {
-        str += std::format("  clock_gpu_max:\n"
-                           "    unit: \"Hz\"\n"
-                           "    values: {}\n",
-                           this->clock_system_max_.value());
-    }
-
-    // socket clock frequency
-    if (this->clock_socket_.has_value()) {
-        str += std::format("  clock_socket:\n"
-                           "    unit: \"Hz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->clock_socket_.value(), ", "));
-    }
-    // memory clock frequency
-    if (this->clock_memory_.has_value()) {
-        str += std::format("  clock_memory:\n"
-                           "    unit: \"Hz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->clock_memory_.value(), ", "));
-    }
-    // system clock frequency
-    if (this->clock_system_.has_value()) {
-        str += std::format("  clock_gpu:\n"
-                           "    unit: \"Hz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->clock_system_.value(), ", "));
-    }
-    // overdrive level
-    if (this->overdrive_level_.has_value()) {
-        str += std::format("  overdrive_level:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->overdrive_level_.value(), ", "));
-    }
-    // memory overdrive level
-    if (this->memory_overdrive_level_.has_value()) {
-        str += std::format("  memory_overdrive_level:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->memory_overdrive_level_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples) {
-    return out << std::format("clock_system_min [Hz]: {}\n"
-                              "clock_system_max [Hz]: {}\n"
-                              "clock_socket_min [Hz]: {}\n"
-                              "clock_socket_max [Hz]: {}\n"
-                              "clock_memory_min [Hz]: {}\n"
-                              "clock_memory_max [Hz]: {}\n"
-                              "clock_system [Hz]: [{}]\n"
-                              "clock_socket [Hz]: [{}]\n"
-                              "clock_memory [Hz]: [{}]\n"
-                              "overdrive_level [%]: [{}]\n"
-                              "memory_overdrive_level [%]: [{}]",
-                              detail::value_or_default(samples.get_clock_system_min()),
-                              detail::value_or_default(samples.get_clock_system_max()),
-                              detail::value_or_default(samples.get_clock_socket_min()),
-                              detail::value_or_default(samples.get_clock_socket_max()),
-                              detail::value_or_default(samples.get_clock_memory_min()),
-                              detail::value_or_default(samples.get_clock_memory_max()),
-                              detail::join(detail::value_or_default(samples.get_clock_system()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_socket()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_memory()), ", "),
-                              detail::join(detail::value_or_default(samples.get_overdrive_level()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                            power samples                                                            //
-//*************************************************************************************************************************************//
-
-std::string rocm_smi_power_samples::generate_yaml_string() const {
-    std::string str{ "power:\n" };
-
-    // default power cap
-    if (this->power_default_cap_.has_value()) {
-        str += std::format("  power_management_limit:\n"
-                           "    unit: \"muW\"\n"
-                           "    values: {}\n",
-                           this->power_default_cap_.value());
-    }
-    // power cap
-    if (this->power_cap_.has_value()) {
-        str += std::format("  power_enforced_limit:\n"
-                           "    unit: \"muW\"\n"
-                           "    values: {}\n",
-                           this->power_cap_.value());
-    }
-    // power measurement type
-    if (this->power_type_.has_value()) {
-        str += std::format("  power_measurement_type:\n"
-                           "    unit: \"string\"\n"
-                           "    values: {}\n",
-                           this->power_type_.value());
-    }
-    // available power levels
-    if (this->available_power_profiles_.has_value()) {
-        str += std::format("  available_power_profiles:\n"
-                           "    unit: \"string\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->available_power_profiles_.value(), ", "));
-    }
-
-    // current power usage
-    if (this->power_usage_.has_value()) {
-        str += std::format("  power_usage:\n"
-                           "    unit: \"muW\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->power_usage_.value(), ", "));
-    }
-    // total energy consumed
-    if (this->power_total_energy_consumption_.has_value()) {
-        decltype(rocm_smi_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size());
-        for (std::size_t i = 0; i < consumed_energy.size(); ++i) {
-            consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front();
-        }
-        str += std::format("  power_total_energy_consumed:\n"
-                           "    unit: \"muJ\"\n"
-                           "    values: [{}]\n",
-                           detail::join(consumed_energy, ", "));
-    }
-    // current power level
-    if (this->power_profile_.has_value()) {
-        str += std::format("  power_profile:\n"
-                           "    unit: \"string\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->power_profile_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples) {
-    return out << std::format("power_default_cap [muW]: {}\n"
-                              "power_cap [muW]: {}\n"
-                              "power_type [string]: {}\n"
-                              "available_power_profiles [string]: [{}]\n"
-                              "power_usage [muW]: [{}]\n"
-                              "power_total_energy_consumption [muJ]: [{}]\n"
-                              "power_profile [string]: [{}]",
-                              detail::value_or_default(samples.get_power_default_cap()),
-                              detail::value_or_default(samples.get_power_cap()),
-                              detail::value_or_default(samples.get_power_type()),
-                              detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_usage()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_profile()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                            memory samples                                                           //
-//*************************************************************************************************************************************//
-
-std::string rocm_smi_memory_samples::generate_yaml_string() const {
-    std::string str{ "memory:\n" };
-
-    // total memory
-    if (this->memory_total_.has_value()) {
-        str += std::format("  memory_total:\n"
-                           "    unit: \"B\"\n"
-                           "    values: {}\n",
-                           this->memory_total_.value());
-    }
-    // total visible memory
-    if (this->visible_memory_total_.has_value()) {
-        str += std::format("  visible_memory_total:\n"
-                           "    unit: \"B\"\n"
-                           "    values: {}\n",
-                           this->visible_memory_total_.value());
-    }
-    // min number of PCIe lanes
-    if (this->min_num_pcie_lanes_.has_value()) {
-        str += std::format("  min_num_pcie_lanes:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->min_num_pcie_lanes_.value());
-    }
-    // max number of PCIe lanes
-    if (this->max_num_pcie_lanes_.has_value()) {
-        str += std::format("  max_num_pcie_lanes:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->max_num_pcie_lanes_.value());
-    }
-
-    // used memory
-    if (this->memory_used_.has_value()) {
-        str += std::format("  memory_used:\n"
-                           "    unit: \"B\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->memory_used_.value(), ", "));
-    }
-    // free memory
-    if (this->memory_used_.has_value() && this->memory_total_.has_value()) {
-        decltype(rocm_smi_memory_samples::memory_used_)::value_type memory_free(this->memory_used_->size(), this->memory_total_.value());
-        for (std::size_t i = 0; i < memory_free.size(); ++i) {
-            memory_free[i] -= this->memory_used_.value()[i];
-        }
-        str += std::format("  memory_free:\n"
-                           "    unit: \"B\"\n"
-                           "    values: [{}]\n",
-                           detail::join(memory_free, ", "));
-    }
-
-    // PCIe bandwidth
-    if (this->pcie_transfer_rate_.has_value()) {
-        str += std::format("  pcie_bandwidth:\n"
-                           "    unit: \"T/s\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->pcie_transfer_rate_.value(), ", "));
-    }
-    // number of PCIe lanes
-    if (this->num_pcie_lanes_.has_value()) {
-        str += std::format("  pcie_num_lanes:\n"
-                           "    unit: \"int\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->num_pcie_lanes_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples) {
-    return out << std::format("memory_total [B]: {}\n"
-                              "visible_memory_total [B]: {}\n"
-                              "min_num_pcie_lanes [int]: {}\n"
-                              "max_num_pcie_lanes [int]: {}\n"
-                              "memory_used [B]: [{}]\n"
-                              "pcie_transfer_rate [T/s]: [{}]\n"
-                              "num_pcie_lanes [int]: [{}]",
-                              detail::value_or_default(samples.get_memory_total()),
-                              detail::value_or_default(samples.get_visible_memory_total()),
-                              detail::value_or_default(samples.get_min_num_pcie_lanes()),
-                              detail::value_or_default(samples.get_max_num_pcie_lanes()),
-                              detail::join(detail::value_or_default(samples.get_memory_used()), ", "),
-                              detail::join(detail::value_or_default(samples.get_pcie_transfer_rate()), ", "),
-                              detail::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                         temperature samples                                                         //
-//*************************************************************************************************************************************//
-
-std::string rocm_smi_temperature_samples::generate_yaml_string() const {
-    std::string str{ "temperature:\n" };
-
-    // number of fans (emulated)
-    if (this->num_fans_.has_value()) {
-        str += std::format("  num_fans:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->num_fans_.value());
-    }
-    // maximum fan speed
-    if (this->max_fan_speed_.has_value()) {
-        str += std::format("  max_fan_speed:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->max_fan_speed_.value());
-    }
-    // minimum GPU edge temperature
-    if (this->temperature_edge_min_.has_value()) {
-        str += std::format("  temperature_gpu_min:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_edge_min_.value());
-    }
-    // maximum GPU edge temperature
-    if (this->temperature_edge_max_.has_value()) {
-        str += std::format("  temperature_gpu_max:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_edge_max_.value());
-    }
-    // minimum GPU hotspot temperature
-    if (this->temperature_hotspot_min_.has_value()) {
-        str += std::format("  temperature_hotspot_min:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hotspot_min_.value());
-    }
-    // maximum GPU hotspot temperature
-    if (this->temperature_hotspot_max_.has_value()) {
-        str += std::format("  temperature_hotspot_max:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hotspot_max_.value());
-    }
-    // minimum GPU memory temperature
-    if (this->temperature_memory_min_.has_value()) {
-        str += std::format("  temperature_memory_min:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_memory_min_.value());
-    }
-    // maximum GPU memory temperature
-    if (this->temperature_memory_max_.has_value()) {
-        str += std::format("  temperature_memory_max:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_memory_max_.value());
-    }
-    // minimum GPU HBM 0 temperature
-    if (this->temperature_hbm_0_min_.has_value()) {
-        str += std::format("  temperature_hbm_0_min:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hbm_0_min_.value());
-    }
-    // maximum GPU HBM 0 temperature
-    if (this->temperature_hbm_0_max_.has_value()) {
-        str += std::format("  temperature_hbm_0_max:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hbm_0_max_.value());
-    }
-    // minimum GPU HBM 1 temperature
-    if (this->temperature_hbm_1_min_.has_value()) {
-        str += std::format("  temperature_hbm_1_min:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hbm_1_min_.value());
-    }
-    // maximum GPU HBM 1 temperature
-    if (this->temperature_hbm_1_max_.has_value()) {
-        str += std::format("  temperature_hbm_1_max:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hbm_1_max_.value());
-    }
-    // minimum GPU HBM 2 temperature
-    if (this->temperature_hbm_2_min_.has_value()) {
-        str += std::format("  temperature_hbm_2_min:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hbm_2_min_.value());
-    }
-    // maximum GPU HBM 2 temperature
-    if (this->temperature_hbm_2_max_.has_value()) {
-        str += std::format("  temperature_hbm_2_max:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hbm_2_max_.value());
-    }
-    // minimum GPU HBM 3 temperature
-    if (this->temperature_hbm_3_min_.has_value()) {
-        str += std::format("  temperature_hbm_3_min:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hbm_3_min_.value());
-    }
-    // maximum GPU HBM 3 temperature
-    if (this->temperature_hbm_3_max_.has_value()) {
-        str += std::format("  temperature_hbm_3_max:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_hbm_3_max_.value());
-    }
-
-    // fan speed
-    if (this->fan_speed_.has_value()) {
-        std::vector<double> fan_speed_percent(this->fan_speed_->size());
-        for (std::size_t i = 0; i < fan_speed_percent.size(); ++i) {
-            fan_speed_percent[i] = static_cast<double>(this->fan_speed_.value()[i]) / static_cast<double>(RSMI_MAX_FAN_SPEED);
-        }
-        str += std::format("  fan_speed:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: [{}]\n",
-                           detail::join(fan_speed_percent, ", "));
-    }
-    // GPU edge temperature
-    if (this->temperature_edge_.has_value()) {
-        str += std::format("  temperature_gpu:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->temperature_edge_.value(), ", "));
-    }
-    // GPU hotspot temperature
-    if (this->temperature_hotspot_.has_value()) {
-        str += std::format("  temperature_hotspot:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->temperature_hotspot_.value(), ", "));
-    }
-    // GPU memory temperature
-    if (this->temperature_memory_.has_value()) {
-        str += std::format("  temperature_memory:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->temperature_memory_.value(), ", "));
-    }
-    // GPU HBM 0 temperature
-    if (this->temperature_hbm_0_.has_value()) {
-        str += std::format("  temperature_hbm_0:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->temperature_hbm_0_.value(), ", "));
-    }
-    // GPU HBM 1 temperature
-    if (this->temperature_hbm_1_.has_value()) {
-        str += std::format("  temperature_hbm_1:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->temperature_hbm_1_.value(), ", "));
-    }
-    // GPU HBM 2 temperature
-    if (this->temperature_hbm_2_.has_value()) {
-        str += std::format("  temperature_hbm_2:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->temperature_hbm_2_.value(), ", "));
-    }
-    // GPU HBM 3 temperature
-    if (this->temperature_hbm_3_.has_value()) {
-        str += std::format("  temperature_hbm_3:\n"
-                           "    unit: \"m°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->temperature_hbm_3_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples) {
-    return out << std::format("num_fans [int]: {}\n"
-                              "max_fan_speed [int]: {}\n"
-                              "temperature_edge_min [m°C]: {}\n"
-                              "temperature_edge_max [m°C]: {}\n"
-                              "temperature_hotspot_min [m°C]: {}\n"
-                              "temperature_hotspot_max [m°C]: {}\n"
-                              "temperature_memory_min [m°C]: {}\n"
-                              "temperature_memory_max [m°C]: {}\n"
-                              "temperature_hbm_0_min [m°C]: {}\n"
-                              "temperature_hbm_0_max [m°C]: {}\n"
-                              "temperature_hbm_1_min [m°C]: {}\n"
-                              "temperature_hbm_1_max [m°C]: {}\n"
-                              "temperature_hbm_2_min [m°C]: {}\n"
-                              "temperature_hbm_2_max [m°C]: {}\n"
-                              "temperature_hbm_3_min [m°C]: {}\n"
-                              "temperature_hbm_3_max [m°C]: {}\n"
-                              "fan_speed [%]: [{}]\n"
-                              "temperature_edge [m°C]: [{}]\n"
-                              "temperature_hotspot [m°C]: [{}]\n"
-                              "temperature_memory [m°C]: [{}]\n"
-                              "temperature_hbm_0 [m°C]: [{}]\n"
-                              "temperature_hbm_1 [m°C]: [{}]\n"
-                              "temperature_hbm_2 [m°C]: [{}]\n"
-                              "temperature_hbm_3 [m°C]: [{}]",
-                              detail::value_or_default(samples.get_num_fans()),
-                              detail::value_or_default(samples.get_max_fan_speed()),
-                              detail::value_or_default(samples.get_temperature_edge_min()),
-                              detail::value_or_default(samples.get_temperature_edge_max()),
-                              detail::value_or_default(samples.get_temperature_hotspot_min()),
-                              detail::value_or_default(samples.get_temperature_hotspot_max()),
-                              detail::value_or_default(samples.get_temperature_memory_min()),
-                              detail::value_or_default(samples.get_temperature_memory_max()),
-                              detail::value_or_default(samples.get_temperature_hbm_0_min()),
-                              detail::value_or_default(samples.get_temperature_hbm_0_max()),
-                              detail::value_or_default(samples.get_temperature_hbm_1_min()),
-                              detail::value_or_default(samples.get_temperature_hbm_1_max()),
-                              detail::value_or_default(samples.get_temperature_hbm_2_min()),
-                              detail::value_or_default(samples.get_temperature_hbm_2_max()),
-                              detail::value_or_default(samples.get_temperature_hbm_3_min()),
-                              detail::value_or_default(samples.get_temperature_hbm_3_max()),
-                              detail::join(detail::value_or_default(samples.get_fan_speed()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_edge()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hotspot()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_memory()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hbm_0()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hbm_1()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hbm_2()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_hbm_3()), ", "));
-}
-
-}  // namespace hws
diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
deleted file mode 100644
index 1c362cf..0000000
--- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp
+++ /dev/null
@@ -1,491 +0,0 @@
-/**
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- */
-
-#include "hardware_sampling/gpu_intel/level_zero_samples.hpp"
-
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join}
-
-#include <format>       // std::format
-#include <ostream>      // std::ostream
-#include <string>       // std::string
-#include <string_view>  // std::string_view
-#include <type_traits>  // std::remove_cvref_t, std::false_type, std::true_type
-#include <vector>       // std::vector
-
-namespace hws {
-
-namespace detail {
-
-template <typename T>
-struct is_vector : std::false_type { };
-
-template <typename T>
-struct is_vector<std::vector<T>> : std::true_type { };
-
-template <typename T>
-constexpr bool is_vector_v = is_vector<T>::value;
-
-template <typename MapType>
-void append_map_values(std::string &str, const std::string_view entry_name, const MapType &map) {
-    if (map.has_value()) {
-        for (const auto &[key, value] : map.value()) {
-            if constexpr (is_vector_v<std::remove_cvref_t<decltype(value)>>) {
-                str += std::format("{}_{}: [{}]\n", entry_name, key, detail::join(value, ", "));
-            } else {
-                str += std::format("{}_{}: {}\n", entry_name, key, value);
-            }
-        }
-    }
-}
-
-}  // namespace detail
-
-//*************************************************************************************************************************************//
-//                                                           general samples                                                           //
-//*************************************************************************************************************************************//
-
-std::string level_zero_general_samples::generate_yaml_string() const {
-    std::string str{ "general:\n" };
-
-    // the model name
-    if (this->name_.has_value()) {
-        str += std::format("  name:\n"
-                           "    unit: \"string\"\n"
-                           "    values: \"{}\"\n",
-                           this->name_.value());
-    }
-    // the standby mode
-    if (this->standby_mode_.has_value()) {
-        str += std::format("  standby_mode:\n"
-                           "    unit: \"string\"\n"
-                           "    values: \"{}\"\n",
-                           this->standby_mode_.value());
-    }
-    // the number of threads per EU unit
-    if (this->num_threads_per_eu_.has_value()) {
-        str += std::format("  num_threads_per_eu:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->num_threads_per_eu_.value());
-    }
-    // the EU SIMD width
-    if (this->eu_simd_width_.has_value()) {
-        str += std::format("  physical_eu_simd_width:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->eu_simd_width_.value());
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) {
-    return out << std::format("name [string]: {}\n"
-                              "standby_mode [string]: {}\n"
-                              "num_threads_per_eu [int]: {}\n"
-                              "eu_simd_width [int]: {}",
-                              detail::value_or_default(samples.get_name()),
-                              detail::value_or_default(samples.get_standby_mode()),
-                              detail::value_or_default(samples.get_num_threads_per_eu()),
-                              detail::value_or_default(samples.get_eu_simd_width()));
-}
-
-//*************************************************************************************************************************************//
-//                                                            clock samples                                                            //
-//*************************************************************************************************************************************//
-
-std::string level_zero_clock_samples::generate_yaml_string() const {
-    std::string str{ "clock:\n" };
-
-    // minimum GPU core clock
-    if (this->clock_gpu_min_.has_value()) {
-        str += std::format("  clock_gpu_min:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_gpu_min_.value());
-    }
-    // maximum GPU core clock
-    if (this->clock_gpu_max_.has_value()) {
-        str += std::format("  clock_gpu_max:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_gpu_max_.value());
-    }
-    // all possible GPU core clock frequencies
-    if (this->available_clocks_gpu_.has_value()) {
-        str += std::format("  available_clocks_gpu:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->available_clocks_gpu_.value(), ", "));
-    }
-    // minimum memory clock
-    if (this->clock_mem_min_.has_value()) {
-        str += std::format("  clock_mem_min:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_mem_min_.value());
-    }
-    // maximum memory clock
-    if (this->clock_mem_max_.has_value()) {
-        str += std::format("  clock_mem_max:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_mem_max_.value());
-    }
-    // all possible memory clock frequencies
-    if (this->available_clocks_mem_.has_value()) {
-        str += std::format("  available_clocks_mem:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->available_clocks_mem_.value(), ", "));
-    }
-
-    // the maximum GPU core frequency based on the current TDP limit
-    if (this->tdp_frequency_limit_gpu_.has_value()) {
-        str += std::format("  tdp_frequency_limit_gpu:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->tdp_frequency_limit_gpu_.value(), ", "));
-    }
-    // the current GPU core clock frequency
-    if (this->clock_gpu_.has_value()) {
-        str += std::format("  clock_gpu:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->clock_gpu_.value(), ", "));
-    }
-    // the current GPU core throttle reason
-    if (this->throttle_reason_gpu_.has_value()) {
-        str += std::format("  throttle_reason_gpu:\n"
-                           "    unit: \"bitmask\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->throttle_reason_gpu_.value(), ", "));
-    }
-    // the maximum memory frequency based on the current TDP limit
-    if (this->tdp_frequency_limit_mem_.has_value()) {
-        str += std::format("  tdp_frequency_limit_mem:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->tdp_frequency_limit_mem_.value(), ", "));
-    }
-    // the current memory clock frequency
-    if (this->clock_mem_.has_value()) {
-        str += std::format("  clock_mem:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->clock_mem_.value(), ", "));
-    }
-    // the current memory throttle reason
-    if (this->throttle_reason_mem_.has_value()) {
-        str += std::format("  throttle_reason_mem:\n"
-                           "    unit: \"bitmask\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->throttle_reason_mem_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samples) {
-    return out << std::format("clock_gpu_min [MHz]: {}\n"
-                              "clock_gpu_max [MHz]: {}\n"
-                              "available_clocks_gpu [MHz]: [{}]\n"
-                              "clock_mem_min [MHz]: {}\n"
-                              "clock_mem_max [MHz]: {}\n"
-                              "available_clocks_mem [MHz]: [{}]\n"
-                              "tdp_frequency_limit_gpu [MHz]: [{}]\n"
-                              "clock_gpu [MHz]: [{}]\n"
-                              "throttle_reason_gpu [bitmask]: [{}]\n"
-                              "tdp_frequency_limit_mem [MHz]: [{}]\n"
-                              "clock_mem [MHz]: [{}]\n"
-                              "throttle_reason_mem [bitmask]: [{}]",
-                              detail::value_or_default(samples.get_clock_gpu_min()),
-                              detail::value_or_default(samples.get_clock_gpu_max()),
-                              detail::join(detail::value_or_default(samples.get_available_clocks_gpu()), ", "),
-                              detail::value_or_default(samples.get_clock_mem_min()),
-                              detail::value_or_default(samples.get_clock_mem_max()),
-                              detail::join(detail::value_or_default(samples.get_available_clocks_mem()), ", "),
-                              detail::join(detail::value_or_default(samples.get_tdp_frequency_limit_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_throttle_reason_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_tdp_frequency_limit_mem()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_mem()), ", "),
-                              detail::join(detail::value_or_default(samples.get_throttle_reason_mem()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                            power samples                                                            //
-//*************************************************************************************************************************************//
-
-std::string level_zero_power_samples::generate_yaml_string() const {
-    std::string str{ "power:\n" };
-
-    // flag whether the energy threshold is enabled
-    if (this->energy_threshold_enabled_.has_value()) {
-        str += std::format("  energy_threshold_enabled:\n"
-                           "    unit: \"bool\"\n"
-                           "    values: {}\n",
-                           this->energy_threshold_enabled_.value());
-    }
-    // the energy threshold
-    if (this->energy_threshold_.has_value()) {
-        str += std::format("  energy_threshold:\n"
-                           "    unit: \"J\"\n"
-                           "    values: {}\n",
-                           this->energy_threshold_.value());
-    }
-
-    // the total consumed energy
-    if (this->power_total_energy_consumption_.has_value()) {
-        decltype(level_zero_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size());
-        for (std::size_t i = 0; i < consumed_energy.size(); ++i) {
-            consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front();
-        }
-        str += std::format("  power_total_energy_consumed:\n"
-                           "    unit: \"J\"\n"
-                           "    values: [{}]\n",
-                           detail::join(consumed_energy, ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const level_zero_power_samples &samples) {
-    return out << std::format("energy_threshold_enabled [bool]: {}\n"
-                              "energy_threshold [J]: {}\n"
-                              "power_total_energy_consumption [J]: [{}]",
-                              detail::value_or_default(samples.get_energy_threshold_enabled()),
-                              detail::value_or_default(samples.get_energy_threshold()),
-                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                            memory samples                                                           //
-//*************************************************************************************************************************************//
-
-std::string level_zero_memory_samples::generate_yaml_string() const {
-    std::string str{ "memory:\n" };
-
-    // the total memory
-    if (this->memory_total_.has_value()) {
-        for (const auto &[key, value] : this->memory_total_.value()) {
-            str += std::format("  memory_total_{}:\n"
-                               "    unit: \"B\"\n"
-                               "    values: {}\n",
-                               key,
-                               value);
-        }
-    }
-    // the total allocatable memory
-    if (this->allocatable_memory_total_.has_value()) {
-        for (const auto &[key, value] : this->allocatable_memory_total_.value()) {
-            str += std::format("  allocatable_memory_total_{}:\n"
-                               "    unit: \"B\"\n"
-                               "    values: {}\n",
-                               key,
-                               value);
-        }
-    }
-    // the pcie max bandwidth
-    if (this->pcie_link_max_speed_.has_value()) {
-        str += std::format("  pcie_max_bandwidth:\n"
-                           "    unit: \"BPS\"\n"
-                           "    values: {}\n",
-                           this->pcie_link_max_speed_.value());
-    }
-    // the pcie link width
-    if (this->pcie_max_width_.has_value()) {
-        str += std::format("  max_pcie_link_width:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->pcie_max_width_.value());
-    }
-    // the pcie generation
-    if (this->max_pcie_link_generation_.has_value()) {
-        str += std::format("  max_pcie_link_generation:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->max_pcie_link_generation_.value());
-    }
-    // the memory bus width
-    if (this->bus_width_.has_value()) {
-        for (const auto &[key, value] : this->bus_width_.value()) {
-            str += std::format("  memory_bus_width_{}:\n"
-                               "    unit: \"Bit\"\n"
-                               "    values: {}\n",
-                               key,
-                               value);
-        }
-    }
-    // the number of memory channels
-    if (this->num_channels_.has_value()) {
-        for (const auto &[key, value] : this->num_channels_.value()) {
-            str += std::format("  memory_num_channels_{}:\n"
-                               "    unit: \"int\"\n"
-                               "    values: {}\n",
-                               key,
-                               value);
-        }
-    }
-    // the memory location (system or device)
-    if (this->location_.has_value()) {
-        for (const auto &[key, value] : this->location_.value()) {
-            str += std::format("  memory_location_{}:\n"
-                               "    unit: \"string\"\n"
-                               "    values: \"{}\"\n",
-                               key,
-                               value);
-        }
-    }
-
-    // the currently free and used memory
-    if (this->memory_free_.has_value()) {
-        for (const auto &[key, value] : this->memory_free_.value()) {
-            str += std::format("  memory_free_{}:\n"
-                               "    unit: \"string\"\n"
-                               "    values: [{}]\n",
-                               key,
-                               detail::join(value, ", "));
-
-            // calculate the used memory
-            if (this->allocatable_memory_total_.has_value()) {
-                decltype(level_zero_memory_samples::memory_free_)::value_type::mapped_type memory_used(value.size(), this->allocatable_memory_total_->at(key));
-                for (std::size_t i = 0; i < memory_used.size(); ++i) {
-                    memory_used[i] -= value[i];
-                }
-                str += std::format("  memory_used_{}:\n"
-                                   "    unit: \"string\"\n"
-                                   "    values: [{}]\n",
-                                   key,
-                                   detail::join(memory_used, ", "));
-            }
-        }
-    }
-    // PCIe link speed
-    if (this->pcie_link_speed_.has_value()) {
-        str += std::format("  pcie_bandwidth:\n"
-                           "    unit: \"MBPS\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->pcie_link_speed_.value(), ", "));
-    }
-    // PCIe link width
-    if (this->pcie_link_width_.has_value()) {
-        str += std::format("  pcie_link_width:\n"
-                           "    unit: \"int\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->pcie_link_width_.value(), ", "));
-    }
-    // PCIe link generation
-    if (this->pcie_link_generation_.has_value()) {
-        str += std::format("  pcie_link_generation:\n"
-                           "    unit: \"int\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->pcie_link_generation_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &samples) {
-    std::string str{};
-
-    detail::append_map_values(str, "memory_total [B]", samples.get_memory_total());
-    detail::append_map_values(str, "allocatable_memory_total [B]", samples.get_allocatable_memory_total());
-
-    str += std::format("pcie_link_max_speed [BPS]: {}\n"
-                       "pcie_max_width [int]: {}\n"
-                       "max_pcie_link_generation [int]: {}\n",
-                       detail::value_or_default(samples.get_pcie_link_max_speed()),
-                       detail::value_or_default(samples.get_pcie_max_width()),
-                       detail::value_or_default(samples.get_max_pcie_link_generation()));
-
-    detail::append_map_values(str, "bus_width [Bit]", samples.get_bus_width());
-    detail::append_map_values(str, "num_channels [int]", samples.get_num_channels());
-    detail::append_map_values(str, "location [string]", samples.get_location());
-    detail::append_map_values(str, "memory_free [string]", samples.get_memory_free());
-
-    str += std::format("pcie_link_speed [MBPS]: [{}]\n"
-                       "pcie_link_width [int]: [{}]\n"
-                       "pcie_link_generation [int]: [{}]",
-                       detail::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "),
-                       detail::join(detail::value_or_default(samples.get_pcie_link_width()), ", "),
-                       detail::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "));
-
-    return out << str;
-}
-
-//*************************************************************************************************************************************//
-//                                                         temperature samples                                                         //
-//*************************************************************************************************************************************//
-
-std::string level_zero_temperature_samples::generate_yaml_string() const {
-    std::string str{ "temperature:\n" };
-
-    // the maximum sensor temperature
-    if (this->temperature_max_.has_value()) {
-        for (const auto &[key, value] : this->temperature_max_.value()) {
-            str += std::format("  temperature_{}_max:\n"
-                               "    unit: \"°C\"\n"
-                               "    values: {}\n",
-                               key,
-                               value);
-        }
-    }
-
-    // the current PSU temperatures
-    if (this->temperature_psu_.has_value()) {
-        str += std::format("  temperature_psu:\n"
-                           "    unit: \"°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->temperature_psu_.value(), ", "));
-    }
-    // the current sensor temperatures
-    if (this->temperature_.has_value()) {
-        for (const auto &[key, value] : this->temperature_.value()) {
-            str += std::format("  temperature_{}:\n"
-                               "    unit: \"°C\"\n"
-                               "    values: [{}]\n",
-                               key,
-                               detail::join(value, ", "));
-        }
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples &samples) {
-    std::string str{};
-
-    detail::append_map_values(str, "temperature_max [°C]", samples.get_temperature_max());
-
-    str += std::format("temperature_psu [°C]: [{}]\n",
-                       detail::join(detail::value_or_default(samples.get_temperature_psu()), ", "));
-
-    detail::append_map_values(str, "temperature [°C]", samples.get_temperature());
-
-    // remove last newline
-    str.pop_back();
-
-    return out << str;
-}
-
-}  // namespace hws
diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
deleted file mode 100644
index 4d9fbdc..0000000
--- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp
+++ /dev/null
@@ -1,471 +0,0 @@
-/**
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- */
-
-#include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp"
-
-#include "hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp"  // hws::detail::nvml_device_handle implementation
-#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"             // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
-#include "hardware_sampling/gpu_nvidia/utility.hpp"                  // HWS_NVML_ERROR_CHECK
-#include "hardware_sampling/hardware_sampler.hpp"                    // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"                             // hws::detail::{time_points_to_epoch, join}
-
-#include "nvml.h"  // NVML runtime functions
-
-#include <algorithm>  // std::min_element
-#include <chrono>     // std::chrono::{steady_clock, duration_cast, milliseconds}
-#include <cstddef>    // std::size_t
-#include <exception>  // std::exception, std::terminate
-#include <format>     // std::format
-#include <ios>        // std::ios_base
-#include <iostream>   // std::cerr, std::endl
-#include <optional>   // std::optional
-#include <ostream>    // std::ostream
-#include <stdexcept>  // std::runtime_error
-#include <string>     // std::string
-#include <thread>     // std::this_thread
-#include <vector>     // std::vector
-
-namespace hws {
-
-gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler() :
-    gpu_nvidia_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { }
-
-gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id) :
-    gpu_nvidia_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { }
-
-gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    gpu_nvidia_hardware_sampler{ 0, sampling_interval } { }
-
-gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) :
-    hardware_sampler{ sampling_interval } {
-    // make sure that nvmlInit is only called once for all instances
-    if (instances_++ == 0) {
-        HWS_NVML_ERROR_CHECK(nvmlInit());
-        // notify that initialization has been finished
-        init_finished_ = true;
-    } else {
-        // wait until init has been finished!
-        while (!init_finished_) { }
-    }
-
-    // initialize samples -> can't be done beforehand since the device handle can only be initialized after a call to nvmlInit
-    device_ = detail::nvml_device_handle{ device_id };
-}
-
-gpu_nvidia_hardware_sampler::~gpu_nvidia_hardware_sampler() {
-    try {
-        // if this hardware sampler is still sampling, stop it
-        if (this->has_sampling_started() && !this->has_sampling_stopped()) {
-            this->stop_sampling();
-        }
-
-        // the last instance must shut down the NVML runtime
-        // make sure that nvmlShutdown is only called once
-        if (--instances_ == 0) {
-            HWS_NVML_ERROR_CHECK(nvmlShutdown());
-            // reset init_finished flag
-            init_finished_ = false;
-        }
-    } catch (const std::exception &e) {
-        std::cerr << e.what() << std::endl;
-        std::terminate();
-    }
-}
-
-void gpu_nvidia_hardware_sampler::sampling_loop() {
-    // get the nvml handle from the device
-    nvmlDevice_t device = device_.get_impl().device;
-
-    //
-    // add samples where we only have to retrieve the value once
-    //
-
-    this->add_time_point(std::chrono::steady_clock::now());
-
-    // retrieve initial general information
-    {
-        // fixed information -> only retrieved once
-        std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0');
-        if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) {
-            general_samples_.name_ = name.substr(0, name.find_first_of('\0'));
-        }
-
-        nvmlEnableState_t mode{};
-        if (nvmlDeviceGetPersistenceMode(device, &mode) == NVML_SUCCESS) {
-            general_samples_.persistence_mode_ = mode == NVML_FEATURE_ENABLED;
-        }
-
-        decltype(general_samples_.num_cores_)::value_type num_cores{};
-        if (nvmlDeviceGetNumGpuCores(device, &num_cores) == NVML_SUCCESS) {
-            general_samples_.num_cores_ = num_cores;
-        }
-
-        // queried samples -> retrieved every iteration if available
-        nvmlPstates_t pstate{};
-        if (nvmlDeviceGetPerformanceState(device, &pstate) == NVML_SUCCESS) {
-            general_samples_.performance_state_ = decltype(general_samples_.performance_state_)::value_type{ static_cast<decltype(general_samples_.performance_state_)::value_type::value_type>(pstate) };
-        }
-
-        nvmlUtilization_t util{};
-        if (nvmlDeviceGetUtilizationRates(device, &util) == NVML_SUCCESS) {
-            general_samples_.utilization_gpu_ = decltype(general_samples_.utilization_gpu_)::value_type{ util.gpu };
-            general_samples_.utilization_mem_ = decltype(general_samples_.utilization_gpu_)::value_type{ util.memory };
-        }
-    }
-
-    // retrieve initial clock related information
-    {
-        // fixed information -> only retrieved once
-        decltype(clock_samples_.adaptive_clock_status_)::value_type adaptive_clock_status{};
-        if (nvmlDeviceGetAdaptiveClockInfoStatus(device, &adaptive_clock_status) == NVML_SUCCESS) {
-            clock_samples_.adaptive_clock_status_ = adaptive_clock_status;
-        }
-
-        decltype(clock_samples_.clock_graph_max_)::value_type clock_graph_max{};
-        if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph_max) == NVML_SUCCESS) {
-            clock_samples_.clock_graph_max_ = clock_graph_max;
-        }
-
-        decltype(clock_samples_.clock_sm_max_)::value_type clock_sm_max{};
-        if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_SM, &clock_sm_max) == NVML_SUCCESS) {
-            clock_samples_.clock_sm_max_ = clock_sm_max;
-        }
-
-        decltype(clock_samples_.clock_mem_max_)::value_type clock_mem_max{};
-        if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_MEM, &clock_mem_max) == NVML_SUCCESS) {
-            clock_samples_.clock_mem_max_ = clock_mem_max;
-        }
-
-        {
-            unsigned int clock_count{ 128 };
-            std::vector<unsigned int> supported_clocks(clock_count);
-            if (nvmlDeviceGetSupportedMemoryClocks(device, &clock_count, supported_clocks.data()) == NVML_SUCCESS) {
-                supported_clocks.resize(clock_count);
-                clock_samples_.clock_mem_min_ = *std::min_element(supported_clocks.cbegin(), supported_clocks.cend());
-            }
-        }
-
-        {
-            unsigned int clock_count{ 128 };
-            std::vector<unsigned int> supported_clocks(clock_count);
-            if (clock_samples_.clock_mem_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, clock_samples_.clock_mem_min_.value(), &clock_count, supported_clocks.data()) == NVML_SUCCESS) {
-                supported_clocks.resize(clock_count);
-                clock_samples_.clock_graph_min_ = *std::min_element(supported_clocks.cbegin(), supported_clocks.cend());
-            }
-        }
-
-        // queried samples -> retrieved every iteration if available
-        decltype(clock_samples_.clock_graph_)::value_type::value_type clock_graph{};
-        if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph) == NVML_SUCCESS) {
-            clock_samples_.clock_graph_ = decltype(clock_samples_.clock_graph_)::value_type{ clock_graph };
-        }
-
-        decltype(clock_samples_.clock_sm_)::value_type::value_type clock_sm{};
-        if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &clock_sm) == NVML_SUCCESS) {
-            clock_samples_.clock_sm_ = decltype(clock_samples_.clock_sm_)::value_type{ clock_sm };
-        }
-
-        decltype(clock_samples_.clock_mem_)::value_type::value_type clock_mem{};
-        if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &clock_mem) == NVML_SUCCESS) {
-            clock_samples_.clock_mem_ = decltype(clock_samples_.clock_mem_)::value_type{ clock_mem };
-        }
-
-        decltype(clock_samples_.clock_throttle_reason_)::value_type::value_type clock_throttle_reason{};
-        if (nvmlDeviceGetCurrentClocksThrottleReasons(device, &clock_throttle_reason) == NVML_SUCCESS) {
-            clock_samples_.clock_throttle_reason_ = decltype(clock_samples_.clock_throttle_reason_)::value_type{ clock_throttle_reason };
-        }
-
-        nvmlEnableState_t mode{};
-        nvmlEnableState_t default_mode{};
-        if (nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode) == NVML_SUCCESS) {
-            clock_samples_.auto_boosted_clocks_ = decltype(clock_samples_.auto_boosted_clocks_)::value_type{ mode == NVML_FEATURE_ENABLED };
-        }
-    }
-
-    // retrieve initial power related information
-    {
-        // fixed information -> only retrieved once
-        nvmlEnableState_t mode{};
-        if (nvmlDeviceGetPowerManagementMode(device, &mode) == NVML_SUCCESS) {
-            power_samples_.power_management_mode_ = mode == NVML_FEATURE_ENABLED;
-        }
-
-        decltype(power_samples_.power_management_limit_)::value_type power_management_limit{};
-        if (nvmlDeviceGetPowerManagementLimit(device, &power_management_limit) == NVML_SUCCESS) {
-            power_samples_.power_management_limit_ = power_management_limit;
-        }
-
-        decltype(power_samples_.power_enforced_limit_)::value_type power_enforced_limit{};
-        if (nvmlDeviceGetEnforcedPowerLimit(device, &power_enforced_limit) == NVML_SUCCESS) {
-            power_samples_.power_enforced_limit_ = power_enforced_limit;
-        }
-
-        // queried samples -> retrieved every iteration if available
-        nvmlPstates_t pstate{};
-        if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) {
-            power_samples_.power_state_ = decltype(power_samples_.power_state_)::value_type{ static_cast<decltype(power_samples_.power_state_)::value_type::value_type>(pstate) };
-        }
-
-        decltype(power_samples_.power_usage_)::value_type::value_type power_usage{};
-        if (nvmlDeviceGetPowerUsage(device, &power_usage) == NVML_SUCCESS) {
-            power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ power_usage };
-        }
-
-        decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type power_total_energy_consumption{};
-        if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) {
-            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ power_total_energy_consumption };
-        }
-    }
-
-    // retrieve initial memory related information
-    {
-        // fixed information -> only retrieved once
-        nvmlMemory_t memory_info{};
-        if (nvmlDeviceGetMemoryInfo(device, &memory_info) == NVML_SUCCESS) {
-            memory_samples_.memory_total_ = memory_info.total;
-            // queried samples -> retrieved every iteration if available
-            memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{ memory_info.free };
-            memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_info.used };
-        }
-
-        decltype(memory_samples_.memory_bus_width_)::value_type memory_bus_width{};
-        if (nvmlDeviceGetMemoryBusWidth(device, &memory_bus_width) == NVML_SUCCESS) {
-            memory_samples_.memory_bus_width_ = memory_bus_width;
-        }
-
-        decltype(memory_samples_.max_pcie_link_generation_)::value_type max_pcie_link_generation{};
-        if (nvmlDeviceGetMaxPcieLinkGeneration(device, &max_pcie_link_generation) == NVML_SUCCESS) {
-            memory_samples_.max_pcie_link_generation_ = max_pcie_link_generation;
-        }
-
-        decltype(memory_samples_.pcie_link_max_speed_)::value_type pcie_link_max_speed{};
-        if (nvmlDeviceGetPcieLinkMaxSpeed(device, &pcie_link_max_speed) == NVML_SUCCESS) {
-            memory_samples_.pcie_link_max_speed_ = pcie_link_max_speed;
-        }
-
-        // queried samples -> retrieved every iteration if available
-        decltype(memory_samples_.pcie_link_width_)::value_type::value_type pcie_link_width{};
-        if (nvmlDeviceGetCurrPcieLinkWidth(device, &pcie_link_width) == NVML_SUCCESS) {
-            memory_samples_.pcie_link_width_ = decltype(memory_samples_.pcie_link_width_)::value_type{ pcie_link_width };
-        }
-
-        decltype(memory_samples_.pcie_link_generation_)::value_type::value_type pcie_link_generation{};
-        if (nvmlDeviceGetCurrPcieLinkGeneration(device, &pcie_link_generation) == NVML_SUCCESS) {
-            memory_samples_.pcie_link_generation_ = decltype(memory_samples_.pcie_link_generation_)::value_type{ pcie_link_generation };
-        }
-    }
-
-    // retrieve initial temperature related information
-    {
-        // fixed information -> only retrieved once
-        decltype(temperature_samples_.num_fans_)::value_type num_fans{};
-        if (nvmlDeviceGetNumFans(device, &num_fans) == NVML_SUCCESS) {
-            temperature_samples_.num_fans_ = num_fans;
-        }
-
-        if (temperature_samples_.num_fans_.has_value() && temperature_samples_.num_fans_.value() > 0) {
-            decltype(temperature_samples_.min_fan_speed_)::value_type min_fan_speed{};
-            decltype(temperature_samples_.max_fan_speed_)::value_type max_fan_speed{};
-            if (nvmlDeviceGetMinMaxFanSpeed(device, &min_fan_speed, &max_fan_speed) == NVML_SUCCESS) {
-                temperature_samples_.min_fan_speed_ = min_fan_speed;
-                temperature_samples_.max_fan_speed_ = max_fan_speed;
-            }
-        }
-
-        decltype(temperature_samples_.temperature_threshold_gpu_max_)::value_type temperature_threshold_gpu_max{};
-        if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_GPU_MAX, &temperature_threshold_gpu_max) == NVML_SUCCESS) {
-            temperature_samples_.temperature_threshold_gpu_max_ = temperature_threshold_gpu_max;
-        }
-
-        decltype(temperature_samples_.temperature_threshold_mem_max_)::value_type temperature_threshold_mem_max{};
-        if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_MEM_MAX, &temperature_threshold_mem_max) == NVML_SUCCESS) {
-            temperature_samples_.temperature_threshold_mem_max_ = temperature_threshold_mem_max;
-        }
-
-        // queried samples -> retrieved every iteration if available
-        decltype(temperature_samples_.fan_speed_)::value_type::value_type fan_speed{};
-        if (nvmlDeviceGetFanSpeed(device, &fan_speed) == NVML_SUCCESS) {
-            temperature_samples_.fan_speed_ = decltype(temperature_samples_.fan_speed_)::value_type{ fan_speed };
-        }
-
-        decltype(temperature_samples_.temperature_gpu_)::value_type::value_type temperature_gpu{};
-        if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature_gpu) == NVML_SUCCESS) {
-            temperature_samples_.temperature_gpu_ = decltype(temperature_samples_.temperature_gpu_)::value_type{ temperature_gpu };
-        }
-    }
-
-    //
-    // loop until stop_sampling() is called
-    //
-
-    while (!this->has_sampling_stopped()) {
-        // only sample values if the sampler currently isn't paused
-        if (this->is_sampling()) {
-            // add current time point
-            this->add_time_point(std::chrono::steady_clock::now());
-
-            // retrieve general samples
-            {
-                if (general_samples_.performance_state_.has_value()) {
-                    nvmlPstates_t pstate{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate));
-                    general_samples_.performance_state_->push_back(static_cast<decltype(general_samples_.performance_state_)::value_type::value_type>(pstate));
-                }
-
-                if (general_samples_.utilization_gpu_.has_value() && general_samples_.utilization_mem_.has_value()) {
-                    nvmlUtilization_t util{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util));
-                    general_samples_.utilization_gpu_->push_back(util.gpu);
-                    general_samples_.utilization_mem_->push_back(util.memory);
-                }
-            }
-
-            // retrieve clock related samples
-            {
-                if (clock_samples_.clock_graph_.has_value()) {
-                    decltype(clock_samples_.clock_graph_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value));
-                    clock_samples_.clock_graph_->push_back(value);
-                }
-
-                if (clock_samples_.clock_sm_.has_value()) {
-                    decltype(clock_samples_.clock_sm_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value));
-                    clock_samples_.clock_sm_->push_back(value);
-                }
-
-                if (clock_samples_.clock_mem_.has_value()) {
-                    decltype(clock_samples_.clock_mem_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value));
-                    clock_samples_.clock_mem_->push_back(value);
-                }
-
-                if (clock_samples_.clock_throttle_reason_.has_value()) {
-                    decltype(clock_samples_.clock_throttle_reason_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksThrottleReasons(device, &value));
-                    clock_samples_.clock_throttle_reason_->push_back(value);
-                }
-
-                if (clock_samples_.auto_boosted_clocks_.has_value()) {
-                    nvmlEnableState_t mode{};
-                    nvmlEnableState_t default_mode{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode));
-                    clock_samples_.auto_boosted_clocks_->push_back(mode == NVML_FEATURE_ENABLED);
-                }
-            }
-
-            // retrieve power related information
-            {
-                if (power_samples_.power_state_.has_value()) {
-                    nvmlPstates_t pstate{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate));
-                    power_samples_.power_state_->push_back(static_cast<decltype(power_samples_.power_state_)::value_type::value_type>(pstate));
-                }
-
-                if (power_samples_.power_usage_.has_value()) {
-                    decltype(power_samples_.power_usage_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value));
-                    power_samples_.power_usage_->push_back(value);
-                }
-
-                if (power_samples_.power_total_energy_consumption_.has_value()) {
-                    decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value));
-                    power_samples_.power_total_energy_consumption_->push_back(value);
-                }
-            }
-
-            // retrieve memory related information
-            {
-                if (memory_samples_.memory_free_.has_value() && memory_samples_.memory_used_.has_value()) {
-                    nvmlMemory_t memory_info{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info));
-                    memory_samples_.memory_free_->push_back(memory_info.free);
-                    memory_samples_.memory_used_->push_back(memory_info.used);
-                }
-
-                if (memory_samples_.pcie_link_width_.has_value()) {
-                    decltype(memory_samples_.pcie_link_width_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value));
-                    memory_samples_.pcie_link_width_->push_back(value);
-                }
-
-                if (memory_samples_.pcie_link_generation_.has_value()) {
-                    decltype(memory_samples_.pcie_link_generation_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkGeneration(device, &value));
-                    memory_samples_.pcie_link_generation_->push_back(value);
-                }
-            }
-
-            // retrieve temperature related information
-            {
-                if (temperature_samples_.fan_speed_.has_value()) {
-                    decltype(temperature_samples_.fan_speed_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value));
-                    temperature_samples_.fan_speed_->push_back(value);
-                }
-
-                if (temperature_samples_.temperature_gpu_.has_value()) {
-                    decltype(temperature_samples_.temperature_gpu_)::value_type::value_type value{};
-                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value));
-                    temperature_samples_.temperature_gpu_->push_back(value);
-                }
-            }
-        }
-
-        // wait for the sampling interval to pass to retrieve the next sample
-        std::this_thread::sleep_for(this->sampling_interval());
-    }
-}
-
-std::string gpu_nvidia_hardware_sampler::device_identification() const {
-    nvmlPciInfo_st pcie_info{};
-    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info));
-    return std::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device);
-}
-
-std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const {
-    // check whether it's safe to generate the YAML entry
-    if (this->is_sampling()) {
-        throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
-    }
-
-    return std::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}",
-                       general_samples_.generate_yaml_string(),
-                       clock_samples_.generate_yaml_string(),
-                       power_samples_.generate_yaml_string(),
-                       memory_samples_.generate_yaml_string(),
-                       temperature_samples_.generate_yaml_string());
-}
-
-std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &sampler) {
-    if (sampler.is_sampling()) {
-        out.setstate(std::ios_base::failbit);
-        return out;
-    } else {
-        return out << std::format("sampling interval: {}\n"
-                                  "time points: [{}]\n\n"
-                                  "general samples:\n{}\n\n"
-                                  "clock samples:\n{}\n\n"
-                                  "power samples:\n{}\n\n"
-                                  "memory samples:\n{}\n\n"
-                                  "temperature samples:\n{}",
-                                  sampler.sampling_interval(),
-                                  detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
-                                  sampler.general_samples(),
-                                  sampler.clock_samples(),
-                                  sampler.power_samples(),
-                                  sampler.memory_samples(),
-                                  sampler.temperature_samples());
-    }
-}
-
-}  // namespace hws
diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
deleted file mode 100644
index 76ffe47..0000000
--- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp
+++ /dev/null
@@ -1,466 +0,0 @@
-/**
- * @author Marcel Breyer
- * @copyright 2024-today All Rights Reserved
- * @license This file is released under the MIT license.
- *          See the LICENSE.md file in the project root for full license information.
- */
-
-#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp"
-
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join}
-
-#include "nvml.h"  // NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED
-
-#include <format>   // std::format
-#include <ostream>  // std::ostream
-#include <string>   // std::string
-
-namespace hws {
-
-//*************************************************************************************************************************************//
-//                                                           general samples                                                           //
-//*************************************************************************************************************************************//
-
-std::string nvml_general_samples::generate_yaml_string() const {
-    std::string str{ "general:\n" };
-
-    // device name
-    if (this->name_.has_value()) {
-        str += std::format("  name:\n"
-                           "    unit: \"string\"\n"
-                           "    values: \"{}\"\n",
-                           this->name_.value());
-    }
-    // persistence mode enabled
-    if (this->persistence_mode_.has_value()) {
-        str += std::format("  persistence_mode:\n"
-                           "    unit: \"bool\"\n"
-                           "    values: {}\n",
-                           this->persistence_mode_.value());
-    }
-    // number of cores
-    if (this->num_cores_.has_value()) {
-        str += std::format("  num_cores:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->num_cores_.value());
-    }
-
-    // performance state
-    if (this->performance_state_.has_value()) {
-        str += std::format("  performance_state:\n"
-                           "    unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->performance_state_.value(), ", "));
-    }
-    // device compute utilization
-    if (this->utilization_gpu_.has_value()) {
-        str += std::format("  utilization_gpu:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->utilization_gpu_.value(), ", "));
-    }
-
-    // device compute utilization
-    if (this->utilization_mem_.has_value()) {
-        str += std::format("  utilization_mem:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->utilization_mem_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) {
-    return out << std::format("name [string]: {}\n"
-                              "persistence_mode [bool]: {}\n"
-                              "num_cores [int]: {}\n"
-                              "performance_state [int]: [{}]\n"
-                              "utilization_gpu [%]: [{}]\n"
-                              "utilization_mem [%]: [{}]",
-                              detail::value_or_default(samples.get_name()),
-                              detail::value_or_default(samples.get_persistence_mode()),
-                              detail::value_or_default(samples.get_num_cores()),
-                              detail::join(detail::value_or_default(samples.get_performance_state()), ", "),
-                              detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "),
-                              detail::join(detail::value_or_default(samples.get_utilization_mem()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                            clock samples                                                            //
-//*************************************************************************************************************************************//
-
-std::string nvml_clock_samples::generate_yaml_string() const {
-    std::string str{ "clock:\n" };
-
-    // adaptive clock status
-    if (this->adaptive_clock_status_.has_value()) {
-        str += std::format("  adaptive_clock_status:\n"
-                           "    unit: \"bool\"\n"
-                           "    values: {}\n",
-                           this->adaptive_clock_status_.value() == NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED);
-    }
-    // maximum SM clock
-    if (this->clock_sm_max_.has_value()) {
-        str += std::format("  clock_sm_max:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_sm_max_.value());
-    }
-    // minimum memory clock
-    if (this->clock_mem_min_.has_value()) {
-        str += std::format("  clock_mem_min:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_mem_min_.value());
-    }
-    // maximum memory clock
-    if (this->clock_mem_max_.has_value()) {
-        str += std::format("  clock_mem_max:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_mem_max_.value());
-    }
-    // minimum graph clock
-    if (this->clock_graph_min_.has_value()) {
-        str += std::format("  clock_gpu_min:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_graph_min_.value());
-    }
-    // maximum graph clock
-    if (this->clock_graph_max_.has_value()) {
-        str += std::format("  clock_gpu_max:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: {}\n",
-                           this->clock_graph_max_.value());
-    }
-
-    // SM clock
-    if (this->clock_sm_.has_value()) {
-        str += std::format("  clock_sm:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->clock_sm_.value(), ", "));
-    }
-    // memory clock
-    if (this->clock_mem_.has_value()) {
-        str += std::format("  clock_mem:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->clock_mem_.value(), ", "));
-    }
-    // graph clock
-    if (this->clock_graph_.has_value()) {
-        str += std::format("  clock_gpu:\n"
-                           "    unit: \"MHz\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->clock_graph_.value(), ", "));
-    }
-    // clock throttle reason
-    if (this->clock_throttle_reason_.has_value()) {
-        str += std::format("  clock_throttle_reason:\n"
-                           "    unit: \"bitmask\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->clock_throttle_reason_.value(), ", "));
-    }
-    // clock is auto-boosted
-    if (this->auto_boosted_clocks_.has_value()) {
-        str += std::format("  auto_boosted_clocks:\n"
-                           "    unit: \"bool\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->auto_boosted_clocks_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) {
-    return out << std::format("adaptive_clock_status [int]: {}\n"
-                              "clock_graph_min [MHz]: {}\n"
-                              "clock_graph_max [MHz]: {}\n"
-                              "clock_sm_max [MHz]: {}\n"
-                              "clock_mem_min [MHz]: {}\n"
-                              "clock_mem_max [MHz]: {}\n"
-                              "clock_graph [MHz]: [{}]\n"
-                              "clock_sm [MHz]: [{}]\n"
-                              "clock_mem [MHz]: [{}]\n"
-                              "clock_throttle_reason [bitmask]: [{}]\n"
-                              "auto_boosted_clocks [bool]: [{}]",
-                              detail::value_or_default(samples.get_adaptive_clock_status()),
-                              detail::value_or_default(samples.get_clock_graph_min()),
-                              detail::value_or_default(samples.get_clock_graph_max()),
-                              detail::value_or_default(samples.get_clock_sm_max()),
-                              detail::value_or_default(samples.get_clock_mem_min()),
-                              detail::value_or_default(samples.get_clock_mem_max()),
-                              detail::join(detail::value_or_default(samples.get_clock_graph()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_sm()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_mem()), ", "),
-                              detail::join(detail::value_or_default(samples.get_clock_throttle_reason()), ", "),
-                              detail::join(detail::value_or_default(samples.get_auto_boosted_clocks()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                            power samples                                                            //
-//*************************************************************************************************************************************//
-
-std::string nvml_power_samples::generate_yaml_string() const {
-    std::string str{ "power:\n" };
-
-    // the power management mode
-    if (this->power_management_mode_.has_value()) {
-        str += std::format("  power_management_mode:\n"
-                           "    unit: \"bool\"\n"
-                           "    values: {}\n",
-                           this->power_management_mode_.value());
-    }
-    // power management limit
-    if (this->power_management_limit_.has_value()) {
-        str += std::format("  power_management_limit:\n"
-                           "    unit: \"mW\"\n"
-                           "    values: {}\n",
-                           this->power_management_limit_.value());
-    }
-    // power enforced limit
-    if (this->power_enforced_limit_.has_value()) {
-        str += std::format("  power_enforced_limit:\n"
-                           "    unit: \"mW\"\n"
-                           "    values: {}\n",
-                           this->power_enforced_limit_.value());
-    }
-
-    // power state
-    if (this->power_state_.has_value()) {
-        str += std::format("  power_state:\n"
-                           "    unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->power_state_.value(), ", "));
-    }
-    // current power usage
-    if (this->power_usage_.has_value()) {
-        str += std::format("  power_usage:\n"
-                           "    unit: \"mW\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->power_usage_.value(), ", "));
-    }
-    // total energy consumed
-    if (this->power_total_energy_consumption_.has_value()) {
-        decltype(nvml_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size());
-        for (std::size_t i = 0; i < consumed_energy.size(); ++i) {
-            consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front();
-        }
-        str += std::format("  power_total_energy_consumed:\n"
-                           "    unit: \"J\"\n"
-                           "    values: [{}]\n",
-                           detail::join(consumed_energy, ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) {
-    return out << std::format("power_management_mode [bool]: {}\n"
-                              "power_management_limit [mW]: {}\n"
-                              "power_enforced_limit [mW]: {}\n"
-                              "power_state [int]: [{}]\n"
-                              "power_usage [mW]: [{}]\n"
-                              "power_total_energy_consumption [J]: [{}]",
-                              detail::value_or_default(samples.get_power_management_mode()),
-                              detail::value_or_default(samples.get_power_management_limit()),
-                              detail::value_or_default(samples.get_power_enforced_limit()),
-                              detail::join(detail::value_or_default(samples.get_power_state()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_usage()), ", "),
-                              detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                            memory samples                                                           //
-//*************************************************************************************************************************************//
-
-std::string nvml_memory_samples::generate_yaml_string() const {
-    std::string str{ "memory:\n" };
-
-    // total memory size
-    if (this->memory_total_.has_value()) {
-        str += std::format("  memory_total:\n"
-                           "    unit: \"B\"\n"
-                           "    values: {}\n",
-                           this->memory_total_.value());
-    }
-    // maximum PCIe link speed
-    if (this->pcie_link_max_speed_.has_value()) {
-        str += std::format("  pcie_max_bandwidth:\n"
-                           "    unit: \"MBPS\"\n"
-                           "    values: {}\n",
-                           this->pcie_link_max_speed_.value());
-    }
-    // memory bus width
-    if (this->memory_bus_width_.has_value()) {
-        str += std::format("  memory_bus_width:\n"
-                           "    unit: \"Bit\"\n"
-                           "    values: {}\n",
-                           this->memory_bus_width_.value());
-    }
-    // maximum PCIe link generation
-    if (this->max_pcie_link_generation_.has_value()) {
-        str += std::format("  max_pcie_link_generation:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->max_pcie_link_generation_.value());
-    }
-
-    // free memory size
-    if (this->memory_free_.has_value()) {
-        str += std::format("  memory_free:\n"
-                           "    unit: \"B\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->memory_free_.value(), ", "));
-    }
-    // used memory size
-    if (this->memory_used_.has_value()) {
-        str += std::format("  memory_used:\n"
-                           "    unit: \"B\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->memory_used_.value(), ", "));
-    }
-    // PCIe link speed
-    if (this->pcie_link_speed_.has_value()) {
-        str += std::format("  pcie_bandwidth:\n"
-                           "    unit: \"MBPS\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->pcie_link_speed_.value(), ", "));
-    }
-    // PCIe link width
-    if (this->pcie_link_width_.has_value()) {
-        str += std::format("  pcie_link_width:\n"
-                           "    unit: \"int\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->pcie_link_width_.value(), ", "));
-    }
-    // PCIe link generation
-    if (this->pcie_link_generation_.has_value()) {
-        str += std::format("  pcie_link_generation:\n"
-                           "    unit: \"int\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->pcie_link_generation_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) {
-    return out << std::format("memory_total [B]: {}\n"
-                              "pcie_link_max_speed [MBPS]: {}\n"
-                              "memory_bus_width [Bit]: {}\n"
-                              "max_pcie_link_generation [int]: {}\n"
-                              "memory_free [B]: [{}]\n"
-                              "memory_used [B]: [{}]\n"
-                              "pcie_link_speed [MBPS]: [{}]\n"
-                              "pcie_link_width [int]: [{}]\n"
-                              "pcie_link_generation [int]: [{}]",
-                              detail::value_or_default(samples.get_memory_total()),
-                              detail::value_or_default(samples.get_pcie_link_max_speed()),
-                              detail::value_or_default(samples.get_memory_bus_width()),
-                              detail::value_or_default(samples.get_max_pcie_link_generation()),
-                              detail::join(detail::value_or_default(samples.get_memory_free()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_used()), ", "),
-                              detail::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "),
-                              detail::join(detail::value_or_default(samples.get_pcie_link_width()), ", "),
-                              detail::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "));
-}
-
-//*************************************************************************************************************************************//
-//                                                         temperature samples                                                         //
-//*************************************************************************************************************************************//
-
-std::string nvml_temperature_samples::generate_yaml_string() const {
-    std::string str{ "temperature:\n" };
-
-    // number of fans
-    if (this->num_fans_.has_value()) {
-        str += std::format("  num_fans:\n"
-                           "    unit: \"int\"\n"
-                           "    values: {}\n",
-                           this->num_fans_.value());
-    }
-    // min fan speed
-    if (this->min_fan_speed_.has_value()) {
-        str += std::format("  min_fan_speed:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: {}\n",
-                           this->min_fan_speed_.value());
-    }
-    // max fan speed
-    if (this->max_fan_speed_.has_value()) {
-        str += std::format("  max_fan_speed:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: {}\n",
-                           this->max_fan_speed_.value());
-    }
-    // temperature threshold GPU max
-    if (this->temperature_threshold_gpu_max_.has_value()) {
-        str += std::format("  temperature_gpu_max:\n"
-                           "    unit: \"°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_threshold_gpu_max_.value());
-    }
-    // temperature threshold memory max
-    if (this->temperature_threshold_mem_max_.has_value()) {
-        str += std::format("  temperature_mem_max:\n"
-                           "    unit: \"°C\"\n"
-                           "    values: {}\n",
-                           this->temperature_threshold_mem_max_.value());
-    }
-
-    // fan speed
-    if (this->fan_speed_.has_value()) {
-        str += std::format("  fan_speed:\n"
-                           "    unit: \"percentage\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->fan_speed_.value(), ", "));
-    }
-    // temperature GPU
-    if (this->temperature_gpu_.has_value()) {
-        str += std::format("  temperature_gpu:\n"
-                           "    unit: \"°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->temperature_gpu_.value(), ", "));
-    }
-
-    // remove last newline
-    str.pop_back();
-
-    return str;
-}
-
-std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samples) {
-    return out << std::format("num_fans [int]: {}\n"
-                              "min_fan_speed [%]: {}\n"
-                              "max_fan_speed [%]: {}\n"
-                              "temperature_threshold_gpu_max [°C]: {}\n"
-                              "temperature_threshold_mem_max [°C]: {}\n"
-                              "fan_speed [%]: [{}]\n"
-                              "temperature_gpu [°C]: [{}]",
-                              detail::value_or_default(samples.get_num_fans()),
-                              detail::value_or_default(samples.get_min_fan_speed()),
-                              detail::value_or_default(samples.get_max_fan_speed()),
-                              detail::value_or_default(samples.get_temperature_threshold_gpu_max()),
-                              detail::value_or_default(samples.get_temperature_threshold_mem_max()),
-                              detail::join(detail::value_or_default(samples.get_fan_speed()), ", "),
-                              detail::join(detail::value_or_default(samples.get_temperature_gpu()), ", "));
-}
-
-}  // namespace hws
diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hws/cpu/cpu_samples.cpp
similarity index 58%
rename from src/hardware_sampling/cpu/cpu_samples.cpp
rename to src/hws/cpu/cpu_samples.cpp
index ef5a3b9..e5690d2 100644
--- a/src/hardware_sampling/cpu/cpu_samples.cpp
+++ b/src/hws/cpu/cpu_samples.cpp
@@ -5,13 +5,15 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/cpu/cpu_samples.hpp"
+#include "hws/cpu/cpu_samples.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::{value_or_default, join}
+#include "hws/utility.hpp"  // hws::detail::{value_or_default, quote}
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
 
 #include <array>        // std::array
 #include <cstddef>      // std::size_t
-#include <format>       // std::format
 #include <ostream>      // std::ostream
 #include <regex>        // std::regex, std::regex::extended, std::regex_match, std::regex_replace
 #include <string>       // std::string
@@ -24,138 +26,155 @@ namespace hws {
 //                                                           general samples                                                           //
 //*************************************************************************************************************************************//
 
+bool cpu_general_samples::has_samples() const {
+    return this->architecture_.has_value() || this->byte_order_.has_value() || this->num_cores_.has_value() || this->num_threads_.has_value()
+           || this->threads_per_core_.has_value() || this->cores_per_socket_.has_value() || this->num_sockets_.has_value() || this->numa_nodes_.has_value()
+           || this->vendor_id_.has_value() || this->name_.has_value() || this->flags_.has_value() || this->compute_utilization_.has_value()
+           || this->ipc_.has_value() || this->irq_.has_value() || this->smi_.has_value() || this->poll_.has_value() || this->poll_percent_.has_value();
+}
+
 std::string cpu_general_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "general:\n" };
 
     // architecture
     if (this->architecture_.has_value()) {
-        str += std::format("  architecture:\n"
+        str += fmt::format("  architecture:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->architecture_.value());
     }
     // byte order
     if (this->byte_order_.has_value()) {
-        str += std::format("  byte_order:\n"
+        str += fmt::format("  byte_order:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->byte_order_.value());
     }
+    // number of cores
+    if (this->num_cores_.has_value()) {
+        str += fmt::format("  num_cores:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->num_cores_.value());
+    }
     // number of threads including hyper-threads
     if (this->num_threads_.has_value()) {
-        str += std::format("  num_threads:\n"
+        str += fmt::format("  num_threads:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->num_threads_.value());
     }
     // number of threads per core
     if (this->threads_per_core_.has_value()) {
-        str += std::format("  threads_per_core:\n"
+        str += fmt::format("  threads_per_core:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->threads_per_core_.value());
     }
     // number of cores per socket
     if (this->cores_per_socket_.has_value()) {
-        str += std::format("  cores_per_socket:\n"
+        str += fmt::format("  cores_per_socket:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->cores_per_socket_.value());
     }
     // number of cores per socket
     if (this->num_sockets_.has_value()) {
-        str += std::format("  num_sockets:\n"
+        str += fmt::format("  num_sockets:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->num_sockets_.value());
     }
     // number of NUMA nodes
     if (this->numa_nodes_.has_value()) {
-        str += std::format("  numa_nodes:\n"
+        str += fmt::format("  numa_nodes:\n"
                            "    unit: \"int\"\n"
                            "    values: {}\n",
                            this->numa_nodes_.value());
     }
     // the vendor specific ID
     if (this->vendor_id_.has_value()) {
-        str += std::format("  vendor_id:\n"
+        str += fmt::format("  vendor_id:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->vendor_id_.value());
     }
     // the CPU name
     if (this->name_.has_value()) {
-        str += std::format("  name:\n"
+        str += fmt::format("  name:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
                            this->name_.value());
     }
     // CPU specific flags (like SSE, AVX, ...)
     if (this->flags_.has_value()) {
-        str += std::format("  flags:\n"
+        str += fmt::format("  flags:\n"
                            "    unit: \"string\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->flags_.value(), ", "));
+                           fmt::join(detail::quote(this->flags_.value()), ", "));
     }
 
     // the percent the CPU was busy
-    if (this->busy_percent_.has_value()) {
-        str += std::format("  utilization:\n"
+    if (this->compute_utilization_.has_value()) {
+        str += fmt::format("  compute_utilization:\n"
                            "    turbostat_name: \"Busy%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->busy_percent_.value(), ", "));
+                           fmt::join(this->compute_utilization_.value(), ", "));
     }
     // the instructions per cycle count
     if (this->ipc_.has_value()) {
-        str += std::format("  instructions_per_cycle:\n"
+        str += fmt::format("  instructions_per_cycle:\n"
                            "    turbostat_name: \"IPC\"\n"
                            "    unit: \"float\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->ipc_.value(), ", "));
+                           fmt::join(this->ipc_.value(), ", "));
     }
     // the number of interrupts
     if (this->irq_.has_value()) {
-        str += std::format("  interrupts:\n"
+        str += fmt::format("  interrupts:\n"
                            "    turbostat_name: \"IRQ\"\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->irq_.value(), ", "));
+                           fmt::join(this->irq_.value(), ", "));
     }
     // the number of system management interrupts
     if (this->smi_.has_value()) {
-        str += std::format("  system_management_interrupts:\n"
+        str += fmt::format("  system_management_interrupts:\n"
                            "    turbostat_name: \"SMI\"\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->smi_.value(), ", "));
+                           fmt::join(this->smi_.value(), ", "));
     }
     // the number of times the CPU was in the poll state
     if (this->poll_.has_value()) {
-        str += std::format("  polling_state:\n"
+        str += fmt::format("  polling_state:\n"
                            "    turbostat_name: \"POLL\"\n"
                            "    unit: \"int\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->poll_.value(), ", "));
+                           fmt::join(this->poll_.value(), ", "));
     }
     // the percent the CPU was in the polling state
     if (this->poll_percent_.has_value()) {
-        str += std::format("  polling_percentage:\n"
+        str += fmt::format("  polling_percentage:\n"
                            "    turbostat_name: \"POLL%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->poll_percent_.value(), ", "));
+                           fmt::join(this->poll_percent_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) {
-    std::string str = std::format("architecture [string]: {}\n"
+    std::string str = fmt::format("architecture [string]: {}\n"
                                   "byte_order [string]: {}\n"
+                                  "num_cores [int]: {}\n"
                                   "num_threads [int]: {}\n"
                                   "threads_per_core [int]: {}\n"
                                   "cores_per_socket [int]: {}\n"
@@ -164,7 +183,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples)
                                   "vendor_id [string]: {}\n"
                                   "name [string]: {}\n"
                                   "flags [string]: [{}]\n"
-                                  "busy_percent [%]: [{}]\n"
+                                  "compute_utilization [%]: [{}]\n"
                                   "ipc [float]: [{}]\n"
                                   "irq [int]: [{}]\n"
                                   "smi [int]: [{}]\n"
@@ -172,6 +191,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples)
                                   "poll_percent [%]: [{}]",
                                   detail::value_or_default(samples.get_architecture()),
                                   detail::value_or_default(samples.get_byte_order()),
+                                  detail::value_or_default(samples.get_num_cores()),
                                   detail::value_or_default(samples.get_num_threads()),
                                   detail::value_or_default(samples.get_threads_per_core()),
                                   detail::value_or_default(samples.get_cores_per_socket()),
@@ -179,13 +199,13 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples)
                                   detail::value_or_default(samples.get_numa_nodes()),
                                   detail::value_or_default(samples.get_vendor_id()),
                                   detail::value_or_default(samples.get_name()),
-                                  detail::join(detail::value_or_default(samples.get_flags()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_busy_percent()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_ipc()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_irq()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_smi()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_poll()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_poll_percent()), ", "));
+                                  fmt::join(detail::value_or_default(samples.get_flags()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_ipc()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_irq()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_smi()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_poll()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_poll_percent()), ", "));
 
     // remove last newline
     str.pop_back();
@@ -197,423 +217,489 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples)
 //                                                            clock samples                                                            //
 //*************************************************************************************************************************************//
 
+bool cpu_clock_samples::has_samples() const {
+    return this->auto_boosted_clock_enabled_.has_value() || this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value()
+           || this->clock_frequency_.has_value() || this->average_non_idle_clock_frequency_.has_value() || this->time_stamp_counter_.has_value();
+}
+
 std::string cpu_clock_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "clock:\n" };
 
     // true if frequency boost is enabled
-    if (this->frequency_boost_.has_value()) {
-        str += std::format("  frequency_boost:\n"
+    if (this->auto_boosted_clock_enabled_.has_value()) {
+        str += fmt::format("  auto_boosted_clock_enabled:\n"
                            "    unit: \"bool\"\n"
                            "    values: {}\n",
-                           this->frequency_boost_.value());
+                           this->auto_boosted_clock_enabled_.value());
     }
     // the minimal CPU frequency
-    if (this->min_frequency_.has_value()) {
-        str += std::format("  min_cpu_frequency:\n"
+    if (this->clock_frequency_min_.has_value()) {
+        str += fmt::format("  clock_frequency_min:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->min_frequency_.value());
+                           this->clock_frequency_min_.value());
     }
     // the maximum CPU frequency
-    if (this->max_frequency_.has_value()) {
-        str += std::format("  max_cpu_frequency:\n"
+    if (this->clock_frequency_max_.has_value()) {
+        str += fmt::format("  clock_frequency_max:\n"
                            "    unit: \"MHz\"\n"
                            "    values: {}\n",
-                           this->max_frequency_.value());
+                           this->clock_frequency_max_.value());
     }
 
     // the average CPU frequency
-    if (this->average_frequency_.has_value()) {
-        str += std::format("  average_frequency:\n"
+    if (this->clock_frequency_.has_value()) {
+        str += fmt::format("  clock_frequency:\n"
                            "    turbostat_name: \"Avg_MHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->average_frequency_.value(), ", "));
+                           fmt::join(this->clock_frequency_.value(), ", "));
     }
     // the average CPU frequency excluding idle time
-    if (this->average_non_idle_frequency_.has_value()) {
-        str += std::format("  average_non_idle_frequency:\n"
+    if (this->average_non_idle_clock_frequency_.has_value()) {
+        str += fmt::format("  average_non_idle_clock_frequency:\n"
                            "    turbostat_name: \"Bzy_MHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->average_non_idle_frequency_.value(), ", "));
+                           fmt::join(this->average_non_idle_clock_frequency_.value(), ", "));
     }
     // the time stamp counter
     if (this->time_stamp_counter_.has_value()) {
-        str += std::format("  time_stamp_counter:\n"
+        str += fmt::format("  time_stamp_counter:\n"
                            "    turbostat_name: \"TSC_MHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->time_stamp_counter_.value(), ", "));
+                           fmt::join(this->time_stamp_counter_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) {
-    return out << std::format("frequency_boost [bool]: {}\n"
-                              "min_frequency [MHz]: {}\n"
-                              "max_frequency [MHz]: {}\n"
-                              "average_frequency [MHz]: [{}]\n"
-                              "average_non_idle_frequency [MHz]: [{}]\n"
+    return out << fmt::format("auto_boosted_clock_enabled [bool]: {}\n"
+                              "clock_frequency_min [MHz]: {}\n"
+                              "clock_frequency_max [MHz]: {}\n"
+                              "clock_frequency [MHz]: [{}]\n"
+                              "average_non_idle_clock_frequency [MHz]: [{}]\n"
                               "time_stamp_counter [MHz]: [{}]",
-                              detail::value_or_default(samples.get_frequency_boost()),
-                              detail::value_or_default(samples.get_min_frequency()),
-                              detail::value_or_default(samples.get_max_frequency()),
-                              detail::join(detail::value_or_default(samples.get_average_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_time_stamp_counter()), ", "));
+                              detail::value_or_default(samples.get_auto_boosted_clock_enabled()),
+                              detail::value_or_default(samples.get_clock_frequency_min()),
+                              detail::value_or_default(samples.get_clock_frequency_max()),
+                              fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_average_non_idle_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_time_stamp_counter()), ", "));
 }
 
 //*************************************************************************************************************************************//
 //                                                            power samples                                                            //
 //*************************************************************************************************************************************//
 
+bool cpu_power_samples::has_samples() const {
+    return this->power_measurement_type_.has_value() || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value()
+           || this->core_watt_.has_value() || this->ram_watt_.has_value() || this->package_rapl_throttle_percent_.has_value()
+           || this->dram_rapl_throttle_percent_.has_value();
+}
+
 std::string cpu_power_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "power:\n" };
 
+    // power measurement type
+    if (this->power_measurement_type_.has_value()) {
+        str += fmt::format("  power_measurement_type:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->power_measurement_type_.value());
+    }
+
     // the package Watt
-    if (this->package_watt_.has_value()) {
-        str += std::format("  package_power:\n"
+    if (this->power_usage_.has_value()) {
+        str += fmt::format("  power_usage:\n"
                            "    turbostat_name: \"PkgWatt\"\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->package_watt_.value(), ", "));
+                           fmt::join(this->power_usage_.value(), ", "));
+    }
+    // total energy consumed
+    if (this->power_total_energy_consumption_.has_value()) {
+        str += fmt::format("  power_total_energy_consumed:\n"
+                           "    unit: \"J\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->power_total_energy_consumption_.value(), ", "));
     }
+
     // the core Watt
     if (this->core_watt_.has_value()) {
-        str += std::format("  core_power:\n"
+        str += fmt::format("  core_power:\n"
                            "    turbostat_name: \"CorWatt\"\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->core_watt_.value(), ", "));
+                           fmt::join(this->core_watt_.value(), ", "));
     }
     // the DRAM Watt
     if (this->ram_watt_.has_value()) {
-        str += std::format("  dram_power:\n"
+        str += fmt::format("  dram_power:\n"
                            "    turbostat_name: \"RAMWatt\"\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->ram_watt_.value(), ", "));
+                           fmt::join(this->ram_watt_.value(), ", "));
     }
     // the percent of time when the RAPL package throttle was active
     if (this->package_rapl_throttle_percent_.has_value()) {
-        str += std::format("  package_rapl_throttling:\n"
+        str += fmt::format("  package_rapl_throttling:\n"
                            "    turbostat_name: \"PKG_%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->package_rapl_throttle_percent_.value(), ", "));
+                           fmt::join(this->package_rapl_throttle_percent_.value(), ", "));
     }
     // the percent of time when the RAPL DRAM throttle was active
     if (this->dram_rapl_throttle_percent_.has_value()) {
-        str += std::format("  dram_rapl_throttling:\n"
+        str += fmt::format("  dram_rapl_throttling:\n"
                            "    turbostat_name: \"RAM_%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->dram_rapl_throttle_percent_.value(), ", "));
+                           fmt::join(this->dram_rapl_throttle_percent_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_power_samples &samples) {
-    return out << std::format("package_watt [W]: [{}]\n"
+    return out << fmt::format("power_measurement_type [string]: {}\n"
+                              "power_usage [W]: [{}]\n"
+                              "power_total_energy_consumption [J]: [{}]\n"
                               "core_watt [W]: [{}]\n"
                               "ram_watt [W]: [{}]\n"
                               "package_rapl_throttle_percent [%]: [{}]\n"
                               "dram_rapl_throttle_percent [%]: [{}]",
-                              detail::join(detail::value_or_default(samples.get_package_watt()), ", "),
-                              detail::join(detail::value_or_default(samples.get_core_watt()), ", "),
-                              detail::join(detail::value_or_default(samples.get_ram_watt()), ", "),
-                              detail::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_dram_rapl_throttle_percent()), ", "));
+                              detail::value_or_default(samples.get_power_measurement_type()),
+                              fmt::join(detail::value_or_default(samples.get_power_usage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_core_watt()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_ram_watt()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_dram_rapl_throttle_percent()), ", "));
 }
 
 //*************************************************************************************************************************************//
 //                                                            memory samples                                                           //
 //*************************************************************************************************************************************//
 
+bool cpu_memory_samples::has_samples() const {
+    return this->cache_size_L1d_.has_value() || this->cache_size_L1i_.has_value() || this->cache_size_L2_.has_value() || this->cache_size_L3_.has_value()
+           || this->memory_total_.has_value() || this->swap_memory_total_.has_value() || this->memory_used_.has_value() || this->memory_free_.has_value()
+           || this->swap_memory_used_.has_value() || this->swap_memory_free_.has_value();
+}
+
 std::string cpu_memory_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "memory:\n" };
 
     // the size of the L1 data cache
-    if (this->l1d_cache_.has_value()) {
-        str += std::format("  cache_size_L1d:\n"
+    if (this->cache_size_L1d_.has_value()) {
+        str += fmt::format("  cache_size_L1d:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
-                           this->l1d_cache_.value());
+                           this->cache_size_L1d_.value());
     }
     // the size of the L1 instruction cache
-    if (this->l1i_cache_.has_value()) {
-        str += std::format("  cache_size_L1i:\n"
+    if (this->cache_size_L1i_.has_value()) {
+        str += fmt::format("  cache_size_L1i:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
-                           this->l1i_cache_.value());
+                           this->cache_size_L1i_.value());
     }
     // the size of the L2 cache
-    if (this->l2_cache_.has_value()) {
-        str += std::format("  cache_size_L2:\n"
+    if (this->cache_size_L2_.has_value()) {
+        str += fmt::format("  cache_size_L2:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
-                           this->l2_cache_.value());
+                           this->cache_size_L2_.value());
     }
     // the size of the L3 cache
-    if (this->l3_cache_.has_value()) {
-        str += std::format("  cache_size_L3:\n"
+    if (this->cache_size_L3_.has_value()) {
+        str += fmt::format("  cache_size_L3:\n"
                            "    unit: \"string\"\n"
                            "    values: \"{}\"\n",
-                           this->l3_cache_.value());
+                           this->cache_size_L3_.value());
     }
 
     // the total size of available memory
     if (this->memory_total_.has_value()) {
-        str += std::format("  memory_total:\n"
+        str += fmt::format("  memory_total:\n"
                            "    unit: \"B\"\n"
                            "    values: {}\n",
                            this->memory_total_.value());
     }
     // the total size of the swap memory
     if (this->swap_memory_total_.has_value()) {
-        str += std::format("  swap_memory_total:\n"
+        str += fmt::format("  swap_memory_total:\n"
                            "    unit: \"B\"\n"
                            "    values: {}\n",
                            this->swap_memory_total_.value());
     }
 
-    // the available free memory
-    if (this->memory_free_.has_value()) {
-        str += std::format("  memory_free:\n"
-                           "    unit: \"B\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->memory_free_.value(), ", "));
-    }
     // the used memory
     if (this->memory_used_.has_value()) {
-        str += std::format("  memory_used:\n"
+        str += fmt::format("  memory_used:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->memory_used_.value(), ", "));
+                           fmt::join(this->memory_used_.value(), ", "));
     }
-    // the available swap memory
-    if (this->swap_memory_free_.has_value()) {
-        str += std::format("  swap_memory_free:\n"
+    // the available free memory
+    if (this->memory_free_.has_value()) {
+        str += fmt::format("  memory_free:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->swap_memory_free_.value(), ", "));
+                           fmt::join(this->memory_free_.value(), ", "));
     }
     // the swap memory
     if (this->swap_memory_used_.has_value()) {
-        str += std::format("  swap_memory_used:\n"
+        str += fmt::format("  swap_memory_used:\n"
                            "    unit: \"B\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->swap_memory_used_.value(), ", "));
+                           fmt::join(this->swap_memory_used_.value(), ", "));
+    }
+    // the available swap memory
+    if (this->swap_memory_free_.has_value()) {
+        str += fmt::format("  swap_memory_free:\n"
+                           "    unit: \"B\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->swap_memory_free_.value(), ", "));
     }
-
-    // remove last newline
-    str.pop_back();
 
     return str;
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) {
-    return out << std::format("l1d_cache [string]: {}\n"
-                              "l1i_cache [string]: {}\n"
-                              "l2_cache [string]: {}\n"
-                              "l3_cache [string]: {}\n"
+    return out << fmt::format("cache_size_L1d [string]: {}\n"
+                              "cache_size_L1i [string]: {}\n"
+                              "cache_size_L2 [string]: {}\n"
+                              "cache_size_L3 [string]: {}\n"
                               "memory_total [B]: {}\n"
                               "swap_memory_total [B]: {}\n"
-                              "memory_free [B]: [{}]\n"
                               "memory_used [B]: [{}]\n"
-                              "swap_memory_free [B]: [{}]\n"
-                              "swap_memory_used [B]: [{}]",
-                              detail::value_or_default(samples.get_l1d_cache()),
-                              detail::value_or_default(samples.get_l1i_cache()),
-                              detail::value_or_default(samples.get_l2_cache()),
-                              detail::value_or_default(samples.get_l3_cache()),
+                              "memory_free [B]: [{}]\n"
+                              "swap_memory_used [B]: [{}]\n"
+                              "swap_memory_free [B]: [{}]",
+                              detail::value_or_default(samples.get_cache_size_L1d()),
+                              detail::value_or_default(samples.get_cache_size_L1i()),
+                              detail::value_or_default(samples.get_cache_size_L2()),
+                              detail::value_or_default(samples.get_cache_size_L3()),
                               detail::value_or_default(samples.get_memory_total()),
                               detail::value_or_default(samples.get_swap_memory_total()),
-                              detail::join(detail::value_or_default(samples.get_memory_free()), ", "),
-                              detail::join(detail::value_or_default(samples.get_memory_used()), ", "),
-                              detail::join(detail::value_or_default(samples.get_swap_memory_free()), ", "),
-                              detail::join(detail::value_or_default(samples.get_swap_memory_used()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_memory_used()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_swap_memory_used()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_swap_memory_free()), ", "));
 }
 
 //*************************************************************************************************************************************//
 //                                                         temperature samples                                                         //
 //*************************************************************************************************************************************//
 
+bool cpu_temperature_samples::has_samples() const {
+    return this->temperature_.has_value() || this->core_temperature_.has_value() || this->core_throttle_percent_.has_value();
+}
+
 std::string cpu_temperature_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "temperature:\n" };
 
+    // the temperature of the whole package
+    if (this->temperature_.has_value()) {
+        str += fmt::format("  temperature:\n"
+                           "    turbostat_name: \"PkgTmp\"\n"
+                           "    unit: \"°C\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->temperature_.value(), ", "));
+    }
     // the temperature of the cores
     if (this->core_temperature_.has_value()) {
-        str += std::format("  per_core_temperature:\n"
+        str += fmt::format("  core_temperature:\n"
                            "    turbostat_name: \"CoreTmp\"\n"
                            "    unit: \"°C\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->core_temperature_.value(), ", "));
+                           fmt::join(this->core_temperature_.value(), ", "));
     }
     // the percentage of time the core throttled due the temperature constraints
     if (this->core_throttle_percent_.has_value()) {
-        str += std::format("  core_throttle_percentage:\n"
+        str += fmt::format("  core_throttle_percentage:\n"
                            "    turbostat_name: \"CoreThr\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->core_throttle_percent_.value(), ", "));
-    }
-    // the temperature of the whole package
-    if (this->package_temperature_.has_value()) {
-        str += std::format("  per_package_temperature:\n"
-                           "    turbostat_name: \"PkgTmp\"\n"
-                           "    unit: \"°C\"\n"
-                           "    values: [{}]\n",
-                           detail::join(this->package_temperature_.value(), ", "));
+                           fmt::join(this->core_throttle_percent_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_temperature_samples &samples) {
-    return out << std::format("core_temperature [°C]: [{}]\n"
-                              "core_throttle_percent [%]: [{}]\n"
-                              "package_temperature [°C]: [{}]",
-                              detail::join(detail::value_or_default(samples.get_core_temperature()), ", "),
-                              detail::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_package_temperature()), ", "));
+    return out << fmt::format("temperature [°C]: [{}]\n"
+                              "core_temperature [°C]: [{}]\n"
+                              "core_throttle_percent [%]: [{}]",
+                              fmt::join(detail::value_or_default(samples.get_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_core_temperature()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "));
 }
 
 //*************************************************************************************************************************************//
 //                                                          gfx (iGPU) samples                                                         //
 //*************************************************************************************************************************************//
 
+bool cpu_gfx_samples::has_samples() const {
+    return this->gfx_render_state_percent_.has_value() || this->gfx_frequency_.has_value() || this->average_gfx_frequency_.has_value()
+           || this->gfx_state_c0_percent_.has_value() || this->cpu_works_for_gpu_percent_.has_value() || this->gfx_watt_.has_value();
+}
+
 std::string cpu_gfx_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "integrated_gpu:\n" };
 
     // the percentage of time the iGPU was in the render state
     if (this->gfx_render_state_percent_.has_value()) {
-        str += std::format("  graphics_render_state:\n"
+        str += fmt::format("  graphics_render_state:\n"
                            "    turbostat_name: \"GFX%rc6\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->gfx_render_state_percent_.value(), ", "));
+                           fmt::join(this->gfx_render_state_percent_.value(), ", "));
     }
     // the core frequency of the iGPU
     if (this->gfx_frequency_.has_value()) {
-        str += std::format("  graphics_frequency:\n"
+        str += fmt::format("  graphics_frequency:\n"
                            "    turbostat_name: \"GFXMHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->gfx_frequency_.value(), ", "));
+                           fmt::join(this->gfx_frequency_.value(), ", "));
     }
     // the average core frequency of the iGPU
     if (this->average_gfx_frequency_.has_value()) {
-        str += std::format("  average_graphics_frequency:\n"
+        str += fmt::format("  average_graphics_frequency:\n"
                            "    turbostat_name: \"GFXAMHz\"\n"
                            "    unit: \"MHz\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->average_gfx_frequency_.value(), ", "));
+                           fmt::join(this->average_gfx_frequency_.value(), ", "));
     }
     // the percentage of time the iGPU was in the c0 state
     if (this->gfx_state_c0_percent_.has_value()) {
-        str += std::format("  gpu_state_c0:\n"
+        str += fmt::format("  gpu_state_c0:\n"
                            "    turbostat_name: \"GFX%C0\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->gfx_state_c0_percent_.value(), ", "));
+                           fmt::join(this->gfx_state_c0_percent_.value(), ", "));
     }
     // the percentage of time the CPU worked for the iGPU
     if (this->cpu_works_for_gpu_percent_.has_value()) {
-        str += std::format("  cpu_works_for_gpu:\n"
+        str += fmt::format("  cpu_works_for_gpu:\n"
                            "    turbostat_name: \"CPUGFX%\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->cpu_works_for_gpu_percent_.value(), ", "));
+                           fmt::join(this->cpu_works_for_gpu_percent_.value(), ", "));
     }
     // the iGPU Watt
     if (this->gfx_watt_.has_value()) {
-        str += std::format("  graphics_power:\n"
+        str += fmt::format("  graphics_power:\n"
                            "    turbostat_name: \"GFXWatt\"\n"
                            "    unit: \"W\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->gfx_watt_.value(), ", "));
+                           fmt::join(this->gfx_watt_.value(), ", "));
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_gfx_samples &samples) {
-    return out << std::format("gfx_render_state_percent [%]: [{}]\n"
+    return out << fmt::format("gfx_render_state_percent [%]: [{}]\n"
                               "gfx_frequency [MHz]: [{}]\n"
                               "average_gfx_frequency [MHz]: [{}]\n"
                               "gfx_state_c0_percent [%]: [{}]\n"
                               "cpu_works_for_gpu_percent [%]: [{}]\n"
                               "gfx_watt [W]: [{}]",
-                              detail::join(detail::value_or_default(samples.get_gfx_render_state_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_gfx_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_average_gfx_frequency()), ", "),
-                              detail::join(detail::value_or_default(samples.get_gfx_state_c0_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_cpu_works_for_gpu_percent()), ", "),
-                              detail::join(detail::value_or_default(samples.get_gfx_watt()), ", "));
+                              fmt::join(detail::value_or_default(samples.get_gfx_render_state_percent()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_gfx_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_average_gfx_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_gfx_state_c0_percent()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_cpu_works_for_gpu_percent()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_gfx_watt()), ", "));
 }
 
 //*************************************************************************************************************************************//
 //                                                          idle state samples                                                         //
 //*************************************************************************************************************************************//
 
+bool cpu_idle_states_samples::has_samples() const {
+    return this->all_cpus_state_c0_percent_.has_value() || this->any_cpu_state_c0_percent_.has_value() || this->low_power_idle_state_percent_.has_value()
+           || this->system_low_power_idle_state_percent_.has_value() || this->package_low_power_idle_state_percent_.has_value() || this->idle_states_.has_value();
+}
+
 std::string cpu_idle_states_samples::generate_yaml_string() const {
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
     std::string str{ "idle_states:\n" };
 
     // the percentage of time all CPUs were in the c0 state
     if (this->all_cpus_state_c0_percent_.has_value()) {
-        str += std::format("  all_cpus_state_c0:\n"
+        str += fmt::format("  all_cpus_state_c0:\n"
                            "    turbostat_name: \"Totl%C0\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->all_cpus_state_c0_percent_.value(), ", "));
+                           fmt::join(this->all_cpus_state_c0_percent_.value(), ", "));
     }
     // the percentage of time any CPU was in the c0 state
     if (this->any_cpu_state_c0_percent_.has_value()) {
-        str += std::format("  any_cpu_state_c0:\n"
+        str += fmt::format("  any_cpu_state_c0:\n"
                            "    turbostat_name: \"Any%C0\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->any_cpu_state_c0_percent_.value(), ", "));
+                           fmt::join(this->any_cpu_state_c0_percent_.value(), ", "));
     }
     // the percentage of time the CPUs were in the low power idle state
     if (this->low_power_idle_state_percent_.has_value()) {
-        str += std::format("  lower_power_idle_state:\n"
+        str += fmt::format("  lower_power_idle_state:\n"
                            "    turbostat_name: \"CPU%LPI\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->low_power_idle_state_percent_.value(), ", "));
+                           fmt::join(this->low_power_idle_state_percent_.value(), ", "));
     }
     // the percentage of time the CPUs were in the system low power idle state
     if (this->system_low_power_idle_state_percent_.has_value()) {
-        str += std::format("  system_lower_power_idle_state:\n"
+        str += fmt::format("  system_lower_power_idle_state:\n"
                            "    turbostat_name: \"SYS%LPI\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->system_low_power_idle_state_percent_.value(), ", "));
+                           fmt::join(this->system_low_power_idle_state_percent_.value(), ", "));
     }
     // the percentage of time the package was in the low power idle state
     if (this->package_low_power_idle_state_percent_.has_value()) {
-        str += std::format("  package_lower_power_idle_state:\n"
+        str += fmt::format("  package_lower_power_idle_state:\n"
                            "    turbostat_name: \"Pkg%LPI\"\n"
                            "    unit: \"percentage\"\n"
                            "    values: [{}]\n",
-                           detail::join(this->package_low_power_idle_state_percent_.value(), ", "));
+                           fmt::join(this->package_low_power_idle_state_percent_.value(), ", "));
     }
 
     // the other core idle states
@@ -642,42 +728,39 @@ std::string cpu_idle_states_samples::generate_yaml_string() const {
                     std::string entry_name_with_state{};
                     std::regex_replace(std::back_inserter(entry_name_with_state), entry_name_placeholder.begin(), entry_name_placeholder.end(), placeholder_reg, std::string{ state });
 
-                    str += std::format("  {}:\n"
+                    str += fmt::format("  {}:\n"
                                        "    turbostat_name: \"{}\"\n"
                                        "    unit: \"{}\"\n"
                                        "    values: [{}]\n",
                                        entry_name_with_state,
                                        entry,
                                        entry_unit,
-                                       detail::join(values, ", "));
+                                       fmt::join(values, ", "));
                     break;
                 }
             }
         }
     }
 
-    // remove last newline
-    str.pop_back();
-
     return str;
 }
 
 std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &samples) {
-    std::string str = std::format("all_cpus_state_c0_percent [%]: [{}]\n"
+    std::string str = fmt::format("all_cpus_state_c0_percent [%]: [{}]\n"
                                   "any_cpu_state_c0_percent [%]: [{}]\n"
                                   "low_power_idle_state_percent [%]: [{}]\n"
                                   "system_low_power_idle_state_percent [%]: [{}]\n"
                                   "package_low_power_idle_state_percent [%]: [{}]\n",
-                                  detail::join(detail::value_or_default(samples.get_all_cpus_state_c0_percent()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_any_cpu_state_c0_percent()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_low_power_idle_state_percent()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_system_low_power_idle_state_percent()), ", "),
-                                  detail::join(detail::value_or_default(samples.get_package_low_power_idle_state_percent()), ", "));
+                                  fmt::join(detail::value_or_default(samples.get_all_cpus_state_c0_percent()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_any_cpu_state_c0_percent()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_low_power_idle_state_percent()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_system_low_power_idle_state_percent()), ", "),
+                                  fmt::join(detail::value_or_default(samples.get_package_low_power_idle_state_percent()), ", "));
 
     // add map entries
     if (samples.get_idle_states().has_value()) {
         for (const auto &[key, value] : samples.get_idle_states().value()) {
-            str += std::format("{}: [{}]\n", key, detail::join(value, ", "));
+            str += fmt::format("{}: [{}]\n", key, fmt::join(value, ", "));
         }
     }
 
diff --git a/src/hws/cpu/hardware_sampler.cpp b/src/hws/cpu/hardware_sampler.cpp
new file mode 100644
index 0000000..505e0bb
--- /dev/null
+++ b/src/hws/cpu/hardware_sampler.cpp
@@ -0,0 +1,695 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/cpu/hardware_sampler.hpp"
+
+#include "hws/cpu/cpu_samples.hpp"   // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples}
+#include "hws/cpu/utility.hpp"       // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess
+#include "hws/hardware_sampler.hpp"  // hws::tracking::hardware_sampler
+#include "hws/sample_category.hpp"   // hws::sample_category
+#include "hws/utility.hpp"           // hws::detail::{split, split_as, trim, convert_to, starts_with}
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+
+#include <cassert>        // assert
+#include <chrono>         // std::chrono::{steady_clock, milliseconds}
+#include <cstddef>        // std::size_t
+#include <exception>      // std::exception, std::terminate
+#include <ios>            // std::ios_base
+#include <iostream>       // std::cerr, std::endl
+#include <optional>       // std::make_optional
+#include <ostream>        // std::ostream
+#include <regex>          // std::regex, std::regex::extended, std::regex_match, std::regex_replace
+#include <stdexcept>      // std::runtime_error
+#include <string>         // std::string
+#include <string_view>    // std::string_view
+#include <thread>         // std::this_thread
+#include <unordered_map>  // std::unordered_map
+#include <vector>         // std::vector
+
+namespace hws {
+
+cpu_hardware_sampler::cpu_hardware_sampler(const sample_category category) :
+    cpu_hardware_sampler{ HWS_SAMPLING_INTERVAL, category } { }  // delegates to the interval-taking constructor, using HWS_SAMPLING_INTERVAL as the sampling interval
+
+cpu_hardware_sampler::cpu_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    hardware_sampler{ sampling_interval, category } { }  // forwards both arguments unchanged to hardware_sampler; the constructor body itself does nothing
+
+cpu_hardware_sampler::~cpu_hardware_sampler() {
+    // report any std::exception raised while stopping on std::cerr and terminate the program
+    try {
+        // a sampler that has been started but not yet stopped is stopped here
+        const bool still_running = this->has_sampling_started() && !this->has_sampling_stopped();
+        if (still_running) { this->stop_sampling(); }
+    } catch (const std::exception &error) {
+        std::cerr << error.what() << std::endl;
+        std::terminate();
+    }
+}
+
+void cpu_hardware_sampler::sampling_loop() {
+    //
+    // add samples where we only have to retrieve the value once
+    //
+
+    this->add_time_point(std::chrono::steady_clock::now());
+
+#if defined(HWS_VIA_LSCPU_ENABLED)
+    {
+        const std::string lscpu_output = detail::run_subprocess("lscpu");
+        const std::vector<std::string_view> lscpu_lines = detail::split(detail::trim(lscpu_output), '\n');
+
+        for (std::string_view line : lscpu_lines) {
+            line = detail::trim(line);
+            // extract the value
+            std::string_view value{ line };
+            value.remove_prefix(value.find_first_of(":") + 1);
+            value = detail::trim(value);
+
+            // check whether the line starts with an entry that we want to sample
+            if (this->sample_category_enabled(sample_category::general)) {
+                if (detail::starts_with(line, "Architecture")) {
+                    general_samples_.architecture_ = detail::convert_to<decltype(general_samples_.architecture_)::value_type>(value);
+                } else if (detail::starts_with(line, "Byte Order")) {
+                    general_samples_.byte_order_ = detail::convert_to<decltype(general_samples_.byte_order_)::value_type>(value);
+                } else if (detail::starts_with(line, "CPU(s)")) {
+                    general_samples_.num_threads_ = detail::convert_to<decltype(general_samples_.num_threads_)::value_type>(value);
+                } else if (detail::starts_with(line, "Thread(s) per core")) {
+                    general_samples_.threads_per_core_ = detail::convert_to<decltype(general_samples_.threads_per_core_)::value_type>(value);
+                } else if (detail::starts_with(line, "Core(s) per socket")) {
+                    general_samples_.cores_per_socket_ = detail::convert_to<decltype(general_samples_.cores_per_socket_)::value_type>(value);
+                } else if (detail::starts_with(line, "Socket(s)")) {
+                    general_samples_.num_sockets_ = detail::convert_to<decltype(general_samples_.num_sockets_)::value_type>(value);
+                } else if (detail::starts_with(line, "NUMA node(s)")) {
+                    general_samples_.numa_nodes_ = detail::convert_to<decltype(general_samples_.numa_nodes_)::value_type>(value);
+                } else if (detail::starts_with(line, "Vendor ID")) {
+                    general_samples_.vendor_id_ = detail::convert_to<decltype(general_samples_.vendor_id_)::value_type>(value);
+                } else if (detail::starts_with(line, "Model name")) {
+                    general_samples_.name_ = detail::convert_to<decltype(general_samples_.name_)::value_type>(value);
+                } else if (detail::starts_with(line, "Flags")) {
+                    general_samples_.flags_ = detail::split_as<decltype(general_samples_.flags_)::value_type::value_type>(value, ' ');
+                }
+            }
+            if (this->sample_category_enabled(sample_category::clock)) {
+                if (detail::starts_with(line, "Frequency boost")) {
+                    clock_samples_.auto_boosted_clock_enabled_ = value == "enabled";
+                } else if (detail::starts_with(line, "CPU max MHz")) {
+                    clock_samples_.clock_frequency_max_ = detail::convert_to<decltype(clock_samples_.clock_frequency_max_)::value_type>(value);
+                } else if (detail::starts_with(line, "CPU min MHz")) {
+                    clock_samples_.clock_frequency_min_ = detail::convert_to<decltype(clock_samples_.clock_frequency_min_)::value_type>(value);
+                }
+            }
+            if (this->sample_category_enabled(sample_category::memory)) {
+                if (detail::starts_with(line, "L1d cache")) {
+                    memory_samples_.cache_size_L1d_ = detail::convert_to<decltype(memory_samples_.cache_size_L1d_)::value_type>(value);
+                } else if (detail::starts_with(line, "L1i cache")) {
+                    memory_samples_.cache_size_L1i_ = detail::convert_to<decltype(memory_samples_.cache_size_L1i_)::value_type>(value);
+                } else if (detail::starts_with(line, "L2 cache")) {
+                    memory_samples_.cache_size_L2_ = detail::convert_to<decltype(memory_samples_.cache_size_L2_)::value_type>(value);
+                } else if (detail::starts_with(line, "L3 cache")) {
+                    memory_samples_.cache_size_L3_ = detail::convert_to<decltype(memory_samples_.cache_size_L3_)::value_type>(value);
+                }
+            }
+        }
+
+        if (this->sample_category_enabled(sample_category::general)) {
+            // check if the number of cores can be derived from the otherwise found values
+            if (general_samples_.num_threads_.has_value() && general_samples_.threads_per_core_.has_value()) {
+                general_samples_.num_cores_ = general_samples_.num_threads_.value() / general_samples_.threads_per_core_.value();
+            }
+        }
+    }
+#endif
+
+#if defined(HWS_VIA_FREE_ENABLED)
+    const std::regex whitespace_replace_reg{ "[ ]+", std::regex::extended };
+    if (this->sample_category_enabled(sample_category::memory)) {
+        std::string free_output = detail::run_subprocess("free -b");
+        free_output = std::regex_replace(free_output, whitespace_replace_reg, " ");
+        const std::vector<std::string_view> free_lines = detail::split(detail::trim(free_output), '\n');
+        assert((free_lines.size() >= 3) && "Must read more than three lines, but fewer were read!");
+
+        // read memory information
+        const std::vector<std::string_view> memory_data = detail::split(free_lines[1], ' ');
+        memory_samples_.memory_total_ = detail::convert_to<decltype(memory_samples_.memory_total_)::value_type>(memory_data[1]);
+        memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ detail::convert_to<decltype(memory_samples_.memory_used_)::value_type::value_type>(memory_data[2]) };
+        memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{ detail::convert_to<decltype(memory_samples_.memory_free_)::value_type::value_type>(memory_data[3]) };
+
+        // read swap information
+        const std::vector<std::string_view> swap_data = detail::split(free_lines[2], ' ');
+        memory_samples_.swap_memory_total_ = detail::convert_to<decltype(memory_samples_.swap_memory_total_)::value_type>(swap_data[1]);
+        memory_samples_.swap_memory_used_ = decltype(memory_samples_.swap_memory_used_)::value_type{ detail::convert_to<decltype(memory_samples_.swap_memory_used_)::value_type::value_type>(swap_data[2]) };
+        memory_samples_.swap_memory_free_ = decltype(memory_samples_.swap_memory_free_)::value_type{ detail::convert_to<decltype(memory_samples_.swap_memory_free_)::value_type::value_type>(swap_data[3]) };
+    }
+#endif
+
+#if defined(HWS_VIA_TURBOSTAT_ENABLED)
+
+    // -n, --num_iterations     number of the measurement iterations
+    // -i, --interval           sampling interval in seconds (decimal number)
+    // -S, --Summary            limits output to 1-line per interval
+    // -q, --quiet              skip decoding system configuration header
+
+    // get header information
+    #if defined(HWS_VIA_TURBOSTAT_ROOT)
+    // run with sudo
+    const std::string_view turbostat_command_line = "sudo turbostat -n 1 -i 0.001 -S -q";
+    #else
+    // run without sudo
+    const std::string_view turbostat_command_line = "turbostat -n 1 -i 0.001 -S -q";
+    #endif
+
+    {
+        // run turbostat
+        const std::string turbostat_output = detail::run_subprocess(turbostat_command_line);
+
+        // retrieve the turbostat data
+        const std::vector<std::string_view> data = detail::split(detail::trim(turbostat_output), '\n');
+        assert((data.size() >= 2) && "Must read at least two lines!");
+        const std::vector<std::string_view> header = detail::split(data[0], '\t');
+        const std::vector<std::string_view> values = detail::split(data[1], '\t');
+
+        for (std::size_t i = 0; i < header.size(); ++i) {
+            // general samples
+            if (header[i] == "Busy%") {
+                if (this->sample_category_enabled(sample_category::general)) {
+                    using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
+                    general_samples_.compute_utilization_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "IPC") {
+                if (this->sample_category_enabled(sample_category::general)) {
+                    using vector_type = decltype(general_samples_.ipc_)::value_type;
+                    general_samples_.ipc_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "IRQ") {
+                if (this->sample_category_enabled(sample_category::general)) {
+                    using vector_type = decltype(general_samples_.irq_)::value_type;
+                    general_samples_.irq_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "SMI") {
+                if (this->sample_category_enabled(sample_category::general)) {
+                    using vector_type = decltype(general_samples_.smi_)::value_type;
+                    general_samples_.smi_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+            } else if (header[i] == "POLL") {
+                if (this->sample_category_enabled(sample_category::general)) {
+                    using vector_type = decltype(general_samples_.poll_)::value_type;
+                    general_samples_.poll_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "POLL%") {
+                if (this->sample_category_enabled(sample_category::general)) {
+                    using vector_type = decltype(general_samples_.poll_percent_)::value_type;
+                    general_samples_.poll_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            }
+
+            // clock related samples
+            if (header[i] == "Avg_MHz") {
+                if (this->sample_category_enabled(sample_category::clock)) {
+                    using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
+                    clock_samples_.clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "Bzy_MHz") {
+                if (this->sample_category_enabled(sample_category::clock)) {
+                    using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
+                    clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "TSC_MHz") {
+                if (this->sample_category_enabled(sample_category::clock)) {
+                    using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
+                    clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            }
+
+            // power related samples
+            if (header[i] == "PkgWatt") {
+                if (this->sample_category_enabled(sample_category::power)) {
+                    using vector_type = decltype(power_samples_.power_usage_)::value_type;
+                    power_samples_.power_usage_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                    power_samples_.power_measurement_type_ = "current/instant";
+                    power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 };
+                }
+                continue;
+            } else if (header[i] == "CorWatt") {
+                if (this->sample_category_enabled(sample_category::power)) {
+                    using vector_type = decltype(power_samples_.core_watt_)::value_type;
+                    power_samples_.core_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "RAMWatt") {
+                if (this->sample_category_enabled(sample_category::power)) {
+                    using vector_type = decltype(power_samples_.ram_watt_)::value_type;
+                    power_samples_.ram_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "PKG_%") {
+                if (this->sample_category_enabled(sample_category::power)) {
+                    using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
+                    power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "RAM_%") {
+                if (this->sample_category_enabled(sample_category::power)) {
+                    using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
+                    power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            }
+
+            // temperature related samples
+            if (header[i] == "CoreTmp") {
+                if (this->sample_category_enabled(sample_category::temperature)) {
+                    using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
+                    temperature_samples_.core_temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "CoreThr") {
+                if (this->sample_category_enabled(sample_category::temperature)) {
+                    using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
+                    temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "PkgTmp") {
+                if (this->sample_category_enabled(sample_category::temperature)) {
+                    using vector_type = decltype(temperature_samples_.temperature_)::value_type;
+                    temperature_samples_.temperature_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            }
+
+            // gfx (iGPU) related samples
+            if (header[i] == "GFX%rc6") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
+                    using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
+                    gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "GFXMHz") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
+                    using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
+                    gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "GFXAMHz") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
+                    using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
+                    gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "GFX%C0") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
+                    using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
+                    gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "CPUGFX%") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
+                    using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
+                    gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "GFXWatt") {
+                if (this->sample_category_enabled(sample_category::gfx)) {
+                    using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
+                    gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            }
+
+            // idle state related samples
+            if (header[i] == "Totl%C0") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
+                    using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
+                    idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "Any%C0") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
+                    using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
+                    idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "CPU%LPI") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
+                    using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
+                    idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "SYS%LPI") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
+                    using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
+                    idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else if (header[i] == "Pkg%LPI") {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
+                    using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
+                    idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                }
+                continue;
+            } else {
+                if (this->sample_category_enabled(sample_category::idle_state)) {
+                    // test against regex
+                    const std::string header_str{ header[i] };
+                    const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended };
+                    if (std::regex_match(header_str, reg)) {
+                        // first time this branch is reached -> create optional value
+                        if (!idle_state_samples_.idle_states_.has_value()) {
+                            idle_state_samples_.idle_states_ = std::make_optional<typename cpu_idle_states_samples::map_type>();
+                        }
+
+                        using vector_type = cpu_idle_states_samples::map_type::mapped_type;
+                        idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to<typename vector_type::value_type>(values[i]) };
+                    }
+                }
+                continue;
+            }
+        }
+    }
+#endif
+
+    //
+    // loop until stop_sampling() is called
+    //
+
+    while (!this->has_sampling_stopped()) {
+        // only sample values if the sampler currently isn't paused
+        if (this->is_sampling()) {
+            // add current time point
+            this->add_time_point(std::chrono::steady_clock::now());
+
+#if defined(HWS_VIA_FREE_ENABLED)
+            if (this->sample_category_enabled(sample_category::memory)) {
+                // run free
+                std::string free_output = detail::run_subprocess("free -b");
+                free_output = std::regex_replace(free_output, whitespace_replace_reg, " ");
+                const std::vector<std::string_view> free_lines = detail::split(detail::trim(free_output), '\n');
+                assert((free_lines.size() >= 3) && "Must read more than three lines, but fewer were read!");
+
+                // read memory information
+                const std::vector<std::string_view> memory_data = detail::split(free_lines[1], ' ');
+                memory_samples_.memory_used_->push_back(detail::convert_to<decltype(memory_samples_.memory_used_)::value_type::value_type>(memory_data[2]));
+                memory_samples_.memory_free_->push_back(detail::convert_to<decltype(memory_samples_.memory_free_)::value_type::value_type>(memory_data[3]));
+
+                // read swap information
+                const std::vector<std::string_view> swap_data = detail::split(free_lines[2], ' ');
+                memory_samples_.swap_memory_used_->push_back(detail::convert_to<decltype(memory_samples_.swap_memory_used_)::value_type::value_type>(swap_data[2]));
+                memory_samples_.swap_memory_free_->push_back(detail::convert_to<decltype(memory_samples_.swap_memory_free_)::value_type::value_type>(swap_data[3]));
+            }
+#endif
+
+#if defined(HWS_VIA_TURBOSTAT_ENABLED)
+            {
+                // run turbostat
+                const std::string turbostat_output = detail::run_subprocess(turbostat_command_line);
+
+                // retrieve the turbostat data
+                const std::vector<std::string_view> data = detail::split(detail::trim(turbostat_output), '\n');
+                assert((data.size() >= 2) && "Must read at least two lines!");
+                const std::vector<std::string_view> header = detail::split(data[0], '\t');
+                const std::vector<std::string_view> values = detail::split(data[1], '\t');
+
+                // add values to the respective sample entries
+                for (std::size_t i = 0; i < header.size(); ++i) {
+                    // general samples
+                    if (header[i] == "Busy%") {
+                        if (this->sample_category_enabled(sample_category::general)) {
+                            using vector_type = decltype(general_samples_.compute_utilization_)::value_type;
+                            general_samples_.compute_utilization_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "IPC") {
+                        if (this->sample_category_enabled(sample_category::general)) {
+                            using vector_type = decltype(general_samples_.ipc_)::value_type;
+                            general_samples_.ipc_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "IRQ") {
+                        if (this->sample_category_enabled(sample_category::general)) {
+                            using vector_type = decltype(general_samples_.irq_)::value_type;
+                            general_samples_.irq_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "SMI") {
+                        if (this->sample_category_enabled(sample_category::general)) {
+                            using vector_type = decltype(general_samples_.smi_)::value_type;
+                            general_samples_.smi_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "POLL") {
+                        if (this->sample_category_enabled(sample_category::general)) {
+                            using vector_type = decltype(general_samples_.poll_)::value_type;
+                            general_samples_.poll_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "POLL%") {
+                        if (this->sample_category_enabled(sample_category::general)) {
+                            using vector_type = decltype(general_samples_.poll_percent_)::value_type;
+                            general_samples_.poll_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    }
+
+                    // clock related samples
+                    if (header[i] == "Avg_MHz") {
+                        if (this->sample_category_enabled(sample_category::clock)) {
+                            using vector_type = decltype(clock_samples_.clock_frequency_)::value_type;
+                            clock_samples_.clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "Bzy_MHz") {
+                        if (this->sample_category_enabled(sample_category::clock)) {
+                            using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type;
+                            clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "TSC_MHz") {
+                        if (this->sample_category_enabled(sample_category::clock)) {
+                            using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type;
+                            clock_samples_.time_stamp_counter_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    }
+
+                    // power related samples
+                    if (header[i] == "PkgWatt") {
+                        if (this->sample_category_enabled(sample_category::power)) {
+                            using vector_type = decltype(power_samples_.power_usage_)::value_type;
+                            power_samples_.power_usage_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                            // calculate total energy consumption
+                            using value_type = decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type;
+                            const std::size_t num_time_points = this->sampling_time_points().size();
+                            const value_type time_difference = std::chrono::duration<value_type>(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count();
+                            const auto current = power_samples_.power_usage_->back() * time_difference;
+                            power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current);
+                        }
+                        continue;
+                    } else if (header[i] == "CorWatt") {
+                        if (this->sample_category_enabled(sample_category::power)) {
+                            using vector_type = decltype(power_samples_.core_watt_)::value_type;
+                            power_samples_.core_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "RAMWatt") {
+                        if (this->sample_category_enabled(sample_category::power)) {
+                            using vector_type = decltype(power_samples_.ram_watt_)::value_type;
+                            power_samples_.ram_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "PKG_%") {
+                        if (this->sample_category_enabled(sample_category::power)) {
+                            using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type;
+                            power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "RAM_%") {
+                        if (this->sample_category_enabled(sample_category::power)) {
+                            using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type;
+                            power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    }
+
+                    // temperature related samples
+                    if (header[i] == "CoreTmp") {
+                        if (this->sample_category_enabled(sample_category::temperature)) {
+                            using vector_type = decltype(temperature_samples_.core_temperature_)::value_type;
+                            temperature_samples_.core_temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "CoreThr") {
+                        if (this->sample_category_enabled(sample_category::temperature)) {
+                            using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type;
+                            temperature_samples_.core_throttle_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "PkgTmp") {
+                        if (this->sample_category_enabled(sample_category::temperature)) {
+                            using vector_type = decltype(temperature_samples_.temperature_)::value_type;
+                            temperature_samples_.temperature_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    }
+
+                    // gfx (iGPU) related samples
+                    if (header[i] == "GFX%rc6") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
+                            using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type;
+                            gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "GFXMHz") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
+                            using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type;
+                            gfx_samples_.gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "GFXAMHz") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
+                            using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type;
+                            gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "GFX%C0") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
+                            using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type;
+                            gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "CPUGFX%") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
+                            using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type;
+                            gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "GFXWatt") {
+                        if (this->sample_category_enabled(sample_category::gfx)) {
+                            using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type;
+                            gfx_samples_.gfx_watt_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    }
+
+                    // idle state related samples
+                    if (header[i] == "Totl%C0") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
+                            using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type;
+                            idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "Any%C0") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
+                            using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type;
+                            idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "CPU%LPI") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
+                            using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type;
+                            idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "SYS%LPI") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
+                            using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type;
+                            idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else if (header[i] == "Pkg%LPI") {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
+                            using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type;
+                            idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                        }
+                        continue;
+                    } else {
+                        if (this->sample_category_enabled(sample_category::idle_state)) {
+                            const std::string header_str{ header[i] };
+                            if (idle_state_samples_.idle_states_.value().count(header_str) > decltype(idle_state_samples_)::map_type::size_type{ 0 }) {
+                                using vector_type = cpu_idle_states_samples::map_type::mapped_type;
+                                idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to<typename vector_type::value_type>(values[i]));
+                            }
+                        }
+                        continue;
+                    }
+                }
+            }
+#endif
+        }
+
+        // wait for the sampling interval to pass to retrieve the next sample
+        std::this_thread::sleep_for(this->sampling_interval());
+    }
+}
+
+std::string cpu_hardware_sampler::device_identification() const {
+    return std::string{ "cpu_device" };  // constant identifier, independent of the sampler's state
+}
+
+std::string cpu_hardware_sampler::samples_only_as_yaml_string() const {
+    // the YAML entry may only be assembled once the sampler is no longer running
+    if (this->is_sampling()) {
+        throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
+    }
+
+    std::string yaml{};
+    // append the YAML string of a sample category; the separating newline is
+    // only added if the category actually holds samples
+    const auto append_category = [&yaml](const auto &samples) {
+        yaml += fmt::format("{}{}", samples.generate_yaml_string(), samples.has_samples() ? "\n" : "");
+    };
+
+    // keep the category order of the final YAML entry
+    append_category(general_samples_);
+    append_category(clock_samples_);
+    append_category(power_samples_);
+    append_category(memory_samples_);
+    append_category(temperature_samples_);
+    append_category(gfx_samples_);
+
+    // the idle state samples are the last category and, therefore,
+    // are never followed by a separating newline
+    yaml += idle_state_samples_.generate_yaml_string();
+
+    return yaml;
+}
+
+std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler) {
+    // a sampler that is still running can't be printed -> only set the failbit of the stream and bail out
+    if (sampler.is_sampling()) {
+        out.setstate(std::ios_base::failbit);
+        return out;
+    }
+    return out << fmt::format("sampling interval: {}\n"
+                              "time points: [{}]\n\n"
+                              "general samples:\n{}\n\n"
+                              "clock samples:\n{}\n\n"
+                              "power samples:\n{}\n\n"
+                              "memory samples:\n{}\n\n"
+                              "temperature samples:\n{}\n\n"
+                              "gfx samples:\n{}\n\n"
+                              "idle state samples:\n{}",
+                              sampler.sampling_interval(),
+                              fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
+                              sampler.general_samples(),
+                              sampler.clock_samples(),
+                              sampler.power_samples(),
+                              sampler.memory_samples(),
+                              sampler.temperature_samples(),
+                              sampler.gfx_samples(),
+                              sampler.idle_state_samples());
+}
+
+}  // namespace hws
diff --git a/src/hardware_sampling/cpu/utility.cpp b/src/hws/cpu/utility.cpp
similarity index 87%
rename from src/hardware_sampling/cpu/utility.cpp
rename to src/hws/cpu/utility.cpp
index 7ba16d2..7bb6b3d 100644
--- a/src/hardware_sampling/cpu/utility.cpp
+++ b/src/hws/cpu/utility.cpp
@@ -5,16 +5,16 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/cpu/utility.hpp"
+#include "hws/cpu/utility.hpp"
 
-#include "hardware_sampling/utility.hpp"  // hws::detail::split_as
+#include "hws/utility.hpp"  // hws::detail::split_as
 
+#include "fmt/format.h"  // fmt::format
 #include "subprocess.h"  // subprocess_s, subprocess_create, subprocess_join, subprocess_stdout, subprocess_option_e
 
 #include <algorithm>    // std::transform
 #include <cstddef>      // std::size_t
 #include <cstdio>       // std::FILE, std::fread
-#include <format>       // std::format
 #include <stdexcept>    // std::runtime_error
 #include <string>       // std::string
 #include <string_view>  // std::string_view
@@ -36,12 +36,12 @@ std::string run_subprocess(const std::string_view cmd_line) {
 
     // create subprocess
     subprocess_s proc{};
-    HWS_SUBPROCESS_ERROR_CHECK(subprocess_create(cmd_ptr_split.data(), options, &proc));
+    HWS_SUBPROCESS_ERROR_CHECK(subprocess_create(cmd_ptr_split.data(), options, &proc))
     // wait until process has finished
     int return_code{};
-    HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code));
+    HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code))
     if (return_code != 0) {
-        throw std::runtime_error{ std::format("Error: \"{}\" returned with {}!", cmd_line, return_code) };
+        throw std::runtime_error{ fmt::format("Error: \"{}\" returned with {}!", cmd_line, return_code) };
     }
 
     // get output handle and read data -> stdout and stderr are the same handle
@@ -50,7 +50,7 @@ std::string run_subprocess(const std::string_view cmd_line) {
     const std::size_t bytes_read = std::fread(buffer.data(), sizeof(typename decltype(buffer)::value_type), buffer.size(), out_handle);
 
     // destroy subprocess
-    HWS_SUBPROCESS_ERROR_CHECK(subprocess_destroy(&proc));
+    HWS_SUBPROCESS_ERROR_CHECK(subprocess_destroy(&proc))
 
     // create output
     return buffer.substr(0, bytes_read);
diff --git a/src/hardware_sampling/event.cpp b/src/hws/event.cpp
similarity index 80%
rename from src/hardware_sampling/event.cpp
rename to src/hws/event.cpp
index b88eaa3..373990e 100644
--- a/src/hardware_sampling/event.cpp
+++ b/src/hws/event.cpp
@@ -5,15 +5,16 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/event.hpp"
+#include "hws/event.hpp"
+
+#include "fmt/format.h"  // fmt::format
 
-#include <format>   // std::format
 #include <ostream>  // std::ostream
 
 namespace hws {
 
 std::ostream &operator<<(std::ostream &out, const event &e) {
-    return out << std::format("time_point: {}\n"
+    return out << fmt::format("time_point: {}\n"
                               "name: {}",
                               e.time_point.time_since_epoch(),
                               e.name);
diff --git a/src/hws/gpu_amd/hardware_sampler.cpp b/src/hws/gpu_amd/hardware_sampler.cpp
new file mode 100644
index 0000000..6d52e03
--- /dev/null
+++ b/src/hws/gpu_amd/hardware_sampler.cpp
@@ -0,0 +1,727 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/gpu_amd/hardware_sampler.hpp"
+
+#include "hws/gpu_amd/rocm_smi_samples.hpp"  // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples}
+#include "hws/gpu_amd/utility.hpp"           // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK
+#include "hws/hardware_sampler.hpp"          // hws::hardware_sampler
+#include "hws/sample_category.hpp"           // hws::sample_category
+#include "hws/utility.hpp"                   // hws::detail::time_points_to_epoch
+
+#include "fmt/format.h"           // fmt::format
+#include "fmt/ranges.h"           // fmt::join
+#include "hip/hip_runtime_api.h"  // HIP runtime functions
+#include "rocm_smi/rocm_smi.h"    // ROCm SMI runtime functions
+
+#include <chrono>     // std::chrono::{steady_clock, duration_cast, milliseconds}
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::uint32_t, std::uint64_t
+#include <exception>  // std::exception, std::terminate
+#include <ios>        // std::ios_base
+#include <iostream>   // std::cerr, std::endl
+#include <optional>   // std::optional
+#include <ostream>    // std::ostream
+#include <stdexcept>  // std::runtime_error
+#include <string>     // std::string
+#include <thread>     // std::this_thread
+#include <utility>    // std::move
+#include <vector>     // std::vector
+
+namespace hws {
+
+gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const sample_category category) :
+    gpu_amd_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { }  // delegates with device ID 0 and HWS_SAMPLING_INTERVAL
+
+gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const sample_category category) :
+    gpu_amd_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { }  // delegates with the given device ID and HWS_SAMPLING_INTERVAL
+
+gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    gpu_amd_hardware_sampler{ 0, sampling_interval, category } { }  // delegates with device ID 0 and the given sampling interval
+
+gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    hardware_sampler{ sampling_interval, category },
+    device_id_{ static_cast<std::uint32_t>(device_id) } {
+    // rsmi_init must only be called once for all instances -> only the first instance initializes ROCm SMI
+    if (instances_++ == 0) {
+        HWS_ROCM_SMI_ERROR_CHECK(rsmi_init(std::uint64_t{ 0 }))  // NOTE(review): if the check throws, instances_ stays incremented and init_finished_ is never set -> confirm
+        // publish the finished initialization to all instances waiting in the else branch
+        init_finished_ = true;
+    } else {
+        // wait for the first instance, but yield the time slice instead of burning a core in an empty spin loop
+        while (!init_finished_) { std::this_thread::yield(); }
+    }
+}
+
+gpu_amd_hardware_sampler::~gpu_amd_hardware_sampler() {
+    try {
+        // a sampler that has been started but not yet stopped must be stopped before it is destroyed
+        const bool still_sampling = this->has_sampling_started() && !this->has_sampling_stopped();
+        if (still_sampling) {
+            this->stop_sampling();
+        }
+
+        // rsmi_shut_down must only be called once -> only the last remaining instance shuts down ROCm SMI
+        if (--instances_ == 0) {
+            HWS_ROCM_SMI_ERROR_CHECK(rsmi_shut_down())
+            // clear the flag the constructor sets after a successful rsmi_init
+            init_finished_ = false;
+        }
+    } catch (const std::exception &error) {
+        std::cerr << error.what() << std::endl;
+        std::terminate();
+    }
+}
+
+void gpu_amd_hardware_sampler::sampling_loop() {
+    //
+    // add samples where we only have to retrieve the value once
+    //
+
+    this->add_time_point(std::chrono::steady_clock::now());
+
+    double initial_total_power_consumption{};  // initial total power consumption in J
+
+    // retrieve initial general information
+    if (this->sample_category_enabled(sample_category::general)) {
+        // fixed information -> only retrieved once
+        // the byte order is given by AMD directly
+        general_samples_.byte_order_ = "Little Endian";
+
+        hipDeviceProp_t prop{};
+        if (hipGetDeviceProperties(&prop, static_cast<int>(device_id_)) == hipSuccess) {
+            const std::string architecture{ prop.gcnArchName };
+            general_samples_.architecture_ = architecture.substr(0, architecture.find_first_of('\0'));
+        }
+
+        std::string vendor_id(static_cast<std::string::size_type>(1024), '\0');
+        if (rsmi_dev_vendor_name_get(device_id_, vendor_id.data(), vendor_id.size()) == RSMI_STATUS_SUCCESS) {
+            general_samples_.vendor_id_ = vendor_id.substr(0, vendor_id.find_first_of('\0'));
+        }
+
+        std::string name(static_cast<std::string::size_type>(1024), '\0');
+        if (rsmi_dev_name_get(device_id_, name.data(), name.size()) == RSMI_STATUS_SUCCESS) {
+            general_samples_.name_ = name.substr(0, name.find_first_of('\0'));
+        }
+
+        // queried samples -> retrieved every iteration if available
+        rsmi_dev_perf_level_t pstate{};
+        if (rsmi_dev_perf_level_get(device_id_, &pstate) == RSMI_STATUS_SUCCESS) {
+            general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ detail::performance_level_to_string(pstate) };
+        }
+
+        decltype(general_samples_.compute_utilization_)::value_type::value_type utilization_gpu{};
+        if (rsmi_dev_busy_percent_get(device_id_, &utilization_gpu) == RSMI_STATUS_SUCCESS) {
+            general_samples_.compute_utilization_ = decltype(general_samples_.compute_utilization_)::value_type{ utilization_gpu };
+        }
+
+        decltype(general_samples_.memory_utilization_)::value_type::value_type utilization_mem{};
+        if (rsmi_dev_memory_busy_percent_get(device_id_, &utilization_mem) == RSMI_STATUS_SUCCESS) {
+            general_samples_.memory_utilization_ = decltype(general_samples_.memory_utilization_)::value_type{ utilization_mem };
+        }
+    }
+
+    // retrieve initial clock related information
+    if (this->sample_category_enabled(sample_category::clock)) {
+        rsmi_frequencies_t frequency_info{};
+        if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info) == RSMI_STATUS_SUCCESS) {
+            clock_samples_.clock_frequency_min_ = static_cast<decltype(clock_samples_.clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000'000.0;
+            clock_samples_.clock_frequency_max_ = static_cast<decltype(clock_samples_.clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0;
+            decltype(clock_samples_.available_clock_frequencies_)::value_type frequencies{};
+            for (std::size_t i = 0; i < frequency_info.num_supported; ++i) {
+                frequencies.push_back(static_cast<decltype(frequencies)::value_type>(frequency_info.frequency[i]) / 1000'000.0);
+            }
+            clock_samples_.available_clock_frequencies_ = frequencies;
+
+            // queried samples -> retrieved every iteration if available
+            clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{};
+            if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
+                clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
+            } else {
+                clock_samples_.clock_frequency_->push_back(0);
+            }
+        }
+
+        if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info) == RSMI_STATUS_SUCCESS) {
+            clock_samples_.socket_clock_frequency_min_ = static_cast<decltype(clock_samples_.socket_clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000'000.0;
+            clock_samples_.socket_clock_frequency_max_ = static_cast<decltype(clock_samples_.socket_clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0;
+            // queried samples -> retrieved every iteration if available
+            clock_samples_.socket_clock_frequency_ = decltype(clock_samples_.socket_clock_frequency_)::value_type{};
+            if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
+                clock_samples_.socket_clock_frequency_->push_back(static_cast<decltype(clock_samples_.socket_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
+            } else {
+                clock_samples_.socket_clock_frequency_->push_back(0);
+            }
+        }
+
+        if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info) == RSMI_STATUS_SUCCESS) {
+            clock_samples_.memory_clock_frequency_min_ = static_cast<decltype(clock_samples_.memory_clock_frequency_min_)::value_type>(frequency_info.frequency[0]) / 1000'000.0;
+            clock_samples_.memory_clock_frequency_max_ = static_cast<decltype(clock_samples_.memory_clock_frequency_max_)::value_type>(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0;
+            decltype(clock_samples_.available_memory_clock_frequencies_)::value_type frequencies{};
+            for (std::size_t i = 0; i < frequency_info.num_supported; ++i) {
+                frequencies.push_back(static_cast<decltype(frequencies)::value_type>(frequency_info.frequency[i]) / 1000'000.0);
+            }
+            clock_samples_.available_memory_clock_frequencies_ = frequencies;
+
+            // queried samples -> retrieved every iteration if available
+            clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{};
+            if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
+                clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
+            } else {
+                clock_samples_.memory_clock_frequency_->push_back(0);
+            }
+        }
+
+        // queried samples -> retrieved every iteration if available
+        decltype(clock_samples_.overdrive_level_)::value_type::value_type overdrive_level{};
+        if (rsmi_dev_overdrive_level_get(device_id_, &overdrive_level) == RSMI_STATUS_SUCCESS) {
+            clock_samples_.overdrive_level_ = decltype(clock_samples_.overdrive_level_)::value_type{ overdrive_level };
+        }
+
+        decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type memory_overdrive_level{};
+        if (rsmi_dev_mem_overdrive_level_get(device_id_, &memory_overdrive_level) == RSMI_STATUS_SUCCESS) {
+            clock_samples_.memory_overdrive_level_ = decltype(clock_samples_.memory_overdrive_level_)::value_type{ memory_overdrive_level };
+        }
+    }
+
+    // retrieve initial power related information
+    if (this->sample_category_enabled(sample_category::power)) {
+        std::uint64_t power_default_cap{};
+        if (rsmi_dev_power_cap_default_get(device_id_, &power_default_cap) == RSMI_STATUS_SUCCESS) {
+            power_samples_.power_management_limit_ = static_cast<decltype(power_samples_.power_management_limit_)::value_type>(power_default_cap) / 1000'000.0;
+        }
+
+        std::uint64_t power_cap{};
+        if (rsmi_dev_power_cap_get(device_id_, std::uint32_t{ 0 }, &power_cap) == RSMI_STATUS_SUCCESS) {
+            power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(power_cap) / 1000'000.0;
+        }
+
+        {
+            RSMI_POWER_TYPE power_type{};
+            std::uint64_t power_usage{};
+            if (rsmi_dev_power_get(device_id_, &power_usage, &power_type) == RSMI_STATUS_SUCCESS) {
+                switch (power_type) {
+                    case RSMI_POWER_TYPE::RSMI_AVERAGE_POWER:
+                        power_samples_.power_measurement_type_ = "average";
+                        break;
+                    case RSMI_POWER_TYPE::RSMI_CURRENT_POWER:
+                        power_samples_.power_measurement_type_ = "current/instant";
+                        break;
+                    case RSMI_POWER_TYPE::RSMI_INVALID_POWER:
+                        power_samples_.power_measurement_type_ = "invalid/undetected";
+                        break;
+                }
+                // report power usage since the first sample
+                power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(power_usage) / 1000'000.0 };
+            }
+        }
+
+        rsmi_power_profile_status_t power_profile{};
+        if (rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile) == RSMI_STATUS_SUCCESS) {
+            decltype(power_samples_.available_power_profiles_)::value_type available_power_profiles{};
+            // go through all possible power profiles
+            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_CUSTOM_MASK) != std::uint64_t{ 0 }) {
+                available_power_profiles.emplace_back("CUSTOM");
+            }
+            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_VIDEO_MASK) != std::uint64_t{ 0 }) {
+                available_power_profiles.emplace_back("VIDEO");
+            }
+            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_POWER_SAVING_MASK) != std::uint64_t{ 0 }) {
+                available_power_profiles.emplace_back("POWER_SAVING");
+            }
+            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_COMPUTE_MASK) != std::uint64_t{ 0 }) {
+                available_power_profiles.emplace_back("COMPUTE");
+            }
+            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_VR_MASK) != std::uint64_t{ 0 }) {
+                available_power_profiles.emplace_back("VR");
+            }
+            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK) != std::uint64_t{ 0 }) {
+                available_power_profiles.emplace_back("3D_FULL_SCREEN");
+            }
+            if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT) != std::uint64_t{ 0 }) {
+                available_power_profiles.emplace_back("BOOTUP_DEFAULT");
+            }
+            power_samples_.available_power_profiles_ = std::move(available_power_profiles);
+
+            // queried samples -> retrieved every iteration if available
+            switch (power_profile.current) {
+                case RSMI_PWR_PROF_PRST_CUSTOM_MASK:
+                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "CUSTOM" };
+                    break;
+                case RSMI_PWR_PROF_PRST_VIDEO_MASK:
+                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "VIDEO" };
+                    break;
+                case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK:
+                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "POWER_SAVING" };
+                    break;
+                case RSMI_PWR_PROF_PRST_COMPUTE_MASK:
+                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "COMPUTE" };
+                    break;
+                case RSMI_PWR_PROF_PRST_VR_MASK:
+                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "VR" };
+                    break;
+                case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK:
+                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "3D_FULL_SCREEN" };
+                    break;
+                case RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT:
+                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "BOOTUP_DEFAULT" };
+                    break;
+                case RSMI_PWR_PROF_PRST_INVALID:
+                    power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "INVALID" };
+                    break;
+            }
+        }
+
+        // queried samples -> retrieved every iteration if available
+        [[maybe_unused]] std::uint64_t timestamp{};
+        float resolution{};
+        std::uint64_t power_total_energy_consumption{};
+        if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {
+            const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
+            initial_total_power_consumption = scaled_value / 1000'000.0;
+            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
+        } else if (power_samples_.power_usage_.has_value()) {
+            // if the total energy consumption cannot be retrieved, but the current power draw, approximate it
+            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
+        }
+    }
+
+    // retrieve initial memory related information
+    if (this->sample_category_enabled(sample_category::memory)) {
+        decltype(memory_samples_.memory_total_)::value_type memory_total{};
+        if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_total) == RSMI_STATUS_SUCCESS) {
+            memory_samples_.memory_total_ = memory_total;
+        }
+
+        decltype(memory_samples_.visible_memory_total_)::value_type visible_memory_total{};
+        if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VIS_VRAM, &visible_memory_total) == RSMI_STATUS_SUCCESS) {
+            memory_samples_.visible_memory_total_ = visible_memory_total;
+        }
+
+        rsmi_pcie_bandwidth_t bandwidth_info{};
+        if (rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info) == RSMI_STATUS_SUCCESS) {
+            memory_samples_.num_pcie_lanes_min_ = bandwidth_info.lanes[0];
+            memory_samples_.num_pcie_lanes_max_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1];
+            memory_samples_.pcie_link_transfer_rate_min_ = bandwidth_info.transfer_rate.frequency[0] / 1'000'000;
+            memory_samples_.pcie_link_transfer_rate_max_ = bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.num_supported - 1] / 1'000'000;
+
+            // queried samples -> retrieved every iteration if available
+            memory_samples_.pcie_link_transfer_rate_ = decltype(memory_samples_.pcie_link_transfer_rate_)::value_type{};
+            memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{};
+            if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) {
+                memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1'000'000);
+                memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]);
+            } else {
+                // the current index is (somehow) wrong
+                memory_samples_.pcie_link_transfer_rate_->push_back(0);
+                memory_samples_.num_pcie_lanes_->push_back(0);
+            }
+        }
+
+        // queried samples -> retrieved every iteration if available
+        decltype(memory_samples_.memory_used_)::value_type::value_type memory_used{};
+        if (rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_used) == RSMI_STATUS_SUCCESS) {
+            memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_used };
+            if (memory_samples_.memory_total_.has_value()) {
+                memory_samples_.memory_free_ = decltype(memory_samples_.memory_used_)::value_type{ memory_samples_.memory_total_.value() - memory_samples_.memory_used_->front() };
+            }
+        }
+    }
+
+    // retrieve fixed temperature related information
+    if (this->sample_category_enabled(sample_category::temperature)) {
+        std::uint32_t fan_id{ 0 };
+        std::int64_t fan_speed{};
+        while (rsmi_dev_fan_speed_get(device_id_, fan_id, &fan_speed) == RSMI_STATUS_SUCCESS) {
+            if (fan_id == 0) {
+                // queried samples -> retrieved every iteration if available
+                const auto percentage = static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(fan_speed) / static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(RSMI_MAX_FAN_SPEED);
+                temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ percentage };
+            }
+            ++fan_id;
+        }
+        temperature_samples_.num_fans_ = fan_id;
+
+        decltype(temperature_samples_.fan_speed_max_)::value_type max_fan_speed{};
+        if (rsmi_dev_fan_speed_max_get(device_id_, std::uint32_t{ 0 }, &max_fan_speed) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.fan_speed_max_ = max_fan_speed;
+        }
+
+        std::int64_t temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MIN, &temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.temperature_min_ = static_cast<decltype(temperature_samples_.temperature_min_)::value_type>(temperature_min) / 1000.0;
+        }
+
+        std::int64_t temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.temperature_max_ = static_cast<decltype(temperature_samples_.temperature_max_)::value_type>(temperature_max) / 1000.0;
+        }
+
+        std::int64_t memory_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MIN, &memory_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.memory_temperature_min_ = static_cast<decltype(temperature_samples_.memory_temperature_min_)::value_type>(memory_temperature_min) / 1000.0;
+        }
+
+        std::int64_t memory_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MAX, &memory_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.memory_temperature_max_ = static_cast<decltype(temperature_samples_.memory_temperature_max_)::value_type>(memory_temperature_max) / 1000.0;
+        }
+
+        std::int64_t hotspot_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MIN, &hotspot_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hotspot_temperature_min_ = static_cast<decltype(temperature_samples_.hotspot_temperature_min_)::value_type>(hotspot_temperature_min) / 1000.0;
+        }
+
+        std::int64_t hotspot_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MAX, &hotspot_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hotspot_temperature_max_ = static_cast<decltype(temperature_samples_.hotspot_temperature_max_)::value_type>(hotspot_temperature_max) / 1000.0;
+        }
+
+        std::int64_t hbm_0_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MIN, &hbm_0_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_0_temperature_min_ = static_cast<decltype(temperature_samples_.hbm_0_temperature_min_)::value_type>(hbm_0_temperature_min) / 1000.0;
+        }
+
+        std::int64_t hbm_0_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MAX, &hbm_0_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_0_temperature_max_ = static_cast<decltype(temperature_samples_.hbm_0_temperature_max_)::value_type>(hbm_0_temperature_max) / 1000.0;
+        }
+
+        std::int64_t hbm_1_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MIN, &hbm_1_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_1_temperature_min_ = static_cast<decltype(temperature_samples_.hbm_1_temperature_min_)::value_type>(hbm_1_temperature_min) / 1000.0;
+        }
+
+        std::int64_t hbm_1_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MAX, &hbm_1_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_1_temperature_max_ = static_cast<decltype(temperature_samples_.hbm_1_temperature_max_)::value_type>(hbm_1_temperature_max) / 1000.0;
+        }
+
+        std::int64_t hbm_2_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MIN, &hbm_2_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_2_temperature_min_ = static_cast<decltype(temperature_samples_.hbm_2_temperature_min_)::value_type>(hbm_2_temperature_min) / 1000.0;
+        }
+
+        std::int64_t hbm_2_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MAX, &hbm_2_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_2_temperature_max_ = static_cast<decltype(temperature_samples_.hbm_2_temperature_max_)::value_type>(hbm_2_temperature_max) / 1000.0;
+        }
+
+        std::int64_t hbm_3_temperature_min{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MIN, &hbm_3_temperature_min) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_3_temperature_min_ = static_cast<decltype(temperature_samples_.hbm_3_temperature_min_)::value_type>(hbm_3_temperature_min) / 1000.0;
+        }
+
+        std::int64_t hbm_3_temperature_max{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MAX, &hbm_3_temperature_max) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_3_temperature_max_ = static_cast<decltype(temperature_samples_.hbm_3_temperature_max_)::value_type>(hbm_3_temperature_max) / 1000.0;
+        }
+
+        // queried samples -> retrieved every iteration if available
+        std::int64_t temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{ static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(temperature) / 1000.0 };
+        }
+
+        std::int64_t hotspot_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &hotspot_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hotspot_temperature_ = decltype(temperature_samples_.hotspot_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hotspot_temperature_)::value_type::value_type>(hotspot_temperature) / 1000.0 };
+        }
+
+        std::int64_t memory_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &memory_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.memory_temperature_ = decltype(temperature_samples_.memory_temperature_)::value_type{ static_cast<decltype(temperature_samples_.memory_temperature_)::value_type::value_type>(memory_temperature) / 1000.0 };
+        }
+
+        std::int64_t hbm_0_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &hbm_0_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_0_temperature_ = decltype(temperature_samples_.hbm_0_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hbm_0_temperature_)::value_type::value_type>(hbm_0_temperature) / 1000.0 };
+        }
+
+        std::int64_t hbm_1_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &hbm_1_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_1_temperature_ = decltype(temperature_samples_.hbm_1_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hbm_1_temperature_)::value_type::value_type>(hbm_1_temperature) / 1000.0 };
+        }
+
+        std::int64_t hbm_2_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &hbm_2_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_2_temperature_ = decltype(temperature_samples_.hbm_2_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hbm_2_temperature_)::value_type::value_type>(hbm_2_temperature) / 1000.0 };
+        }
+
+        std::int64_t hbm_3_temperature{};
+        if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &hbm_3_temperature) == RSMI_STATUS_SUCCESS) {
+            temperature_samples_.hbm_3_temperature_ = decltype(temperature_samples_.hbm_3_temperature_)::value_type{ static_cast<decltype(temperature_samples_.hbm_3_temperature_)::value_type::value_type>(hbm_3_temperature) / 1000.0 };
+        }
+    }
+
+    //
+    // loop until stop_sampling() is called
+    //
+
+    while (!this->has_sampling_stopped()) {
+        // only sample values if the sampler currently isn't paused
+        if (this->is_sampling()) {
+            // add current time point
+            this->add_time_point(std::chrono::steady_clock::now());
+
+            // retrieve general samples
+            if (this->sample_category_enabled(sample_category::general)) {
+                if (general_samples_.performance_level_.has_value()) {
+                    rsmi_dev_perf_level_t pstate{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate))
+                    general_samples_.performance_level_->push_back(detail::performance_level_to_string(pstate));
+                }
+
+                if (general_samples_.compute_utilization_.has_value()) {
+                    decltype(general_samples_.compute_utilization_)::value_type::value_type value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value))
+                    general_samples_.compute_utilization_->push_back(value);
+                }
+
+                if (general_samples_.memory_utilization_.has_value()) {
+                    decltype(general_samples_.memory_utilization_)::value_type::value_type value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value))
+                    general_samples_.memory_utilization_->push_back(value);
+                }
+            }
+
+            // retrieve clock related samples
+            if (this->sample_category_enabled(sample_category::clock)) {
+                if (clock_samples_.clock_frequency_.has_value()) {
+                    rsmi_frequencies_t frequency_info{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info))
+                    if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
+                        clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
+                    } else {
+                        // the current index is (somehow) wrong
+                        clock_samples_.clock_frequency_->push_back(0);
+                    }
+                }
+
+                if (clock_samples_.socket_clock_frequency_.has_value()) {
+                    rsmi_frequencies_t frequency_info{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info))
+                    if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
+                        clock_samples_.socket_clock_frequency_->push_back(static_cast<decltype(clock_samples_.socket_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
+                    } else {
+                        // the current index is (somehow) wrong
+                        clock_samples_.socket_clock_frequency_->push_back(0);
+                    }
+                }
+
+                if (clock_samples_.memory_clock_frequency_.has_value()) {
+                    rsmi_frequencies_t frequency_info{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info))
+                    if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) {
+                        clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(frequency_info.frequency[frequency_info.current]) / 1000'000.0);
+                    } else {
+                        // the current index is (somehow) wrong
+                        clock_samples_.memory_clock_frequency_->push_back(0);
+                    }
+                }
+
+                if (clock_samples_.overdrive_level_.has_value()) {
+                    decltype(clock_samples_.overdrive_level_)::value_type::value_type value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_overdrive_level_get(device_id_, &value))
+                    clock_samples_.overdrive_level_->push_back(value);
+                }
+
+                if (clock_samples_.memory_overdrive_level_.has_value()) {
+                    decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_mem_overdrive_level_get(device_id_, &value))
+                    clock_samples_.memory_overdrive_level_->push_back(value);
+                }
+            }
+
+            // retrieve power related samples
+            if (this->sample_category_enabled(sample_category::power)) {
+                if (power_samples_.power_usage_.has_value()) {
+                    [[maybe_unused]] RSMI_POWER_TYPE power_type{};
+                    std::uint64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type))
+                    power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value) / 1000'000.0);
+                }
+
+                if (power_samples_.power_total_energy_consumption_.has_value()) {
+                    [[maybe_unused]] std::uint64_t timestamp{};
+                    float resolution{};
+                    std::uint64_t value{};
+                    if (rsmi_dev_energy_count_get(device_id_, &value, &resolution, &timestamp) == RSMI_STATUS_SUCCESS) {
+                        const auto scaled_value = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) * static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(resolution);
+                        power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000'000.0) - initial_total_power_consumption);
+                    } else if (power_samples_.power_usage_.has_value()) {
+                        // if the total energy consumption cannot be retrieved, but the current power draw, approximate it
+                        const std::size_t num_time_points = this->sampling_time_points().size();
+                        const auto time_difference = std::chrono::duration<double>(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count();
+                        const auto current = power_samples_.power_usage_->back() * time_difference;
+                        power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current);
+                    }
+                }
+
+                if (power_samples_.power_profile_.has_value()) {
+                    rsmi_power_profile_status_t power_profile{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile))
+                    switch (power_profile.current) {
+                        case RSMI_PWR_PROF_PRST_CUSTOM_MASK:
+                            power_samples_.power_profile_->emplace_back("CUSTOM");
+                            break;
+                        case RSMI_PWR_PROF_PRST_VIDEO_MASK:
+                            power_samples_.power_profile_->emplace_back("VIDEO");
+                            break;
+                        case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK:
+                            power_samples_.power_profile_->emplace_back("POWER_SAVING");
+                            break;
+                        case RSMI_PWR_PROF_PRST_COMPUTE_MASK:
+                            power_samples_.power_profile_->emplace_back("COMPUTE");
+                            break;
+                        case RSMI_PWR_PROF_PRST_VR_MASK:
+                            power_samples_.power_profile_->emplace_back("VR");
+                            break;
+                        case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK:
+                            power_samples_.power_profile_->emplace_back("3D_FULL_SCREEN");
+                            break;
+                        case RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT:
+                            power_samples_.power_profile_->emplace_back("BOOTUP_DEFAULT");
+                            break;
+                        case RSMI_PWR_PROF_PRST_INVALID:
+                            power_samples_.power_profile_->emplace_back("INVALID");
+                            break;
+                    }
+                }
+            }
+
+            // retrieve memory related samples
+            if (this->sample_category_enabled(sample_category::memory)) {
+                if (memory_samples_.memory_used_.has_value()) {
+                    decltype(memory_samples_.memory_used_)::value_type::value_type value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value))
+                    memory_samples_.memory_used_->push_back(value);
+                    if (memory_samples_.memory_free_.has_value()) {
+                        memory_samples_.memory_free_->push_back(memory_samples_.memory_total_.value() - value);
+                    }
+                }
+
+                if (memory_samples_.pcie_link_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) {
+                    rsmi_pcie_bandwidth_t bandwidth_info{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info))
+                    if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) {
+                        memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1'000'000);
+                        memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]);
+                    } else {
+                        // the current index is (somehow) wrong
+                        memory_samples_.pcie_link_transfer_rate_->push_back(0);
+                        memory_samples_.num_pcie_lanes_->push_back(0);
+                    }
+                }
+            }
+
+            // retrieve temperature related samples
+            if (this->sample_category_enabled(sample_category::temperature)) {
+                if (temperature_samples_.fan_speed_percentage_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value))
+                    temperature_samples_.fan_speed_percentage_->push_back(static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(value) / static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(RSMI_MAX_FAN_SPEED));
+                }
+
+                if (temperature_samples_.temperature_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value))
+                    temperature_samples_.temperature_->push_back(static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(value) / 1000.0);
+                }
+
+                if (temperature_samples_.memory_temperature_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value))
+                    temperature_samples_.memory_temperature_->push_back(static_cast<decltype(temperature_samples_.memory_temperature_)::value_type::value_type>(value) / 1000.0);
+                }
+
+                if (temperature_samples_.hotspot_temperature_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value))
+                    temperature_samples_.hotspot_temperature_->push_back(static_cast<decltype(temperature_samples_.hotspot_temperature_)::value_type::value_type>(value) / 1000.0);
+                }
+
+                if (temperature_samples_.hbm_0_temperature_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value))
+                    temperature_samples_.hbm_0_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_0_temperature_)::value_type::value_type>(value) / 1000.0);
+                }
+
+                if (temperature_samples_.hbm_1_temperature_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value))
+                    temperature_samples_.hbm_1_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_1_temperature_)::value_type::value_type>(value) / 1000.0);
+                }
+
+                if (temperature_samples_.hbm_2_temperature_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value))
+                    temperature_samples_.hbm_2_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_2_temperature_)::value_type::value_type>(value) / 1000.0);
+                }
+
+                if (temperature_samples_.hbm_3_temperature_.has_value()) {
+                    std::int64_t value{};
+                    HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value))
+                    temperature_samples_.hbm_3_temperature_->push_back(static_cast<decltype(temperature_samples_.hbm_3_temperature_)::value_type::value_type>(value) / 1000.0);
+                }
+            }
+        }
+
+        // wait for the sampling interval to pass to retrieve the next sample
+        std::this_thread::sleep_for(this->sampling_interval());
+    }
+}
+
+std::string gpu_amd_hardware_sampler::device_identification() const {
+    return fmt::format("gpu_amd_device_{}", this->device_id_);  // fixed "gpu_amd_device_" prefix followed by the formatted device ID
+}
+
+std::string gpu_amd_hardware_sampler::samples_only_as_yaml_string() const {
+    // the YAML entry must not be generated while the sampler reports that it is still sampling
+    if (this->is_sampling()) {
+        throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
+    }
+
+    // appends one category's YAML text; if requested, a category that has samples is followed by a newline
+    std::string yaml{};
+    const auto append_category = [&yaml](const auto &samples, const bool separate) {
+        yaml += samples.generate_yaml_string();
+        if (separate && samples.has_samples()) {
+            yaml += "\n";
+        }
+    };
+    append_category(general_samples_, true);
+    append_category(clock_samples_, true);
+    append_category(power_samples_, true);
+    append_category(memory_samples_, true);
+    append_category(temperature_samples_, false);  // last category: no newline is appended after it
+    return yaml;
+}
+
+std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &sampler) {
+    if (sampler.is_sampling()) {  // still sampling -> nothing is written, only the stream's failbit is set
+        out.setstate(std::ios_base::failbit);
+        return out;
+    }
+    out << fmt::format("sampling interval: {}\n"
+                       "time points: [{}]\n\n"
+                       "general samples:\n{}\n\n"
+                       "clock samples:\n{}\n\n"
+                       "power samples:\n{}\n\n"
+                       "memory samples:\n{}\n\n"
+                       "temperature samples:\n{}",
+                       sampler.sampling_interval(),
+                       fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
+                       sampler.general_samples(),
+                       sampler.clock_samples(),
+                       sampler.power_samples(),
+                       sampler.memory_samples(),
+                       sampler.temperature_samples());
+    return out;
+}
+
+}  // namespace hws
diff --git a/src/hws/gpu_amd/rocm_smi_samples.cpp b/src/hws/gpu_amd/rocm_smi_samples.cpp
new file mode 100644
index 0000000..f149c4e
--- /dev/null
+++ b/src/hws/gpu_amd/rocm_smi_samples.cpp
@@ -0,0 +1,706 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/gpu_amd/rocm_smi_samples.hpp"
+
+#include "hws/utility.hpp"  // hws::detail::{value_or_default, quote}
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+
+#include <ostream>  // std::ostream
+#include <string>   // std::string
+
+namespace hws {
+
+//*************************************************************************************************************************************//
+//                                                           general samples                                                           //
+//*************************************************************************************************************************************//
+
+bool rocm_smi_general_samples::has_samples() const {  // true as soon as any general entry reports a value
+    return architecture_.has_value() || byte_order_.has_value() || vendor_id_.has_value() || name_.has_value()
+           || compute_utilization_.has_value() || memory_utilization_.has_value() || performance_level_.has_value();
+}
+
+std::string rocm_smi_general_samples::generate_yaml_string() const {
+    // without a single recorded value there is nothing to emit, not even the "general:" header
+    if (!has_samples()) {
+        return std::string{};
+    }
+
+    std::string yaml{ "general:\n" };
+
+    // architecture of the device (single quoted string)
+    if (architecture_.has_value()) {
+        yaml.append(fmt::format("  architecture:\n"
+                                "    unit: \"string\"\n"
+                                "    values: \"{}\"\n",
+                                architecture_.value()));
+    }
+    // byte order of the device (single quoted string)
+    if (byte_order_.has_value()) {
+        yaml.append(fmt::format("  byte_order:\n"
+                                "    unit: \"string\"\n"
+                                "    values: \"{}\"\n",
+                                byte_order_.value()));
+    }
+    // vendor specific ID (single quoted string)
+    if (vendor_id_.has_value()) {
+        yaml.append(fmt::format("  vendor_id:\n"
+                                "    unit: \"string\"\n"
+                                "    values: \"{}\"\n",
+                                vendor_id_.value()));
+    }
+    // name of the device (single quoted string)
+    if (name_.has_value()) {
+        yaml.append(fmt::format("  name:\n"
+                                "    unit: \"string\"\n"
+                                "    values: \"{}\"\n",
+                                name_.value()));
+    }
+
+    // compute utilization, written as a comma separated list
+    if (compute_utilization_.has_value()) {
+        yaml.append(fmt::format("  compute_utilization:\n"
+                                "    unit: \"percentage\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(compute_utilization_.value(), ", ")));
+    }
+    // memory utilization, written as a comma separated list
+    if (memory_utilization_.has_value()) {
+        yaml.append(fmt::format("  memory_utilization:\n"
+                                "    unit: \"percentage\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(memory_utilization_.value(), ", ")));
+    }
+    // performance level; NOTE(review): the YAML key is "performance_state" while the member is performance_level_ - confirm this is intended
+    if (performance_level_.has_value()) {
+        yaml.append(fmt::format("  performance_state:\n"
+                                "    unit: \"string\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(detail::quote(performance_level_.value()), ", ")));
+    }
+
+    return yaml;
+}
+
+std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) {  // plain-text dump: one "name [unit]: value" line per general entry
+    return out << fmt::format("architecture [string]: {}\n"
+                              "byte_order [string]: {}\n"
+                              "vendor_id [string]: {}\n"
+                              "name [string]: {}\n"
+                              "compute_utilization [%]: [{}]\n"
+                              "memory_utilization [%]: [{}]\n"
+                              "performance_level [string]: [{}]",  // the last line is written without a trailing newline
+                              detail::value_or_default(samples.get_architecture()),  // NOTE(review): presumably substitutes a default for unset entries - confirm in hws/utility.hpp
+                              detail::value_or_default(samples.get_byte_order()),
+                              detail::value_or_default(samples.get_vendor_id()),
+                              detail::value_or_default(samples.get_name()),
+                              fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "),  // list-like entries are joined with ", "
+                              fmt::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_performance_level()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                            clock samples                                                            //
+//*************************************************************************************************************************************//
+
+bool rocm_smi_clock_samples::has_samples() const {
+    // a single member reporting has_value() is enough for the clock category to count as populated
+    return clock_frequency_min_.has_value() || clock_frequency_max_.has_value() || memory_clock_frequency_min_.has_value()
+           || memory_clock_frequency_max_.has_value() || socket_clock_frequency_min_.has_value() || socket_clock_frequency_max_.has_value()
+           || available_clock_frequencies_.has_value() || available_memory_clock_frequencies_.has_value() || clock_frequency_.has_value()
+           || memory_clock_frequency_.has_value() || socket_clock_frequency_.has_value() || overdrive_level_.has_value() || memory_overdrive_level_.has_value();
+}
+
+std::string rocm_smi_clock_samples::generate_yaml_string() const {
+    // without a single recorded value there is nothing to emit, not even the "clock:" header
+    if (!has_samples()) {
+        return std::string{};
+    }
+
+    std::string yaml{ "clock:\n" };
+
+    // minimum system clock frequency (single value)
+    if (clock_frequency_min_.has_value()) {
+        yaml.append(fmt::format("  clock_frequency_min:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: {}\n",
+                                clock_frequency_min_.value()));
+    }
+    // maximum system clock frequency (single value)
+    if (clock_frequency_max_.has_value()) {
+        yaml.append(fmt::format("  clock_frequency_max:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: {}\n",
+                                clock_frequency_max_.value()));
+    }
+    // minimum memory clock frequency (single value)
+    if (memory_clock_frequency_min_.has_value()) {
+        yaml.append(fmt::format("  memory_clock_frequency_min:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: {}\n",
+                                memory_clock_frequency_min_.value()));
+    }
+    // maximum memory clock frequency (single value)
+    if (memory_clock_frequency_max_.has_value()) {
+        yaml.append(fmt::format("  memory_clock_frequency_max:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: {}\n",
+                                memory_clock_frequency_max_.value()));
+    }
+    // minimum socket clock frequency (single value)
+    if (socket_clock_frequency_min_.has_value()) {
+        yaml.append(fmt::format("  socket_clock_frequency_min:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: {}\n",
+                                socket_clock_frequency_min_.value()));
+    }
+    // maximum socket clock frequency (single value)
+    if (socket_clock_frequency_max_.has_value()) {
+        yaml.append(fmt::format("  socket_clock_frequency_max:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: {}\n",
+                                socket_clock_frequency_max_.value()));
+    }
+    // all available system clock frequencies, written as a comma separated list
+    if (available_clock_frequencies_.has_value()) {
+        yaml.append(fmt::format("  available_clock_frequencies:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(available_clock_frequencies_.value(), ", ")));
+    }
+    // all available memory clock frequencies, written as a comma separated list
+    if (available_memory_clock_frequencies_.has_value()) {
+        yaml.append(fmt::format("  available_memory_clock_frequencies:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(available_memory_clock_frequencies_.value(), ", ")));
+    }
+
+    // system clock frequency, written as a comma separated list
+    if (clock_frequency_.has_value()) {
+        yaml.append(fmt::format("  clock_frequency:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(clock_frequency_.value(), ", ")));
+    }
+    // memory clock frequency, written as a comma separated list
+    if (memory_clock_frequency_.has_value()) {
+        yaml.append(fmt::format("  memory_clock_frequency:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(memory_clock_frequency_.value(), ", ")));
+    }
+    // socket clock frequency, written as a comma separated list
+    if (socket_clock_frequency_.has_value()) {
+        yaml.append(fmt::format("  socket_clock_frequency:\n"
+                                "    unit: \"MHz\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(socket_clock_frequency_.value(), ", ")));
+    }
+    // overdrive level, written as a comma separated list
+    if (overdrive_level_.has_value()) {
+        yaml.append(fmt::format("  overdrive_level:\n"
+                                "    unit: \"percentage\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(overdrive_level_.value(), ", ")));
+    }
+    // memory overdrive level, written as a comma separated list
+    if (memory_overdrive_level_.has_value()) {
+        yaml.append(fmt::format("  memory_overdrive_level:\n"
+                                "    unit: \"percentage\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(memory_overdrive_level_.value(), ", ")));
+    }
+
+    return yaml;
+}
+
+std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples) {  // plain-text dump: one "name [unit]: value" line per clock entry
+    return out << fmt::format("clock_frequency_min [MHz]: {}\n"
+                              "clock_frequency_max [MHz]: {}\n"
+                              "memory_clock_frequency_min [MHz]: {}\n"
+                              "memory_clock_frequency_max [MHz]: {}\n"
+                              "socket_clock_frequency_min [MHz]: {}\n"
+                              "socket_clock_frequency_max [MHz]: {}\n"
+                              "available_clock_frequencies [MHz]: [{}]\n"
+                              "available_memory_clock_frequencies [MHz]: [{}]\n"
+                              "clock_frequency [MHz]: [{}]\n"
+                              "memory_clock_frequency [MHz]: [{}]\n"
+                              "socket_clock_frequency [MHz]: [{}]\n"
+                              "overdrive_level [%]: [{}]\n"
+                              "memory_overdrive_level [%]: [{}]",  // the last line is written without a trailing newline
+                              detail::value_or_default(samples.get_clock_frequency_min()),  // NOTE(review): presumably substitutes a default for unset entries - confirm in hws/utility.hpp
+                              detail::value_or_default(samples.get_clock_frequency_max()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_min()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_max()),
+                              detail::value_or_default(samples.get_socket_clock_frequency_min()),
+                              detail::value_or_default(samples.get_socket_clock_frequency_max()),
+                              fmt::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "),  // list-like entries are joined with ", "
+                              fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_socket_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_overdrive_level()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                            power samples                                                            //
+//*************************************************************************************************************************************//
+
+bool rocm_smi_power_samples::has_samples() const {
+    // the power category counts as populated once any one of its members reports a value
+    return power_management_limit_.has_value() || power_enforced_limit_.has_value() || power_measurement_type_.has_value() || available_power_profiles_.has_value()
+           || power_usage_.has_value() || power_total_energy_consumption_.has_value() || power_profile_.has_value();
+}
+
+std::string rocm_smi_power_samples::generate_yaml_string() const {
+    // without a single recorded value there is nothing to emit, not even the "power:" header
+    if (!has_samples()) {
+        return std::string{};
+    }
+
+    std::string yaml{ "power:\n" };
+
+    // power management limit (single value)
+    if (power_management_limit_.has_value()) {
+        yaml.append(fmt::format("  power_management_limit:\n"
+                                "    unit: \"W\"\n"
+                                "    values: {}\n",
+                                power_management_limit_.value()));
+    }
+    // enforced power limit (single value)
+    if (power_enforced_limit_.has_value()) {
+        yaml.append(fmt::format("  power_enforced_limit:\n"
+                                "    unit: \"W\"\n"
+                                "    values: {}\n",
+                                power_enforced_limit_.value()));
+    }
+    // type of the power measurement (single quoted string)
+    if (power_measurement_type_.has_value()) {
+        yaml.append(fmt::format("  power_measurement_type:\n"
+                                "    unit: \"string\"\n"
+                                "    values: \"{}\"\n",
+                                power_measurement_type_.value()));
+    }
+    // all available power profiles, each one passed through detail::quote
+    if (available_power_profiles_.has_value()) {
+        yaml.append(fmt::format("  available_power_profiles:\n"
+                                "    unit: \"string\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(detail::quote(available_power_profiles_.value()), ", ")));
+    }
+
+    // power usage, written as a comma separated list
+    if (power_usage_.has_value()) {
+        yaml.append(fmt::format("  power_usage:\n"
+                                "    unit: \"W\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(power_usage_.value(), ", ")));
+    }
+    // total energy; NOTE(review): the YAML key ends in "_consumed" while the member ends in "_consumption_" - confirm this is intended
+    if (power_total_energy_consumption_.has_value()) {
+        yaml.append(fmt::format("  power_total_energy_consumed:\n"
+                                "    unit: \"J\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(power_total_energy_consumption_.value(), ", ")));
+    }
+    // power profile, each entry passed through detail::quote
+    if (power_profile_.has_value()) {
+        yaml.append(fmt::format("  power_profile:\n"
+                                "    unit: \"string\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(detail::quote(power_profile_.value()), ", ")));
+    }
+
+    return yaml;
+}
+
+std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples) {  // plain-text dump: one "name [unit]: value" line per power entry
+    return out << fmt::format("power_management_limit [W]: {}\n"
+                              "power_enforced_limit [W]: {}\n"
+                              "power_measurement_type [string]: {}\n"
+                              "available_power_profiles [string]: [{}]\n"
+                              "power_usage [W]: [{}]\n"
+                              "power_total_energy_consumption [J]: [{}]\n"
+                              "power_profile [string]: [{}]",  // the last line is written without a trailing newline
+                              detail::value_or_default(samples.get_power_management_limit()),  // NOTE(review): presumably substitutes a default for unset entries - confirm in hws/utility.hpp
+                              detail::value_or_default(samples.get_power_enforced_limit()),
+                              detail::value_or_default(samples.get_power_measurement_type()),
+                              fmt::join(detail::value_or_default(samples.get_available_power_profiles()), ", "),  // list-like entries are joined with ", "
+                              fmt::join(detail::value_or_default(samples.get_power_usage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_profile()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                            memory samples                                                           //
+//*************************************************************************************************************************************//
+
+bool rocm_smi_memory_samples::has_samples() const {  // true as soon as any memory or PCIe entry reports a value
+    return memory_total_.has_value() || visible_memory_total_.has_value() || num_pcie_lanes_min_.has_value() || num_pcie_lanes_max_.has_value()
+           || pcie_link_transfer_rate_min_.has_value() || pcie_link_transfer_rate_max_.has_value() || memory_used_.has_value() || memory_free_.has_value()
+           || num_pcie_lanes_.has_value() || pcie_link_transfer_rate_.has_value();
+}
+
+std::string rocm_smi_memory_samples::generate_yaml_string() const {
+    // without a single recorded value there is nothing to emit, not even the "memory:" header
+    if (!has_samples()) {
+        return std::string{};
+    }
+
+    std::string yaml{ "memory:\n" };
+
+    // total amount of memory (single value)
+    if (memory_total_.has_value()) {
+        yaml.append(fmt::format("  memory_total:\n"
+                                "    unit: \"B\"\n"
+                                "    values: {}\n",
+                                memory_total_.value()));
+    }
+    // total amount of visible memory (single value)
+    if (visible_memory_total_.has_value()) {
+        yaml.append(fmt::format("  visible_memory_total:\n"
+                                "    unit: \"B\"\n"
+                                "    values: {}\n",
+                                visible_memory_total_.value()));
+    }
+    // minimum number of PCIe lanes (single value)
+    if (num_pcie_lanes_min_.has_value()) {
+        yaml.append(fmt::format("  num_pcie_lanes_min:\n"
+                                "    unit: \"int\"\n"
+                                "    values: {}\n",
+                                num_pcie_lanes_min_.value()));
+    }
+    // maximum number of PCIe lanes (single value)
+    if (num_pcie_lanes_max_.has_value()) {
+        yaml.append(fmt::format("  num_pcie_lanes_max:\n"
+                                "    unit: \"int\"\n"
+                                "    values: {}\n",
+                                num_pcie_lanes_max_.value()));
+    }
+    // minimum PCIe link transfer rate (single value)
+    if (pcie_link_transfer_rate_min_.has_value()) {
+        yaml.append(fmt::format("  pcie_link_transfer_rate_min:\n"
+                                "    unit: \"MT/s\"\n"
+                                "    values: {}\n",
+                                pcie_link_transfer_rate_min_.value()));
+    }
+    // maximum PCIe link transfer rate (single value)
+    if (pcie_link_transfer_rate_max_.has_value()) {
+        yaml.append(fmt::format("  pcie_link_transfer_rate_max:\n"
+                                "    unit: \"MT/s\"\n"
+                                "    values: {}\n",
+                                pcie_link_transfer_rate_max_.value()));
+    }
+
+    // used memory, written as a comma separated list
+    if (memory_used_.has_value()) {
+        yaml.append(fmt::format("  memory_used:\n"
+                                "    unit: \"B\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(memory_used_.value(), ", ")));
+    }
+    // free memory, written as a comma separated list
+    if (memory_free_.has_value()) {
+        yaml.append(fmt::format("  memory_free:\n"
+                                "    unit: \"B\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(memory_free_.value(), ", ")));
+    }
+
+    // number of PCIe lanes, written as a comma separated list
+    if (num_pcie_lanes_.has_value()) {
+        yaml.append(fmt::format("  num_pcie_lanes:\n"
+                                "    unit: \"int\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(num_pcie_lanes_.value(), ", ")));
+    }
+    // PCIe link transfer rate, written as a comma separated list
+    if (pcie_link_transfer_rate_.has_value()) {
+        yaml.append(fmt::format("  pcie_link_transfer_rate:\n"
+                                "    unit: \"MT/s\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(pcie_link_transfer_rate_.value(), ", ")));
+    }
+
+    return yaml;
+}
+
+std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples) {  // plain-text dump: one "name [unit]: value" line per memory/PCIe entry
+    return out << fmt::format("memory_total [B]: {}\n"
+                              "visible_memory_total [B]: {}\n"
+                              "num_pcie_lanes_min [int]: {}\n"
+                              "num_pcie_lanes_max [int]: {}\n"
+                              "pcie_link_transfer_rate_min [MT/s]: {}\n"  // unit label kept identical to the one emitted by generate_yaml_string()
+                              "pcie_link_transfer_rate_max [MT/s]: {}\n"
+                              "memory_used [B]: [{}]\n"
+                              "memory_free [B]: [{}]\n"
+                              "num_pcie_lanes [int]: [{}]\n"
+                              "pcie_link_transfer_rate [MT/s]: [{}]",  // the last line is written without a trailing newline
+                              detail::value_or_default(samples.get_memory_total()),  // NOTE(review): presumably substitutes a default for unset entries - confirm in hws/utility.hpp
+                              detail::value_or_default(samples.get_visible_memory_total()),
+                              detail::value_or_default(samples.get_num_pcie_lanes_min()),
+                              detail::value_or_default(samples.get_num_pcie_lanes_max()),
+                              detail::value_or_default(samples.get_pcie_link_transfer_rate_min()),
+                              detail::value_or_default(samples.get_pcie_link_transfer_rate_max()),
+                              fmt::join(detail::value_or_default(samples.get_memory_used()), ", "),  // list-like entries are joined with ", "
+                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_link_transfer_rate()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                         temperature samples                                                         //
+//*************************************************************************************************************************************//
+
+bool rocm_smi_temperature_samples::has_samples() const {
+    // the temperature category counts as populated once any one of its members reports a value;
+    // this covers the fan entries as well as the edge, memory, hotspot and HBM 0-3 temperatures
+    return num_fans_.has_value() || fan_speed_max_.has_value() || temperature_min_.has_value() || temperature_max_.has_value()
+           || memory_temperature_min_.has_value() || memory_temperature_max_.has_value() || hotspot_temperature_min_.has_value() || hotspot_temperature_max_.has_value()
+           || hbm_0_temperature_min_.has_value() || hbm_0_temperature_max_.has_value() || hbm_1_temperature_min_.has_value() || hbm_1_temperature_max_.has_value()
+           || hbm_2_temperature_min_.has_value() || hbm_2_temperature_max_.has_value() || hbm_3_temperature_min_.has_value() || hbm_3_temperature_max_.has_value()
+           || fan_speed_percentage_.has_value() || temperature_.has_value() || memory_temperature_.has_value() || hotspot_temperature_.has_value()
+           || hbm_0_temperature_.has_value() || hbm_1_temperature_.has_value() || hbm_2_temperature_.has_value() || hbm_3_temperature_.has_value();
+}
+
+std::string rocm_smi_temperature_samples::generate_yaml_string() const {
+    // without a single recorded value there is nothing to emit, not even the "temperature:" header
+    if (!has_samples()) {
+        return std::string{};
+    }
+
+    std::string yaml{ "temperature:\n" };
+
+    // fan count (emulated)
+    if (num_fans_.has_value()) {
+        yaml.append(fmt::format("  num_fans:\n"
+                                "    unit: \"int\"\n"
+                                "    values: {}\n",
+                                num_fans_.value()));
+    }
+    // upper bound of the fan speed
+    if (fan_speed_max_.has_value()) {
+        yaml.append(fmt::format("  fan_speed_max:\n"
+                                "    unit: \"RPM\"\n"
+                                "    values: {}\n",
+                                fan_speed_max_.value()));
+    }
+    // GPU edge temperature: minimum
+    if (temperature_min_.has_value()) {
+        yaml.append(fmt::format("  temperature_min:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                temperature_min_.value()));
+    }
+    // GPU edge temperature: maximum
+    if (temperature_max_.has_value()) {
+        yaml.append(fmt::format("  temperature_max:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                temperature_max_.value()));
+    }
+    // GPU memory temperature: minimum
+    if (memory_temperature_min_.has_value()) {
+        yaml.append(fmt::format("  memory_temperature_min:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                memory_temperature_min_.value()));
+    }
+    // GPU memory temperature: maximum
+    if (memory_temperature_max_.has_value()) {
+        yaml.append(fmt::format("  memory_temperature_max:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                memory_temperature_max_.value()));
+    }
+    // GPU hotspot temperature: minimum
+    if (hotspot_temperature_min_.has_value()) {
+        yaml.append(fmt::format("  hotspot_temperature_min:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hotspot_temperature_min_.value()));
+    }
+    // GPU hotspot temperature: maximum
+    if (hotspot_temperature_max_.has_value()) {
+        yaml.append(fmt::format("  hotspot_temperature_max:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hotspot_temperature_max_.value()));
+    }
+    // GPU HBM 0 temperature: minimum
+    if (hbm_0_temperature_min_.has_value()) {
+        yaml.append(fmt::format("  hbm_0_temperature_min:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hbm_0_temperature_min_.value()));
+    }
+    // GPU HBM 0 temperature: maximum
+    if (hbm_0_temperature_max_.has_value()) {
+        yaml.append(fmt::format("  hbm_0_temperature_max:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hbm_0_temperature_max_.value()));
+    }
+    // GPU HBM 1 temperature: minimum
+    if (hbm_1_temperature_min_.has_value()) {
+        yaml.append(fmt::format("  hbm_1_temperature_min:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hbm_1_temperature_min_.value()));
+    }
+    // GPU HBM 1 temperature: maximum
+    if (hbm_1_temperature_max_.has_value()) {
+        yaml.append(fmt::format("  hbm_1_temperature_max:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hbm_1_temperature_max_.value()));
+    }
+    // GPU HBM 2 temperature: minimum
+    if (hbm_2_temperature_min_.has_value()) {
+        yaml.append(fmt::format("  hbm_2_temperature_min:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hbm_2_temperature_min_.value()));
+    }
+    // GPU HBM 2 temperature: maximum
+    if (hbm_2_temperature_max_.has_value()) {
+        yaml.append(fmt::format("  hbm_2_temperature_max:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hbm_2_temperature_max_.value()));
+    }
+    // GPU HBM 3 temperature: minimum
+    if (hbm_3_temperature_min_.has_value()) {
+        yaml.append(fmt::format("  hbm_3_temperature_min:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hbm_3_temperature_min_.value()));
+    }
+    // GPU HBM 3 temperature: maximum
+    if (hbm_3_temperature_max_.has_value()) {
+        yaml.append(fmt::format("  hbm_3_temperature_max:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: {}\n",
+                                hbm_3_temperature_max_.value()));
+    }
+
+    // fan speed in percent, written as a comma separated list
+    if (fan_speed_percentage_.has_value()) {
+        yaml.append(fmt::format("  fan_speed_percentage:\n"
+                                "    unit: \"percentage\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(fan_speed_percentage_.value(), ", ")));
+    }
+    // GPU edge temperature, written as a comma separated list
+    if (temperature_.has_value()) {
+        yaml.append(fmt::format("  temperature:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(temperature_.value(), ", ")));
+    }
+    // GPU memory temperature, written as a comma separated list
+    if (memory_temperature_.has_value()) {
+        yaml.append(fmt::format("  memory_temperature:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(memory_temperature_.value(), ", ")));
+    }
+    // GPU hotspot temperature, written as a comma separated list
+    if (hotspot_temperature_.has_value()) {
+        yaml.append(fmt::format("  hotspot_temperature:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(hotspot_temperature_.value(), ", ")));
+    }
+    // GPU HBM 0 temperature, written as a comma separated list
+    if (hbm_0_temperature_.has_value()) {
+        yaml.append(fmt::format("  hbm_0_temperature:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(hbm_0_temperature_.value(), ", ")));
+    }
+    // GPU HBM 1 temperature, written as a comma separated list
+    if (hbm_1_temperature_.has_value()) {
+        yaml.append(fmt::format("  hbm_1_temperature:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(hbm_1_temperature_.value(), ", ")));
+    }
+    // GPU HBM 2 temperature, written as a comma separated list
+    if (hbm_2_temperature_.has_value()) {
+        yaml.append(fmt::format("  hbm_2_temperature:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(hbm_2_temperature_.value(), ", ")));
+    }
+    // GPU HBM 3 temperature, written as a comma separated list
+    if (hbm_3_temperature_.has_value()) {
+        yaml.append(fmt::format("  hbm_3_temperature:\n"
+                                "    unit: \"°C\"\n"
+                                "    values: [{}]\n",
+                                fmt::join(hbm_3_temperature_.value(), ", ")));
+    }
+
+    return yaml;
+}
+
+// Write all ROCm SMI temperature samples to the stream, one "name [unit]: value" entry per line (no newline after the last entry).
+// Every getter result is passed through detail::value_or_default before it is formatted.
+std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples) {
+    // entries that are printed as a single value
+    const std::string scalar_part = fmt::format("num_fans [int]: {}\n"
+                                                "fan_speed_max [RPM]: {}\n"
+                                                "temperature_min [°C]: {}\n"
+                                                "temperature_max [°C]: {}\n"
+                                                "memory_temperature_min [°C]: {}\n"
+                                                "memory_temperature_max [°C]: {}\n"
+                                                "hotspot_temperature_min [°C]: {}\n"
+                                                "hotspot_temperature_max [°C]: {}\n"
+                                                "hbm_0_temperature_min [°C]: {}\n"
+                                                "hbm_0_temperature_max [°C]: {}\n"
+                                                "hbm_1_temperature_min [°C]: {}\n"
+                                                "hbm_1_temperature_max [°C]: {}\n"
+                                                "hbm_2_temperature_min [°C]: {}\n"
+                                                "hbm_2_temperature_max [°C]: {}\n"
+                                                "hbm_3_temperature_min [°C]: {}\n"
+                                                "hbm_3_temperature_max [°C]: {}\n",
+                                                detail::value_or_default(samples.get_num_fans()),
+                                                detail::value_or_default(samples.get_fan_speed_max()),
+                                                detail::value_or_default(samples.get_temperature_min()), detail::value_or_default(samples.get_temperature_max()),
+                                                detail::value_or_default(samples.get_memory_temperature_min()), detail::value_or_default(samples.get_memory_temperature_max()),
+                                                detail::value_or_default(samples.get_hotspot_temperature_min()), detail::value_or_default(samples.get_hotspot_temperature_max()),
+                                                detail::value_or_default(samples.get_hbm_0_temperature_min()), detail::value_or_default(samples.get_hbm_0_temperature_max()),
+                                                detail::value_or_default(samples.get_hbm_1_temperature_min()), detail::value_or_default(samples.get_hbm_1_temperature_max()),
+                                                detail::value_or_default(samples.get_hbm_2_temperature_min()), detail::value_or_default(samples.get_hbm_2_temperature_max()),
+                                                detail::value_or_default(samples.get_hbm_3_temperature_min()), detail::value_or_default(samples.get_hbm_3_temperature_max()));
+
+    // entries that are printed as a comma separated list of values
+    const std::string series_part = fmt::format("fan_speed_percentage [%]: [{}]\n"
+                                                "temperature [°C]: [{}]\n"
+                                                "memory_temperature [°C]: [{}]\n"
+                                                "hotspot_temperature [°C]: [{}]\n"
+                                                "hbm_0_temperature [°C]: [{}]\n"
+                                                "hbm_1_temperature [°C]: [{}]\n"
+                                                "hbm_2_temperature [°C]: [{}]\n"
+                                                "hbm_3_temperature [°C]: [{}]",
+                                                fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "),
+                                                fmt::join(detail::value_or_default(samples.get_temperature()), ", "),
+                                                fmt::join(detail::value_or_default(samples.get_memory_temperature()), ", "),
+                                                fmt::join(detail::value_or_default(samples.get_hotspot_temperature()), ", "),
+                                                fmt::join(detail::value_or_default(samples.get_hbm_0_temperature()), ", "),
+                                                fmt::join(detail::value_or_default(samples.get_hbm_1_temperature()), ", "),
+                                                fmt::join(detail::value_or_default(samples.get_hbm_2_temperature()), ", "),
+                                                fmt::join(detail::value_or_default(samples.get_hbm_3_temperature()), ", "));
+
+    return out << scalar_part + series_part;
+}
+
+}  // namespace hws
diff --git a/src/hws/gpu_amd/utility.cpp b/src/hws/gpu_amd/utility.cpp
new file mode 100644
index 0000000..a88969a
--- /dev/null
+++ b/src/hws/gpu_amd/utility.cpp
@@ -0,0 +1,42 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/gpu_amd/utility.hpp"
+
+#include "rocm_smi/rocm_smi.h"  // ROCm SMI runtime functions
+
+#include <string>  // std::string
+
+namespace hws::detail {
+
+// Map a ROCm SMI performance level to its lowercase string name; levels without a table entry (e.g. RSMI_DEV_PERF_LEVEL_UNKNOWN) yield "unknown".
+std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) {
+    struct named_level {
+        rsmi_dev_perf_level_t level;
+        const char *name;
+    };
+    static constexpr named_level named_levels[] = {
+        { RSMI_DEV_PERF_LEVEL_AUTO, "auto" },
+        { RSMI_DEV_PERF_LEVEL_LOW, "low" },
+        { RSMI_DEV_PERF_LEVEL_HIGH, "high" },
+        { RSMI_DEV_PERF_LEVEL_MANUAL, "manual" },
+        { RSMI_DEV_PERF_LEVEL_STABLE_STD, "stable_std" },
+        { RSMI_DEV_PERF_LEVEL_STABLE_PEAK, "stable_peak" },
+        { RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK, "stable_min_mclk" },
+        { RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK, "stable_min_sclk" },
+        { RSMI_DEV_PERF_LEVEL_DETERMINISM, "determinism" },
+    };
+    for (const named_level &entry : named_levels) {
+        if (entry.level == perf_level) {
+            return entry.name;
+        }
+    }
+    // no table entry matched
+    return "unknown";
+}
+
+}  // namespace hws::detail
diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hws/gpu_intel/hardware_sampler.cpp
similarity index 50%
rename from src/hardware_sampling/gpu_intel/hardware_sampler.cpp
rename to src/hws/gpu_intel/hardware_sampler.cpp
index 7bfa1c6..0be124e 100644
--- a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp
+++ b/src/hws/gpu_intel/hardware_sampler.cpp
@@ -5,22 +5,23 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_intel/hardware_sampler.hpp"
+#include "hws/gpu_intel/hardware_sampler.hpp"
 
-#include "hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp"  // hws::level_zero_device_handle implementation
-#include "hardware_sampling/gpu_intel/level_zero_samples.hpp"             // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
-#include "hardware_sampling/gpu_intel/utility.hpp"                        // HWS_LEVEL_ZERO_ERROR_CHECK
-#include "hardware_sampling/hardware_sampler.hpp"                         // hws::hardware_sampler
-#include "hardware_sampling/utility.hpp"                                  // hws::{durations_from_reference_time, join}
+#include "hws/gpu_intel/level_zero_device_handle_impl.hpp"  // hws::level_zero_device_handle implementation
+#include "hws/gpu_intel/level_zero_samples.hpp"             // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples}
+#include "hws/gpu_intel/utility.hpp"                        // HWS_LEVEL_ZERO_ERROR_CHECK
+#include "hws/hardware_sampler.hpp"                         // hws::hardware_sampler
+#include "hws/sample_category.hpp"                          // hws::sample_category
+#include "hws/utility.hpp"                                  // hws::{durations_from_reference_time, join}
 
+#include "fmt/format.h"          // fmt::format
 #include "level_zero/ze_api.h"   // Level Zero runtime functions
 #include "level_zero/zes_api.h"  // Level Zero runtime functions
 
 #include <chrono>     // std::chrono::{steady_clock, duration_cast, milliseconds}
 #include <cstddef>    // std::size_t
-#include <cstdint>    // std::int32_t
+#include <cstdint>    // std::int32_t, std::int64_t
 #include <exception>  // std::exception, std::terminate
-#include <format>     // std::format
 #include <ios>        // std::ios_base
 #include <iostream>   // std::cerr, std::endl
 #include <stdexcept>  // std::runtime_error
@@ -31,20 +32,20 @@
 
 namespace hws {
 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler() :
-    gpu_intel_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { }
+gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const sample_category category) :
+    gpu_intel_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { }
 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id) :
-    gpu_intel_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { }
+gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const sample_category category) :
+    gpu_intel_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { }
 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    gpu_intel_hardware_sampler{ 0, sampling_interval } { }
+gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    gpu_intel_hardware_sampler{ 0, sampling_interval, category } { }
 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) :
-    hardware_sampler{ sampling_interval } {
+gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    hardware_sampler{ sampling_interval, category } {
     // make sure that zeInit is only called once for all instances
     if (instances_++ == 0) {
-        HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY));
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY))
         // notify that initialization has been finished
         init_finished_ = true;
     } else {
@@ -77,6 +78,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     std::vector<zes_freq_handle_t> frequency_handles{};
     std::vector<zes_pwr_handle_t> power_handles{};
     std::vector<zes_mem_handle_t> memory_handles{};
+    std::vector<zes_fan_handle_t> fan_handles{};
     std::vector<zes_psu_handle_t> psu_handles{};
     std::vector<zes_temp_handle_t> temperature_handles{};
 
@@ -86,12 +88,21 @@ void gpu_intel_hardware_sampler::sampling_loop() {
 
     this->add_time_point(std::chrono::steady_clock::now());
 
+    double initial_total_power_consumption{};  // initial total power consumption in J
+
     // retrieve initial general information
-    {
+    if (this->sample_category_enabled(sample_category::general)) {
+        // the byte order is given by Intel directly
+        general_samples_.byte_order_ = "Little Endian";
+
         ze_device_properties_t ze_device_prop{};
         if (zeDeviceGetProperties(device, &ze_device_prop) == ZE_RESULT_SUCCESS) {
+            general_samples_.vendor_id_ = fmt::format("{:x}", ze_device_prop.vendorId);
             general_samples_.num_threads_per_eu_ = ze_device_prop.numThreadsPerEU;
             general_samples_.eu_simd_width_ = ze_device_prop.physicalEUSimdWidth;
+
+            // assemble list of GPU flags
+            general_samples_.flags_ = detail::property_flags_to_vector(ze_device_prop.flags);
         }
 
         zes_device_properties_t zes_device_prop{};
@@ -127,7 +138,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial clock related information
-    {
+    if (this->sample_category_enabled(sample_category::clock)) {
         std::uint32_t num_frequency_domains{ 0 };
         if (zesDeviceEnumFrequencyDomains(device, &num_frequency_domains, nullptr) == ZE_RESULT_SUCCESS) {
             frequency_handles.resize(num_frequency_domains);
@@ -135,16 +146,16 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                 for (zes_freq_handle_t handle : frequency_handles) {
                     // get frequency properties
                     zes_freq_properties_t prop{};
-                    if (zesFrequencyGetProperties(handle, &prop)) {
+                    if (zesFrequencyGetProperties(handle, &prop) == ZE_RESULT_SUCCESS) {
                         // determine the frequency domain (e.g. GPU, memory, etc)
                         switch (prop.type) {
                             case ZES_FREQ_DOMAIN_GPU:
-                                clock_samples_.clock_gpu_min_ = prop.min;
-                                clock_samples_.clock_gpu_max_ = prop.max;
+                                clock_samples_.clock_frequency_min_ = prop.min;
+                                clock_samples_.clock_frequency_max_ = prop.max;
                                 break;
                             case ZES_FREQ_DOMAIN_MEMORY:
-                                clock_samples_.clock_mem_min_ = prop.min;
-                                clock_samples_.clock_mem_max_ = prop.max;
+                                clock_samples_.memory_clock_frequency_min_ = prop.min;
+                                clock_samples_.memory_clock_frequency_max_ = prop.max;
                                 break;
                             default:
                                 // do nothing
@@ -159,10 +170,10 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                                 // determine the frequency domain (e.g. GPU, memory, etc)
                                 switch (prop.type) {
                                     case ZES_FREQ_DOMAIN_GPU:
-                                        clock_samples_.available_clocks_gpu_ = available_clocks;
+                                        clock_samples_.available_clock_frequencies_ = available_clocks;
                                         break;
                                     case ZES_FREQ_DOMAIN_MEMORY:
-                                        clock_samples_.available_clocks_mem_ = available_clocks;
+                                        clock_samples_.available_memory_clock_frequencies_ = available_clocks;
                                         break;
                                     default:
                                         // do nothing
@@ -179,28 +190,40 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                                 case ZES_FREQ_DOMAIN_GPU:
                                     {
                                         if (frequency_state.tdp >= 0.0) {
-                                            clock_samples_.tdp_frequency_limit_gpu_ = decltype(clock_samples_.tdp_frequency_limit_gpu_)::value_type{ frequency_state.tdp };
+                                            clock_samples_.frequency_limit_tdp_ = decltype(clock_samples_.frequency_limit_tdp_)::value_type{ frequency_state.tdp };
                                         }
                                         if (frequency_state.actual >= 0.0) {
-                                            clock_samples_.clock_gpu_ = decltype(clock_samples_.clock_gpu_)::value_type{ frequency_state.actual };
+                                            clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ frequency_state.actual };
                                         }
                                         if (frequency_state.throttleReasons >= 0.0) {
-                                            using vector_type = decltype(clock_samples_.throttle_reason_gpu_)::value_type;
-                                            clock_samples_.throttle_reason_gpu_ = vector_type{ static_cast<vector_type::value_type>(frequency_state.throttleReasons) };
+                                            {
+                                                using vector_type = decltype(clock_samples_.throttle_reason_)::value_type;
+                                                clock_samples_.throttle_reason_ = vector_type{ static_cast<vector_type::value_type>(static_cast<std::int64_t>(frequency_state.throttleReasons)) };
+                                            }
+                                            {
+                                                using vector_type = decltype(clock_samples_.throttle_reason_string_)::value_type;
+                                                clock_samples_.throttle_reason_string_ = vector_type{ static_cast<vector_type::value_type>(detail::throttle_reason_to_string(frequency_state.throttleReasons)) };
+                                            }
                                         }
                                     }
                                     break;
                                 case ZES_FREQ_DOMAIN_MEMORY:
                                     {
                                         if (frequency_state.tdp >= 0.0) {
-                                            clock_samples_.tdp_frequency_limit_mem_ = decltype(clock_samples_.tdp_frequency_limit_mem_)::value_type{ frequency_state.tdp };
+                                            clock_samples_.memory_frequency_limit_tdp_ = decltype(clock_samples_.memory_frequency_limit_tdp_)::value_type{ frequency_state.tdp };
                                         }
                                         if (frequency_state.actual >= 0.0) {
-                                            clock_samples_.clock_mem_ = decltype(clock_samples_.clock_mem_)::value_type{ frequency_state.actual };
+                                            clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ frequency_state.actual };
                                         }
                                         if (frequency_state.throttleReasons >= 0.0) {
-                                            using vector_type = decltype(clock_samples_.throttle_reason_mem_)::value_type;
-                                            clock_samples_.throttle_reason_mem_ = vector_type{ static_cast<vector_type::value_type>(frequency_state.throttleReasons) };
+                                            {
+                                                using vector_type = decltype(clock_samples_.memory_throttle_reason_)::value_type;
+                                                clock_samples_.memory_throttle_reason_ = vector_type{ static_cast<vector_type::value_type>(static_cast<std::int64_t>(frequency_state.throttleReasons)) };
+                                            }
+                                            {
+                                                using vector_type = decltype(clock_samples_.memory_throttle_reason_string_)::value_type;
+                                                clock_samples_.memory_throttle_reason_string_ = vector_type{ static_cast<vector_type::value_type>(detail::throttle_reason_to_string(frequency_state.throttleReasons)) };
+                                            }
                                         }
                                     }
                                     break;
@@ -216,24 +239,54 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial power related information
-    {
+    if (this->sample_category_enabled(sample_category::power)) {
         std::uint32_t num_power_domains{ 0 };
         if (zesDeviceEnumPowerDomains(device, &num_power_domains, nullptr) == ZE_RESULT_SUCCESS) {
             power_handles.resize(num_power_domains);
             if (zesDeviceEnumPowerDomains(device, &num_power_domains, power_handles.data()) == ZE_RESULT_SUCCESS) {
                 if (!power_handles.empty()) {
                     // NOTE: only the first power domain is used here
+                    // get the power measurement type
+                    // NOTE: only the first value is used here!
+                    std::uint32_t num_power_limit_descriptors{ 1 };
+                    zes_power_limit_ext_desc_t desc{};
+                    if (zesPowerGetLimitsExt(power_handles.front(), &num_power_limit_descriptors, &desc) == ZE_RESULT_SUCCESS) {
+                        switch (desc.level) {
+                            case ZES_POWER_LEVEL_UNKNOWN:
+                                power_samples_.power_measurement_type_ = "unknown";
+                                break;
+                            case ZES_POWER_LEVEL_SUSTAINED:
+                                power_samples_.power_measurement_type_ = "sustained";
+                                break;
+                            case ZES_POWER_LEVEL_BURST:
+                                power_samples_.power_measurement_type_ = "burst";
+                                break;
+                            case ZES_POWER_LEVEL_PEAK:
+                                power_samples_.power_measurement_type_ = "peak";
+                                break;
+                            case ZES_POWER_LEVEL_INSTANTANEOUS:
+                                power_samples_.power_measurement_type_ = "current/instant";
+                                break;
+                            case ZES_POWER_LEVEL_FORCE_UINT32:
+                                power_samples_.power_measurement_type_ = "force uint32";
+                                break;
+                        }
+
+                        power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(desc.limit) / 1000.0;
+                    }
+
                     // get total power consumption
                     zes_power_energy_counter_t energy_counter{};
                     if (zesPowerGetEnergyCounter(power_handles.front(), &energy_counter) == ZE_RESULT_SUCCESS) {
-                        power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ energy_counter.energy };
+                        initial_total_power_consumption = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(energy_counter.energy) / 1000.0 / 1000.0;
+                        power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
+                        power_samples_.power_usage_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };
                     }
 
                     // get energy thresholds
                     zes_energy_threshold_t energy_threshold{};
                     if (zesPowerGetEnergyThreshold(power_handles.front(), &energy_threshold) == ZE_RESULT_SUCCESS) {
-                        power_samples_.energy_threshold_enabled_ = static_cast<decltype(power_samples_.energy_threshold_enabled_)::value_type>(energy_threshold.enable);
-                        power_samples_.energy_threshold_ = energy_threshold.threshold;
+                        power_samples_.power_management_mode_ = static_cast<decltype(power_samples_.power_management_mode_)::value_type>(energy_threshold.enable);
                     }
                 }
             }
@@ -241,7 +294,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial memory related information
-    {
+    if (this->sample_category_enabled(sample_category::memory)) {
         std::uint32_t num_memory_modules{ 0 };
         if (zesDeviceEnumMemoryModules(device, &num_memory_modules, nullptr) == ZE_RESULT_SUCCESS) {
             memory_handles.resize(num_memory_modules);
@@ -262,40 +315,46 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                         }
                         if (prop.busWidth != -1) {
                             // first value to add -> initialize map
-                            if (!memory_samples_.bus_width_.has_value()) {
-                                memory_samples_.bus_width_ = decltype(memory_samples_.bus_width_)::value_type{};
+                            if (!memory_samples_.memory_bus_width_.has_value()) {
+                                memory_samples_.memory_bus_width_ = decltype(memory_samples_.memory_bus_width_)::value_type{};
                             }
                             // add new memory bus width
-                            memory_samples_.bus_width_.value()[memory_module_name] = prop.busWidth;
+                            memory_samples_.memory_bus_width_.value()[memory_module_name] = prop.busWidth;
                         }
                         if (prop.numChannels != -1) {
                             // first value to add -> initialize map
-                            if (!memory_samples_.num_channels_.has_value()) {
-                                memory_samples_.num_channels_ = decltype(memory_samples_.num_channels_)::value_type{};
+                            if (!memory_samples_.memory_num_channels_.has_value()) {
+                                memory_samples_.memory_num_channels_ = decltype(memory_samples_.memory_num_channels_)::value_type{};
                             }
                             // add new number of memory channels
-                            memory_samples_.num_channels_.value()[memory_module_name] = prop.numChannels;
+                            memory_samples_.memory_num_channels_.value()[memory_module_name] = prop.numChannels;
                         }
                         // first value to add -> initialize map
-                        if (!memory_samples_.location_.has_value()) {
-                            memory_samples_.location_ = decltype(memory_samples_.location_)::value_type{};
+                        if (!memory_samples_.memory_location_.has_value()) {
+                            memory_samples_.memory_location_ = decltype(memory_samples_.memory_location_)::value_type{};
                         }
-                        memory_samples_.location_.value()[memory_module_name] = detail::memory_location_to_name(prop.location);
+                        memory_samples_.memory_location_.value()[memory_module_name] = detail::memory_location_to_name(prop.location);
 
                         // get current memory information
                         zes_mem_state_t mem_state{};
                         if (zesMemoryGetState(handle, &mem_state) == ZE_RESULT_SUCCESS) {
                             // first value to add -> initialize map
-                            if (!memory_samples_.allocatable_memory_total_.has_value()) {
-                                memory_samples_.allocatable_memory_total_ = decltype(memory_samples_.allocatable_memory_total_)::value_type{};
+                            if (!memory_samples_.visible_memory_total_.has_value()) {
+                                memory_samples_.visible_memory_total_ = decltype(memory_samples_.visible_memory_total_)::value_type{};
                             }
-                            memory_samples_.allocatable_memory_total_.value()[memory_module_name] = mem_state.size;
+                            memory_samples_.visible_memory_total_.value()[memory_module_name] = mem_state.size;
 
                             // first value to add -> initialize map
                             if (!memory_samples_.memory_free_.has_value()) {
                                 memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{};
                             }
                             memory_samples_.memory_free_.value()[memory_module_name].push_back(mem_state.free);
+
+                            // first value to add -> initialize map
+                            if (!memory_samples_.memory_used_.has_value()) {
+                                memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{};
+                            }
+                            memory_samples_.memory_used_.value()[memory_module_name].push_back(mem_state.size - mem_state.free);
                         }
                     }
                 }
@@ -304,13 +363,13 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                 zes_pci_properties_t pci_prop{};
                 if (zesDevicePciGetProperties(device, &pci_prop) == ZE_RESULT_SUCCESS) {
                     if (pci_prop.maxSpeed.gen != -1) {
-                        memory_samples_.max_pcie_link_generation_ = pci_prop.maxSpeed.gen;
+                        memory_samples_.pcie_link_generation_max_ = pci_prop.maxSpeed.gen;
                     }
                     if (pci_prop.maxSpeed.width != -1) {
-                        memory_samples_.pcie_max_width_ = pci_prop.maxSpeed.width;
+                        memory_samples_.num_pcie_lanes_max_ = pci_prop.maxSpeed.width;
                     }
                     if (pci_prop.maxSpeed.maxBandwidth != -1) {
-                        memory_samples_.pcie_link_max_speed_ = pci_prop.maxSpeed.maxBandwidth;
+                        memory_samples_.pcie_link_speed_max_ = static_cast<decltype(memory_samples_.pcie_link_speed_max_)::value_type>(static_cast<double>(pci_prop.maxSpeed.maxBandwidth) / 1e6);
                     }
                 }
 
@@ -318,10 +377,10 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                 zes_pci_state_t pci_state{};
                 if (zesDevicePciGetState(device, &pci_state) == ZE_RESULT_SUCCESS) {
                     if (pci_state.speed.maxBandwidth != -1) {
-                        memory_samples_.pcie_link_speed_ = decltype(memory_samples_.pcie_link_speed_)::value_type{ pci_state.speed.maxBandwidth };
+                        memory_samples_.pcie_link_speed_ = decltype(memory_samples_.pcie_link_speed_)::value_type{ static_cast<decltype(memory_samples_.pcie_link_speed_max_)::value_type>(static_cast<double>(pci_state.speed.maxBandwidth) / 1e6) };
                     }
                     if (pci_state.speed.width != -1) {
-                        memory_samples_.pcie_link_width_ = decltype(memory_samples_.pcie_link_width_)::value_type{ pci_state.speed.width };
+                        memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{ pci_state.speed.width };
                     }
                     if (pci_state.speed.gen != -1) {
                         memory_samples_.pcie_link_generation_ = decltype(memory_samples_.pcie_link_generation_)::value_type{ pci_state.speed.gen };
@@ -332,7 +391,30 @@ void gpu_intel_hardware_sampler::sampling_loop() {
     }
 
     // retrieve initial temperature related information
-    {
+    if (this->sample_category_enabled(sample_category::temperature)) {
+        std::uint32_t num_fans{ 0 };
+        if (zesDeviceEnumFans(device, &num_fans, nullptr) == ZE_RESULT_SUCCESS) {
+            temperature_samples_.num_fans_ = num_fans;
+
+            fan_handles.resize(num_fans);
+            if (zesDeviceEnumFans(device, &num_fans, fan_handles.data()) == ZE_RESULT_SUCCESS) {
+                // NOTE: only the first fan handle is used here
+                if (!fan_handles.empty()) {
+                    zes_fan_properties_t prop{};
+                    if (zesFanGetProperties(fan_handles.front(), &prop) == ZE_RESULT_SUCCESS) {
+                        temperature_samples_.fan_speed_max_ = prop.maxRPM;
+                    }
+
+                    std::int32_t fan_speed{};
+                    if (zesFanGetState(fan_handles.front(), ZES_FAN_SPEED_UNITS_PERCENT, &fan_speed) == ZE_RESULT_SUCCESS) {
+                        if (fan_speed != -1) {
+                            temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(fan_speed) };
+                        }
+                    }
+                }
+            }
+        }
+
         std::uint32_t num_psus{ 0 };
         if (zesDeviceEnumPsus(device, &num_psus, nullptr) == ZE_RESULT_SUCCESS) {
             psu_handles.resize(num_psus);
@@ -342,7 +424,7 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                     zes_psu_state_t psu_state{};
                     if (zesPsuGetState(psu_handles.front(), &psu_state) == ZE_RESULT_SUCCESS) {
                         if (psu_state.temperature != -1) {
-                            temperature_samples_.temperature_psu_ = decltype(temperature_samples_.temperature_psu_)::value_type{ psu_state.temperature };
+                            temperature_samples_.psu_temperature_ = static_cast<decltype(temperature_samples_.psu_temperature_)::value_type>(psu_state.temperature);
                         }
                     }
                 }
@@ -356,26 +438,66 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                 for (zes_temp_handle_t handle : temperature_handles) {
                     zes_temp_properties_t prop{};
                     if (zesTemperatureGetProperties(handle, &prop) == ZE_RESULT_SUCCESS) {
-                        const std::string sensor_name = detail::temperature_sensor_type_to_name(prop.type);
-                        if (sensor_name.empty()) {
-                            // unsupported sensor type
-                            continue;
-                        }
+                        switch (prop.type) {
+                            case ZES_TEMP_SENSORS_GLOBAL:
+                                {
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.global_temperature_max_.has_value()) {
+                                        temperature_samples_.global_temperature_max_ = decltype(temperature_samples_.global_temperature_max_)::value_type{};
+                                    }
+                                    // add new maximum temperature
+                                    temperature_samples_.global_temperature_max_ = prop.maxTemperature;
 
-                        // first value to add -> initialize map
-                        if (!temperature_samples_.temperature_max_.has_value()) {
-                            temperature_samples_.temperature_max_ = decltype(temperature_samples_.temperature_max_)::value_type{};
-                        }
-                        // add new maximum temperature
-                        temperature_samples_.temperature_max_.value()[sensor_name] = prop.maxTemperature;
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.global_temperature_.has_value()) {
+                                        temperature_samples_.global_temperature_ = decltype(temperature_samples_.global_temperature_)::value_type{};
+                                    }
+                                    double temp{};
+                                    if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) {
+                                        temperature_samples_.global_temperature_->push_back(temp);
+                                    }
+                                }
+                                break;
+                            case ZES_TEMP_SENSORS_GPU:
+                                {
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.temperature_max_.has_value()) {
+                                        temperature_samples_.temperature_max_ = decltype(temperature_samples_.temperature_max_)::value_type{};
+                                    }
+                                    // add new maximum temperature
+                                    temperature_samples_.temperature_max_ = prop.maxTemperature;
 
-                        // first value to add -> initialize map
-                        if (!temperature_samples_.temperature_.has_value()) {
-                            temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{};
-                        }
-                        double temp{};
-                        if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) {
-                            temperature_samples_.temperature_.value()[sensor_name].push_back(temp);
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.temperature_.has_value()) {
+                                        temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{};
+                                    }
+                                    double temp{};
+                                    if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) {
+                                        temperature_samples_.temperature_->push_back(temp);
+                                    }
+                                }
+                                break;
+                            case ZES_TEMP_SENSORS_MEMORY:
+                                {
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.memory_temperature_max_.has_value()) {
+                                        temperature_samples_.memory_temperature_max_ = decltype(temperature_samples_.memory_temperature_max_)::value_type{};
+                                    }
+                                    // add new maximum temperature
+                                    temperature_samples_.memory_temperature_max_ = prop.maxTemperature;
+
+                                    // first value to add -> initialize map
+                                    if (!temperature_samples_.memory_temperature_.has_value()) {
+                                        temperature_samples_.memory_temperature_ = decltype(temperature_samples_.memory_temperature_)::value_type{};
+                                    }
+                                    double temp{};
+                                    if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) {
+                                        temperature_samples_.memory_temperature_->push_back(temp);
+                                    }
+                                }
+                                break;
+                            default:
+                                break;
                         }
                     }
                 }
@@ -394,41 +516,47 @@ void gpu_intel_hardware_sampler::sampling_loop() {
             this->add_time_point(std::chrono::steady_clock::now());
 
             // retrieve clock related samples
-            {
+            if (this->sample_category_enabled(sample_category::clock)) {
                 for (zes_freq_handle_t handle : frequency_handles) {
                     // get frequency properties
                     zes_freq_properties_t prop{};
-                    HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetProperties(handle, &prop));
+                    HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetProperties(handle, &prop))
 
                     // get current frequency information
                     zes_freq_state_t frequency_state{};
-                    if (clock_samples_.clock_gpu_.has_value() || clock_samples_.clock_mem_.has_value()) {
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetState(handle, &frequency_state));
+                    if (clock_samples_.clock_frequency_.has_value() || clock_samples_.memory_clock_frequency_.has_value()) {
+                        HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetState(handle, &frequency_state))
                         // determine the frequency domain (e.g. GPU, memory, etc)
                         switch (prop.type) {
                             case ZES_FREQ_DOMAIN_GPU:
                                 {
-                                    if (clock_samples_.tdp_frequency_limit_gpu_.has_value()) {
-                                        clock_samples_.tdp_frequency_limit_gpu_->push_back(frequency_state.tdp);
+                                    if (clock_samples_.frequency_limit_tdp_.has_value()) {
+                                        clock_samples_.frequency_limit_tdp_->push_back(frequency_state.tdp);
+                                    }
+                                    if (clock_samples_.clock_frequency_.has_value()) {
+                                        clock_samples_.clock_frequency_->push_back(frequency_state.actual);
                                     }
-                                    if (clock_samples_.clock_gpu_.has_value()) {
-                                        clock_samples_.clock_gpu_->push_back(frequency_state.actual);
+                                    if (clock_samples_.throttle_reason_.has_value()) {
+                                        clock_samples_.throttle_reason_->push_back(static_cast<std::int64_t>(frequency_state.throttleReasons));
                                     }
-                                    if (clock_samples_.throttle_reason_gpu_.has_value()) {
-                                        clock_samples_.throttle_reason_gpu_->push_back(static_cast<decltype(clock_samples_.throttle_reason_gpu_)::value_type::value_type>(frequency_state.throttleReasons));
+                                    if (clock_samples_.throttle_reason_string_.has_value()) {
+                                        clock_samples_.throttle_reason_string_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons));
                                     }
                                 }
                                 break;
                             case ZES_FREQ_DOMAIN_MEMORY:
                                 {
-                                    if (clock_samples_.tdp_frequency_limit_mem_.has_value()) {
-                                        clock_samples_.tdp_frequency_limit_mem_->push_back(frequency_state.tdp);
+                                    if (clock_samples_.memory_frequency_limit_tdp_.has_value()) {
+                                        clock_samples_.memory_frequency_limit_tdp_->push_back(frequency_state.tdp);
                                     }
-                                    if (clock_samples_.clock_mem_.has_value()) {
-                                        clock_samples_.clock_mem_->push_back(frequency_state.actual);
+                                    if (clock_samples_.memory_clock_frequency_.has_value()) {
+                                        clock_samples_.memory_clock_frequency_->push_back(frequency_state.actual);
                                     }
-                                    if (clock_samples_.throttle_reason_mem_.has_value()) {
-                                        clock_samples_.throttle_reason_mem_->push_back(static_cast<decltype(clock_samples_.throttle_reason_mem_)::value_type::value_type>(frequency_state.throttleReasons));
+                                    if (clock_samples_.memory_throttle_reason_.has_value()) {
+                                        clock_samples_.memory_throttle_reason_->push_back(static_cast<std::int64_t>(frequency_state.throttleReasons));
+                                    }
+                                    if (clock_samples_.memory_throttle_reason_string_.has_value()) {
+                                        clock_samples_.memory_throttle_reason_string_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons));
                                     }
                                 }
                                 break;
@@ -441,24 +569,32 @@ void gpu_intel_hardware_sampler::sampling_loop() {
             }
 
             // retrieve power related samples
-            {
+            if (this->sample_category_enabled(sample_category::power)) {
                 if (!power_handles.empty()) {
                     // NOTE: only the first power domain is used here
                     if (power_samples_.power_total_energy_consumption_.has_value()) {
                         // get total power consumption
                         zes_power_energy_counter_t energy_counter{};
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter));
+                        HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter))
+
+                        const auto power_consumption = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(energy_counter.energy) / 1000.0 / 1000.0;
 
-                        power_samples_.power_total_energy_consumption_->push_back(energy_counter.energy);
+                        // calculate current power draw as (Energy Difference [J]) / (Time Difference [s])
+                        const std::size_t last_index = this->sampling_time_points().size() - 1;
+                        const double power_usage = ((power_consumption - initial_total_power_consumption) - power_samples_.power_total_energy_consumption_->back()) / (std::chrono::duration<double>(this->sampling_time_points()[last_index] - this->sampling_time_points()[last_index - 1]).count());
+                        power_samples_.power_usage_->push_back(power_usage);
+
+                        // add power consumption last to be able to use the std::vector::back() function
+                        power_samples_.power_total_energy_consumption_->push_back(power_consumption - initial_total_power_consumption);
                     }
                 }
             }
 
             // retrieve memory related samples
-            {
+            if (this->sample_category_enabled(sample_category::memory)) {
                 for (zes_mem_handle_t handle : memory_handles) {
                     zes_mem_properties_t prop{};
-                    HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop));
+                    HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop))
 
                     // get the memory module name
                     const std::string memory_module_name = detail::memory_module_to_name(prop.type);
@@ -466,53 +602,77 @@ void gpu_intel_hardware_sampler::sampling_loop() {
                     if (memory_samples_.memory_free_.has_value()) {
                         // get current memory information
                         zes_mem_state_t mem_state{};
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetState(handle, &mem_state));
+                        HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetState(handle, &mem_state))
 
                         memory_samples_.memory_free_.value()[memory_module_name].push_back(mem_state.free);
+
+                        if (memory_samples_.visible_memory_total_.has_value()) {
+                            memory_samples_.memory_used_.value()[memory_module_name].push_back(memory_samples_.visible_memory_total_.value()[memory_module_name] - mem_state.free);
+                        }
                     }
                 }
 
-                if (memory_samples_.pcie_link_speed_.has_value() || memory_samples_.pcie_link_width_.has_value() || memory_samples_.pcie_link_width_.has_value()) {
+                if (memory_samples_.pcie_link_speed_.has_value() || memory_samples_.num_pcie_lanes_.has_value() || memory_samples_.pcie_link_generation_.has_value()) {  // each PCIe metric is optional on its own, so all three must be tested
                     // the current PCIe stats
                     zes_pci_state_t pci_state{};
-                    HWS_LEVEL_ZERO_ERROR_CHECK(zesDevicePciGetState(device, &pci_state));
+                    HWS_LEVEL_ZERO_ERROR_CHECK(zesDevicePciGetState(device, &pci_state))
                     if (memory_samples_.pcie_link_speed_.has_value()) {
-                        memory_samples_.pcie_link_speed_->push_back(pci_state.speed.maxBandwidth);
+                        memory_samples_.pcie_link_speed_->push_back(static_cast<decltype(memory_samples_.pcie_link_speed_)::value_type::value_type>(static_cast<double>(pci_state.speed.maxBandwidth) / 1e6));
                     }
-                    if (memory_samples_.pcie_link_width_.has_value()) {
-                        memory_samples_.pcie_link_width_->push_back(pci_state.speed.width);
+                    if (memory_samples_.num_pcie_lanes_.has_value()) {
+                        memory_samples_.num_pcie_lanes_->push_back(pci_state.speed.width);
                     }
-                    if (memory_samples_.pcie_link_width_.has_value()) {
+                    if (memory_samples_.pcie_link_generation_.has_value()) {
                         memory_samples_.pcie_link_generation_->push_back(pci_state.speed.gen);
                     }
                 }
             }
 
             // retrieve temperature related samples
-            {
+            if (this->sample_category_enabled(sample_category::temperature)) {
                 if (!psu_handles.empty()) {
-                    if (temperature_samples_.temperature_psu_.has_value()) {
+                    if (temperature_samples_.psu_temperature_.has_value()) {
                         // NOTE: only the first PSU is used here
                         zes_psu_state_t psu_state{};
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesPsuGetState(psu_handles.front(), &psu_state));
-                        temperature_samples_.temperature_psu_->push_back(psu_state.temperature);
+                        HWS_LEVEL_ZERO_ERROR_CHECK(zesPsuGetState(psu_handles.front(), &psu_state))
+                        temperature_samples_.psu_temperature_->push_back(psu_state.temperature);
                     }
                 }
 
                 for (zes_temp_handle_t handle : temperature_handles) {
                     zes_temp_properties_t prop{};
-                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetProperties(handle, &prop));
-
-                    const std::string sensor_name = detail::temperature_sensor_type_to_name(prop.type);
-                    if (sensor_name.empty()) {
-                        // unsupported sensor type
-                        continue;
-                    }
-
-                    if (temperature_samples_.temperature_.has_value() && temperature_samples_.temperature_.value().contains(sensor_name)) {
-                        double temp{};
-                        HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp));
-                        temperature_samples_.temperature_.value()[sensor_name].push_back(temp);
+                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetProperties(handle, &prop))
+
+                    switch (prop.type) {
+                        case ZES_TEMP_SENSORS_GLOBAL:
+                            {
+                                if (temperature_samples_.global_temperature_.has_value()) {
+                                    double temp{};
+                                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp))
+                                    temperature_samples_.global_temperature_->push_back(temp);
+                                }
+                            }
+                            break;
+                        case ZES_TEMP_SENSORS_GPU:
+                            {
+                                if (temperature_samples_.temperature_.has_value()) {
+                                    double temp{};
+                                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp))
+                                    temperature_samples_.temperature_->push_back(temp);
+                                }
+                            }
+                            break;
+                        case ZES_TEMP_SENSORS_MEMORY:
+                            {
+                                if (temperature_samples_.memory_temperature_.has_value()) {
+                                    double temp{};
+                                    HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp))
+                                    temperature_samples_.memory_temperature_->push_back(temp);
+                                }
+                            }
+                            break;
+                        default:
+                            break;
                     }
                 }
             }
@@ -527,25 +687,29 @@ std::string gpu_intel_hardware_sampler::device_identification() const {
     // get the level zero handle from the device
     ze_device_handle_t device = device_.get_impl().device;
     ze_device_properties_t prop{};
-    HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGetProperties(device, &prop));
-    return std::format("gpu_intel_device_{}", prop.deviceId);
+    HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGetProperties(device, &prop))
+    return fmt::format("gpu_intel_device_{}", prop.deviceId);
 }
 
-std::string gpu_intel_hardware_sampler::generate_yaml_string() const {
+std::string gpu_intel_hardware_sampler::samples_only_as_yaml_string() const {  // concatenates the YAML strings of the general, clock, power, memory, and temperature samples
     // check whether it's safe to generate the YAML entry
     if (this->is_sampling()) {
         throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
     }
 
-    return std::format("{}\n"
-                       "{}\n"
-                       "{}\n"
-                       "{}\n"
+    return fmt::format("{}{}"
+                       "{}{}"
+                       "{}{}"
+                       "{}{}"
                        "{}",
                        general_samples_.generate_yaml_string(),
+                       general_samples_.has_samples() ? "\n" : "",  // newline separator only if the general category reports samples
                        clock_samples_.generate_yaml_string(),
+                       clock_samples_.has_samples() ? "\n" : "",  // same rule for the clock category
                        power_samples_.generate_yaml_string(),
+                       power_samples_.has_samples() ? "\n" : "",  // same rule for the power category
                        memory_samples_.generate_yaml_string(),
+                       memory_samples_.has_samples() ? "\n" : "",  // same rule for the memory category
                        temperature_samples_.generate_yaml_string());
 }
 
@@ -554,7 +718,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa
         out.setstate(std::ios_base::failbit);
         return out;
     } else {
-        return out << std::format("sampling interval: {}\n"
+        return out << fmt::format("sampling interval: {}\n"
                                   "time points: [{}]\n\n"
                                   "general samples:\n{}\n\n"
                                   "clock samples:\n{}\n\n"
@@ -562,7 +726,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa
                                   "memory samples:\n{}\n\n"
                                   "temperature samples:\n{}",
                                   sampler.sampling_interval(),
-                                  detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
+                                  fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),
                                   sampler.general_samples(),
                                   sampler.clock_samples(),
                                   sampler.power_samples(),
diff --git a/src/hws/gpu_intel/level_zero_samples.cpp b/src/hws/gpu_intel/level_zero_samples.cpp
new file mode 100644
index 0000000..e296cab
--- /dev/null
+++ b/src/hws/gpu_intel/level_zero_samples.cpp
@@ -0,0 +1,628 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/gpu_intel/level_zero_samples.hpp"
+
+#include "hws/utility.hpp"  // hws::detail::{value_or_default, remove_cvref_t}
+
+#include <ostream>      // std::ostream
+#include <string>       // std::string
+#include <string_view>  // std::string_view
+#include <type_traits>  // std::remove_cvref_t, std::false_type, std::true_type
+#include <vector>       // std::vector
+
+namespace hws {
+
+namespace {
+
+template <typename MapType>
+void append_map_values(std::string &str, const std::string_view entry_name, const MapType &map) {  // appends one text line per (key, value) pair of the optional map to str
+    if (map.has_value()) {  // an unset optional appends nothing
+        for (const auto &[key, value] : map.value()) {
+            if constexpr (detail::is_vector_v<detail::remove_cvref_t<decltype(value)>>) {  // branch selected at compile time from the mapped type
+                str += fmt::format("{}_{}: [{}]\n", entry_name, key, fmt::join(value, ", "));  // vector value: bracketed, comma-separated list
+            } else {
+                str += fmt::format("{}_{}: {}\n", entry_name, key, value);  // any other value: formatted directly
+            }
+        }
+    }
+}
+
+}  // namespace
+
+//*************************************************************************************************************************************//
+//                                                           general samples                                                           //
+//*************************************************************************************************************************************//
+
+bool level_zero_general_samples::has_samples() const {  // true as soon as at least one of the seven general sample fields holds a value
+    return this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value() || this->flags_.has_value() || this->standby_mode_.has_value()
+           || this->num_threads_per_eu_.has_value() || this->eu_simd_width_.has_value();
+}
+
+std::string level_zero_general_samples::generate_yaml_string() const {  // renders every set field as a YAML entry (unit + values) below a top-level general key
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
+    std::string str{ "general:\n" };  // section header; the entries appended below are indented beneath it
+
+    // device byte order
+    if (this->byte_order_.has_value()) {
+        str += fmt::format("  byte_order:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->byte_order_.value());
+    }
+    // the vendor specific ID
+    if (this->vendor_id_.has_value()) {
+        str += fmt::format("  vendor_id:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->vendor_id_.value());
+    }
+    // device name
+    if (this->name_.has_value()) {
+        str += fmt::format("  name:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->name_.value());
+    }
+    // GPU specific flags
+    if (this->flags_.has_value()) {
+        str += fmt::format("  flags:\n"
+                           "    unit: \"string\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(detail::quote(this->flags_.value()), ", "));  // flags are passed through detail::quote and emitted as a list
+    }
+    // the standby mode
+    if (this->standby_mode_.has_value()) {
+        str += fmt::format("  standby_mode:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->standby_mode_.value());
+    }
+    // the number of threads per EU
+    if (this->num_threads_per_eu_.has_value()) {
+        str += fmt::format("  num_threads_per_eu:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->num_threads_per_eu_.value());
+    }
+    // the EU SIMD width
+    if (this->eu_simd_width_.has_value()) {
+        str += fmt::format("  eu_simd_width:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->eu_simd_width_.value());
+    }
+
+    return str;
+}
+
+std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) {  // writes one line per general sample field in the form: name [unit]: value
+    return out << fmt::format("byte_order [string]: {}\n"
+                              "vendor_id [string]: {}\n"
+                              "name [string]: {}\n"
+                              "flags [string]: [{}]\n"
+                              "standby_mode [string]: {}\n"
+                              "num_threads_per_eu [int]: {}\n"
+                              "eu_simd_width [int]: {}",
+                              detail::value_or_default(samples.get_byte_order()),  // every getter result is routed through detail::value_or_default before formatting
+                              detail::value_or_default(samples.get_vendor_id()),
+                              detail::value_or_default(samples.get_name()),
+                              fmt::join(detail::value_or_default(samples.get_flags()), ", "),  // flags are joined with a comma, without the quoting used in the YAML output
+                              detail::value_or_default(samples.get_standby_mode()),
+                              detail::value_or_default(samples.get_num_threads_per_eu()),
+                              detail::value_or_default(samples.get_eu_simd_width()));
+}
+
+//*************************************************************************************************************************************//
+//                                                            clock samples                                                            //
+//*************************************************************************************************************************************//
+
+bool level_zero_clock_samples::has_samples() const {  // true as soon as at least one clock sample field (fixed limits or sampled series) holds a value
+    return this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() || this->memory_clock_frequency_min_.has_value()
+           || this->memory_clock_frequency_max_.has_value() || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value()
+           || this->clock_frequency_.has_value() || this->memory_clock_frequency_.has_value() || this->throttle_reason_.has_value()
+           || this->throttle_reason_string_.has_value() || this->memory_throttle_reason_.has_value() || this->memory_throttle_reason_string_.has_value()
+           || this->frequency_limit_tdp_.has_value() || this->memory_frequency_limit_tdp_.has_value();
+}
+
+std::string level_zero_clock_samples::generate_yaml_string() const {
+    // serialize every clock entry that holds a value into a YAML "clock:" block; without any entry an empty string is returned
+    if (!this->has_samples()) {
+        return "";
+    }
+
+    std::string str{ "clock:\n" };
+
+    // minimum GPU core clock
+    if (this->clock_frequency_min_.has_value()) {
+        str += fmt::format("  clock_frequency_min:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->clock_frequency_min_.value());
+    }
+    // maximum GPU core clock; the key uses the same "clock_frequency_*" naming as the minimum entry and as operator<<
+    if (this->clock_frequency_max_.has_value()) {
+        str += fmt::format("  clock_frequency_max:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->clock_frequency_max_.value());
+    }
+    // minimum memory clock
+    if (this->memory_clock_frequency_min_.has_value()) {
+        str += fmt::format("  memory_clock_frequency_min:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->memory_clock_frequency_min_.value());
+    }
+    // maximum memory clock
+    if (this->memory_clock_frequency_max_.has_value()) {
+        str += fmt::format("  memory_clock_frequency_max:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->memory_clock_frequency_max_.value());
+    }
+    // all possible GPU core clock frequencies, written as comma separated YAML list
+    if (this->available_clock_frequencies_.has_value()) {
+        str += fmt::format("  available_clock_frequencies:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->available_clock_frequencies_.value(), ", "));
+    }
+    // all possible memory clock frequencies, written as comma separated YAML list
+    if (this->available_memory_clock_frequencies_.has_value()) {
+        str += fmt::format("  available_memory_clock_frequencies:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->available_memory_clock_frequencies_.value(), ", "));
+    }
+
+    // the current GPU core clock frequency (all entries below are written as comma separated YAML lists)
+    if (this->clock_frequency_.has_value()) {
+        str += fmt::format("  clock_frequency:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->clock_frequency_.value(), ", "));
+    }
+    // the current memory clock frequency
+    if (this->memory_clock_frequency_.has_value()) {
+        str += fmt::format("  memory_clock_frequency:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_clock_frequency_.value(), ", "));
+    }
+    // the current GPU core throttle reason as bitmask
+    if (this->throttle_reason_.has_value()) {
+        str += fmt::format("  throttle_reason:\n"
+                           "    unit: \"bitmask\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->throttle_reason_.value(), ", "));
+    }
+    // the current GPU core throttle reason in its textual form
+    if (this->throttle_reason_string_.has_value()) {
+        str += fmt::format("  throttle_reason_string:\n"
+                           "    unit: \"string\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->throttle_reason_string_.value(), ", "));
+    }
+    // the current memory throttle reason as bitmask
+    if (this->memory_throttle_reason_.has_value()) {
+        str += fmt::format("  memory_throttle_reason:\n"
+                           "    unit: \"bitmask\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_throttle_reason_.value(), ", "));
+    }
+    // the current memory throttle reason in its textual form
+    if (this->memory_throttle_reason_string_.has_value()) {
+        str += fmt::format("  memory_throttle_reason_string:\n"
+                           "    unit: \"string\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_throttle_reason_string_.value(), ", "));
+    }
+    // the maximum GPU core frequency based on the current TDP limit
+    if (this->frequency_limit_tdp_.has_value()) {
+        str += fmt::format("  frequency_limit_tdp:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->frequency_limit_tdp_.value(), ", "));
+    }
+    // the maximum memory frequency based on the current TDP limit
+    if (this->memory_frequency_limit_tdp_.has_value()) {
+        str += fmt::format("  memory_frequency_limit_tdp:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_frequency_limit_tdp_.value(), ", "));
+    }
+
+    return str;
+}
+
+std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samples) {
+    using detail::value_or_default;
+    // one "name [unit]: value" line per entry; the placeholders are filled in the order of the arguments below, no trailing newline
+    return out << fmt::format("clock_frequency_min [MHz]: {}\nclock_frequency_max [MHz]: {}\n"
+                              "memory_clock_frequency_min [MHz]: {}\nmemory_clock_frequency_max [MHz]: {}\n"
+                              "available_clock_frequencies [MHz]: [{}]\n"
+                              "available_memory_clock_frequencies [MHz]: [{}]\n"
+                              "clock_frequency [MHz]: [{}]\n"
+                              "memory_clock_frequency [MHz]: [{}]\n"
+                              "throttle_reason [bitmask]: [{}]\n"
+                              "throttle_reason_string [string]: [{}]\n"
+                              "memory_throttle_reason [bitmask]: [{}]\n"
+                              "memory_throttle_reason_string [string]: [{}]\n"
+                              "frequency_limit_tdp [MHz]: [{}]\n"
+                              "memory_frequency_limit_tdp [MHz]: [{}]",
+                              value_or_default(samples.get_clock_frequency_min()),
+                              value_or_default(samples.get_clock_frequency_max()),
+                              value_or_default(samples.get_memory_clock_frequency_min()),
+                              value_or_default(samples.get_memory_clock_frequency_max()),
+                              fmt::join(value_or_default(samples.get_available_clock_frequencies()), ", "),
+                              fmt::join(value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
+                              fmt::join(value_or_default(samples.get_clock_frequency()), ", "),
+                              fmt::join(value_or_default(samples.get_memory_clock_frequency()), ", "),
+                              fmt::join(value_or_default(samples.get_throttle_reason()), ", "),
+                              fmt::join(value_or_default(samples.get_throttle_reason_string()), ", "),
+                              fmt::join(value_or_default(samples.get_memory_throttle_reason()), ", "),
+                              fmt::join(value_or_default(samples.get_memory_throttle_reason_string()), ", "),
+                              fmt::join(value_or_default(samples.get_frequency_limit_tdp()), ", "),
+                              fmt::join(value_or_default(samples.get_memory_frequency_limit_tdp()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                            power samples                                                            //
+//*************************************************************************************************************************************//
+
+bool level_zero_power_samples::has_samples() const {
+    // reports whether at least one of the power related entries currently holds a value
+    return power_enforced_limit_.has_value() || power_measurement_type_.has_value() || power_management_mode_.has_value() || power_usage_.has_value() || power_total_energy_consumption_.has_value();
+}
+
+std::string level_zero_power_samples::generate_yaml_string() const {
+    // nothing has been recorded -> there is nothing to serialize
+    if (!has_samples()) {
+        return std::string{};
+    }
+
+    std::string yaml{ "power:\n" };
+
+    // enforced power limit
+    if (power_enforced_limit_.has_value()) {
+        yaml += fmt::format("  power_enforced_limit:\n"
+                            "    unit: \"W\"\n"
+                            "    values: {}\n",
+                            power_enforced_limit_.value());
+    }
+    // type of the power measurement
+    if (power_measurement_type_.has_value()) {
+        yaml += fmt::format("  power_measurement_type:\n"
+                            "    unit: \"string\"\n"
+                            "    values: {}\n",
+                            power_measurement_type_.value());
+    }
+    // power management mode
+    if (power_management_mode_.has_value()) {
+        yaml += fmt::format("  power_management_mode:\n"
+                            "    unit: \"bool\"\n"
+                            "    values: {}\n",
+                            power_management_mode_.value());
+    }
+
+    // current power draw, written as comma separated YAML list
+    if (power_usage_.has_value()) {
+        yaml += fmt::format("  power_usage:\n"
+                            "    unit: \"W\"\n"
+                            "    values: [{}]\n",
+                            fmt::join(power_usage_.value(), ", "));
+    }
+    // total consumed energy, written as comma separated YAML list
+    if (power_total_energy_consumption_.has_value()) {
+        yaml += fmt::format("  power_total_energy_consumption:\n"
+                            "    unit: \"J\"\n"
+                            "    values: [{}]\n",
+                            fmt::join(power_total_energy_consumption_.value(), ", "));
+    }
+
+    return yaml;
+}
+
+std::ostream &operator<<(std::ostream &out, const level_zero_power_samples &samples) {
+    using detail::value_or_default;
+    return out << fmt::format("power_enforced_limit [W]: {}\npower_measurement_type [string]: {}\n"
+                              "power_management_mode [bool]: {}\n"
+                              "power_usage [W]: [{}]\n"
+                              "power_total_energy_consumption [J]: [{}]",
+                              value_or_default(samples.get_power_enforced_limit()),
+                              value_or_default(samples.get_power_measurement_type()),
+                              value_or_default(samples.get_power_management_mode()),
+                              fmt::join(value_or_default(samples.get_power_usage()), ", "),
+                              fmt::join(value_or_default(samples.get_power_total_energy_consumption()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                            memory samples                                                           //
+//*************************************************************************************************************************************//
+
+bool level_zero_memory_samples::has_samples() const {
+    // reports whether at least one of the memory or PCIe related entries currently holds a value
+    return memory_total_.has_value() || visible_memory_total_.has_value() || memory_location_.has_value() || num_pcie_lanes_max_.has_value()
+           || pcie_link_generation_max_.has_value() || pcie_link_speed_max_.has_value() || memory_bus_width_.has_value() || memory_num_channels_.has_value()
+           || memory_free_.has_value() || memory_used_.has_value() || num_pcie_lanes_.has_value()
+           || pcie_link_generation_.has_value() || pcie_link_speed_.has_value();
+}
+
+std::string level_zero_memory_samples::generate_yaml_string() const {
+    // serialize every memory entry that holds a value into a YAML "memory:" block; without any entry an empty string is returned
+    if (!this->has_samples()) {
+        return "";
+    }
+
+    std::string str{ "memory:\n" };
+
+    // the total memory; map based entries produce one YAML node per map entry with the map key as node name prefix
+    if (this->memory_total_.has_value()) {
+        for (const auto &[key, value] : this->memory_total_.value()) {
+            str += fmt::format("  {}_memory_total:\n"
+                               "    unit: \"B\"\n"
+                               "    values: {}\n",
+                               key,
+                               value);
+        }
+    }
+    // the total allocatable memory
+    if (this->visible_memory_total_.has_value()) {
+        for (const auto &[key, value] : this->visible_memory_total_.value()) {
+            str += fmt::format("  {}_visible_memory_total:\n"
+                               "    unit: \"B\"\n"
+                               "    values: {}\n",
+                               key,
+                               value);
+        }
+    }
+    // the memory location (system or device); the only entry whose value is written in quotes
+    if (this->memory_location_.has_value()) {
+        for (const auto &[key, value] : this->memory_location_.value()) {
+            str += fmt::format("  {}_memory_location:\n"
+                               "    unit: \"string\"\n"
+                               "    values: \"{}\"\n",
+                               key,
+                               value);
+        }
+    }
+    // the maximum number of PCIe lanes
+    if (this->num_pcie_lanes_max_.has_value()) {
+        str += fmt::format("  num_pcie_lanes_max:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->num_pcie_lanes_max_.value());
+    }
+    // the maximum PCIe link generation
+    if (this->pcie_link_generation_max_.has_value()) {
+        str += fmt::format("  pcie_link_generation_max:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->pcie_link_generation_max_.value());
+    }
+    // the maximum PCIe link speed
+    if (this->pcie_link_speed_max_.has_value()) {
+        str += fmt::format("  pcie_link_speed_max:\n"
+                           "    unit: \"MBPS\"\n"
+                           "    values: {}\n",
+                           this->pcie_link_speed_max_.value());
+    }
+    // the memory bus width
+    if (this->memory_bus_width_.has_value()) {
+        for (const auto &[key, value] : this->memory_bus_width_.value()) {
+            str += fmt::format("  {}_memory_bus_width:\n"
+                               "    unit: \"Bit\"\n"
+                               "    values: {}\n",
+                               key,
+                               value);
+        }
+    }
+    // the number of memory channels
+    if (this->memory_num_channels_.has_value()) {
+        for (const auto &[key, value] : this->memory_num_channels_.value()) {
+            str += fmt::format("  {}_memory_num_channels:\n"
+                               "    unit: \"int\"\n"
+                               "    values: {}\n",
+                               key,
+                               value);
+        }
+    }
+
+    // the currently free memory; reported in bytes like the memory totals above (was mislabeled as "string")
+    if (this->memory_free_.has_value()) {
+        for (const auto &[key, value] : this->memory_free_.value()) {
+            str += fmt::format("  {}_memory_free:\n"
+                               "    unit: \"B\"\n"
+                               "    values: [{}]\n",
+                               key,
+                               fmt::join(value, ", "));
+        }
+    }
+    // the currently used memory; reported in bytes like the memory totals above (was mislabeled as "string")
+    if (this->memory_used_.has_value()) {
+        for (const auto &[key, value] : this->memory_used_.value()) {
+            str += fmt::format("  {}_memory_used:\n"
+                               "    unit: \"B\"\n"
+                               "    values: [{}]\n",
+                               key,
+                               fmt::join(value, ", "));
+        }
+    }
+    // the current number of PCIe lanes
+    if (this->num_pcie_lanes_.has_value()) {
+        str += fmt::format("  num_pcie_lanes:\n"
+                           "    unit: \"int\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->num_pcie_lanes_.value(), ", "));
+    }
+    // the current PCIe link generation
+    if (this->pcie_link_generation_.has_value()) {
+        str += fmt::format("  pcie_link_generation:\n"
+                           "    unit: \"int\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->pcie_link_generation_.value(), ", "));
+    }
+    // the current PCIe link speed
+    if (this->pcie_link_speed_.has_value()) {
+        str += fmt::format("  pcie_link_speed:\n"
+                           "    unit: \"MBPS\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->pcie_link_speed_.value(), ", "));
+    }
+
+    return str;
+}
+
+std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &samples) {
+    std::string str{};
+    // assemble the complete text first (map based entries are appended via append_map_values), then stream it in one go
+    append_map_values(str, "memory_total [B]", samples.get_memory_total());
+    append_map_values(str, "visible_memory_total [B]", samples.get_visible_memory_total());
+    append_map_values(str, "memory_location [string]", samples.get_memory_location());
+    str += fmt::format("num_pcie_lanes_max [int]: {}\n"
+                       "pcie_link_generation_max [int]: {}\n"
+                       "pcie_link_speed_max [MBPS]: {}\n",
+                       detail::value_or_default(samples.get_num_pcie_lanes_max()),
+                       detail::value_or_default(samples.get_pcie_link_generation_max()),
+                       detail::value_or_default(samples.get_pcie_link_speed_max()));
+    append_map_values(str, "memory_bus_width [Bit]", samples.get_memory_bus_width());
+    append_map_values(str, "memory_num_channels [int]", samples.get_memory_num_channels());
+    // free and used memory are byte counts like memory_total, hence the [B] label (was mislabeled as [string])
+    append_map_values(str, "memory_free [B]", samples.get_memory_free());
+    append_map_values(str, "memory_used [B]", samples.get_memory_used());
+    str += fmt::format("num_pcie_lanes [int]: [{}]\n"
+                       "pcie_link_generation [int]: [{}]\n"
+                       "pcie_link_speed [MBPS]: [{}]",
+                       fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "),
+                       fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "),
+                       fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "));
+
+    return out << str;
+}
+
+//*************************************************************************************************************************************//
+//                                                         temperature samples                                                         //
+//*************************************************************************************************************************************//
+
+bool level_zero_temperature_samples::has_samples() const {
+    // reports whether at least one of the fan or temperature related entries currently holds a value
+    return num_fans_.has_value() || fan_speed_max_.has_value() || temperature_max_.has_value() || memory_temperature_max_.has_value() || global_temperature_max_.has_value()
+           || fan_speed_percentage_.has_value() || temperature_.has_value() || memory_temperature_.has_value() || global_temperature_.has_value() || psu_temperature_.has_value();
+}
+
+std::string level_zero_temperature_samples::generate_yaml_string() const {
+    // nothing has been recorded -> there is nothing to serialize
+    if (!has_samples()) {
+        return std::string{};
+    }
+
+    std::string yaml{ "temperature:\n" };
+
+    // number of fans
+    if (num_fans_.has_value()) {
+        yaml += fmt::format("  num_fans:\n"
+                            "    unit: \"int\"\n"
+                            "    values: {}\n",
+                            num_fans_.value());
+    }
+    // maximum fan speed in RPM
+    if (fan_speed_max_.has_value()) {
+        yaml += fmt::format("  fan_speed_max:\n"
+                            "    unit: \"RPM\"\n"
+                            "    values: {}\n",
+                            fan_speed_max_.value());
+    }
+    // maximum GPU temperature
+    if (temperature_max_.has_value()) {
+        yaml += fmt::format("  temperature_max:\n"
+                            "    unit: \"°C\"\n"
+                            "    values: {}\n",
+                            temperature_max_.value());
+    }
+    // maximum memory temperature
+    if (memory_temperature_max_.has_value()) {
+        yaml += fmt::format("  memory_temperature_max:\n"
+                            "    unit: \"°C\"\n"
+                            "    values: {}\n",
+                            memory_temperature_max_.value());
+    }
+    // maximum global temperature
+    if (global_temperature_max_.has_value()) {
+        yaml += fmt::format("  global_temperature_max:\n"
+                            "    unit: \"°C\"\n"
+                            "    values: {}\n",
+                            global_temperature_max_.value());
+    }
+
+    // current fan speed in percent, written as comma separated YAML list
+    if (fan_speed_percentage_.has_value()) {
+        yaml += fmt::format("  fan_speed_percentage:\n"
+                            "    unit: \"percentage\"\n"
+                            "    values: [{}]\n",
+                            fmt::join(fan_speed_percentage_.value(), ", "));
+    }
+    // current GPU temperature, written as comma separated YAML list
+    if (temperature_.has_value()) {
+        yaml += fmt::format("  temperature:\n"
+                            "    unit: \"°C\"\n"
+                            "    values: [{}]\n",
+                            fmt::join(temperature_.value(), ", "));
+    }
+    // current memory temperature, written as comma separated YAML list
+    if (memory_temperature_.has_value()) {
+        yaml += fmt::format("  memory_temperature:\n"
+                            "    unit: \"°C\"\n"
+                            "    values: [{}]\n",
+                            fmt::join(memory_temperature_.value(), ", "));
+    }
+    // current global temperature, written as comma separated YAML list
+    if (global_temperature_.has_value()) {
+        yaml += fmt::format("  global_temperature:\n"
+                            "    unit: \"°C\"\n"
+                            "    values: [{}]\n",
+                            fmt::join(global_temperature_.value(), ", "));
+    }
+    // current PSU temperature, written as comma separated YAML list
+    if (psu_temperature_.has_value()) {
+        yaml += fmt::format("  psu_temperature:\n"
+                            "    unit: \"°C\"\n"
+                            "    values: [{}]\n",
+                            fmt::join(psu_temperature_.value(), ", "));
+    }
+
+    return yaml;
+}
+
+std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples &samples) {
+    using detail::value_or_default;
+    return out << fmt::format("num_fans [int]: {}\nfan_speed_max [RPM]: {}\n"
+                              "temperature_max [°C]: {}\n"
+                              "memory_temperature_max [°C]: {}\n"
+                              "global_temperature_max [°C]: {}\n"
+                              "fan_speed_percentage [%]: [{}]\n"
+                              "temperature [°C]: [{}]\n"
+                              "memory_temperature [°C]: [{}]\n"
+                              "global_temperature [°C]: [{}]\n"
+                              "psu_temperature [°C]: [{}]",
+                              value_or_default(samples.get_num_fans()),
+                              value_or_default(samples.get_fan_speed_max()),
+                              value_or_default(samples.get_temperature_max()),
+                              value_or_default(samples.get_memory_temperature_max()),
+                              value_or_default(samples.get_global_temperature_max()),
+                              fmt::join(value_or_default(samples.get_fan_speed_percentage()), ", "),
+                              fmt::join(value_or_default(samples.get_temperature()), ", "),
+                              fmt::join(value_or_default(samples.get_memory_temperature()), ", "),
+                              fmt::join(value_or_default(samples.get_global_temperature()), ", "),
+                              fmt::join(value_or_default(samples.get_psu_temperature()), ", "));
+}
+
+}  // namespace hws
diff --git a/src/hardware_sampling/gpu_intel/utility.cpp b/src/hws/gpu_intel/utility.cpp
similarity index 81%
rename from src/hardware_sampling/gpu_intel/utility.cpp
rename to src/hws/gpu_intel/utility.cpp
index a0cefe9..5a29eee 100644
--- a/src/hardware_sampling/gpu_intel/utility.cpp
+++ b/src/hws/gpu_intel/utility.cpp
@@ -5,16 +5,68 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/gpu_intel/utility.hpp"
+#include "hws/gpu_intel/utility.hpp"
 
+#include "fmt/format.h"          // fmt::format
+#include "fmt/ranges.h"          // fmt::join
 #include "level_zero/ze_api.h"   // Level Zero runtime functions
 #include "level_zero/zes_api.h"  // Level Zero runtime functions
 
 #include <string>       // std::string
 #include <string_view>  // std::string_view
+#include <vector>       // std::vector
 
 namespace hws::detail {
 
+std::vector<std::string> property_flags_to_vector(const ze_device_property_flags_t flags) {
+    // every known property flag together with the name reported for it; the array order defines the output order
+    struct named_flag { ze_device_property_flag_t bit; const char *name; };
+    const named_flag known_flags[] = {
+        { ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED, "integrated_gpu" },
+        { ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE, "sub-device" },
+        { ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_ECC, "ecc" },
+        { ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING, "on-demand_page-faulting" },
+    };
+    // collect the names of all flags that are set in the bitmask (an empty vector if none is set)
+    std::vector<std::string> names{};
+    for (const named_flag &candidate : known_flags) {
+        if ((flags & candidate.bit) != 0) {
+            names.emplace_back(candidate.name);
+        }
+    }
+    return names;
+}
+
+std::string throttle_reason_to_string(const zes_freq_throttle_reason_flags_t reasons) {
+    // an all-zero bitmask is reported with a dedicated name
+    if (reasons == 0) {
+        return "None";
+    }
+
+    // every known throttle reason together with the name reported for it; the array order defines the output order
+    struct named_reason { zes_freq_throttle_reason_flag_t bit; const char *name; };
+    const named_reason known_reasons[] = {
+        { zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_AVE_PWR_CAP, "average_power" },
+        { zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_BURST_PWR_CAP, "burst_power" },
+        { zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_CURRENT_LIMIT, "current_limit" },
+        { zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_THERMAL_LIMIT, "thermal_limit" },
+        { zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_PSU_ALERT, "psu_assertion" },
+        { zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_SW_RANGE, "software_frequency_range" },
+        { zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_HW_RANGE, "hardware_frequency_range" },
+    };
+
+    // gather the names of all reasons whose bit is set
+    std::vector<std::string> active{};
+    for (const named_reason &candidate : known_reasons) {
+        if ((reasons & candidate.bit) != 0) {
+            active.emplace_back(candidate.name);
+        }
+    }
+
+    // the names are separated by '|'; an empty string results if none of the known bits is set
+    return fmt::format("{}", fmt::join(active, "|"));
+}
+
 std::string_view to_result_string(const ze_result_t errc) {
     switch (errc) {
         case ZE_RESULT_SUCCESS:
@@ -175,17 +227,4 @@ std::string memory_location_to_name(const zes_mem_loc_t mem_loc) {
     }
 }
 
-std::string temperature_sensor_type_to_name(const zes_temp_sensors_t sensor_type) {
-    switch (sensor_type) {
-        case ZES_TEMP_SENSORS_GLOBAL:
-            return "global";
-        case ZES_TEMP_SENSORS_GPU:
-            return "gpu";
-        case ZES_TEMP_SENSORS_MEMORY:
-            return "memory";
-        default:
-            return "";
-    }
-}
-
 }  // namespace hws::detail
diff --git a/src/hws/gpu_nvidia/hardware_sampler.cpp b/src/hws/gpu_nvidia/hardware_sampler.cpp
new file mode 100644
index 0000000..9c2a927
--- /dev/null
+++ b/src/hws/gpu_nvidia/hardware_sampler.cpp
@@ -0,0 +1,595 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/gpu_nvidia/hardware_sampler.hpp"
+
+#include "hws/gpu_nvidia/nvml_device_handle_impl.hpp"  // hws::detail::nvml_device_handle implementation
+#include "hws/gpu_nvidia/nvml_samples.hpp"             // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples}
+#include "hws/gpu_nvidia/utility.hpp"                  // HWS_NVML_ERROR_CHECK
+#include "hws/hardware_sampler.hpp"                    // hws::hardware_sampler
+#include "hws/sample_category.hpp"                     // hws::sample_category
+#include "hws/utility.hpp"                             // hws::detail::time_points_to_epoch
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+#include "nvml.h"        // NVML runtime functions
+
+#include <algorithm>  // std::min_element, std::sort, std::transform
+#include <chrono>     // std::chrono::{steady_clock, duration_cast, milliseconds}
+#include <cstddef>    // std::size_t
+#include <exception>  // std::exception, std::terminate
+#include <ios>        // std::ios_base
+#include <iostream>   // std::cerr, std::endl
+#include <numeric>    // std::iota
+#include <optional>   // std::optional
+#include <ostream>    // std::ostream
+#include <stdexcept>  // std::runtime_error
+#include <string>     // std::string
+#include <thread>     // std::this_thread
+#include <vector>     // std::vector
+
+namespace hws {
+
+gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const sample_category category) :  // convenience constructor: only the sample categories are provided
+    gpu_nvidia_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { }  // delegates to the main constructor with device ID 0 and HWS_SAMPLING_INTERVAL as interval
+
+gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const sample_category category) :  // convenience constructor: explicit device ID, no explicit interval
+    gpu_nvidia_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { }  // delegates to the main constructor with HWS_SAMPLING_INTERVAL as interval
+
+gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :  // convenience constructor: explicit interval, no explicit device ID
+    gpu_nvidia_hardware_sampler{ 0, sampling_interval, category } { }  // delegates to the main constructor with device ID 0
+
+gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) :  // main constructor: target of the delegating constructors
+    hardware_sampler{ sampling_interval, category } {  // interval and categories are forwarded to the hardware_sampler base
+    // make sure that nvmlInit is only called once for all instances: only the instance that observes a count of 0 initializes NVML
+    if (instances_++ == 0) {  // NOTE(review): presumably instances_ and init_finished_ are atomics; their declarations are not visible here -> confirm
+        HWS_NVML_ERROR_CHECK(nvmlInit())  // NOTE(review): if this check fails, instances_ stays incremented and init_finished_ is never set -> confirm how waiting instances behave
+        // notify all other (waiting) instances that the initialization has been finished
+        init_finished_ = true;
+    } else {
+        // another instance is responsible for nvmlInit: wait until its init has been finished!
+        while (!init_finished_) { }  // busy-wait with an empty body (no sleep or yield) until the flag has been set
+    }
+
+    // initialize the device handle -> can't be done beforehand since the device handle can only be initialized after a call to nvmlInit
+    device_ = detail::nvml_device_handle{ device_id };
+}
+
+gpu_nvidia_hardware_sampler::~gpu_nvidia_hardware_sampler() {  // stops a still running sampler and shuts down NVML if this was the last instance
+    try {
+        // if this hardware sampler has been started but not yet stopped, stop it now
+        if (this->has_sampling_started() && !this->has_sampling_stopped()) {
+            this->stop_sampling();
+        }
+
+        // the last instance must shut down the NVML runtime (mirrors the instance counting in the constructor)
+        // make sure that nvmlShutdown is only called once: only the instance that decrements the count to 0 calls it
+        if (--instances_ == 0) {
+            HWS_NVML_ERROR_CHECK(nvmlShutdown())
+            // reset init_finished flag so that an instance created afterwards waits for a new nvmlInit
+            init_finished_ = false;
+        }
+    } catch (const std::exception &e) {  // no exception may leave the destructor
+        std::cerr << e.what() << std::endl;  // report the error message on stderr ...
+        std::terminate();                    // ... and abort the program
+    }
+}
+
+void gpu_nvidia_hardware_sampler::sampling_loop() {  // one-time initial queries followed by periodic polling until has_sampling_stopped() returns true
+    // get the nvml handle from the device
+    nvmlDevice_t device = device_.get_impl().device;
+
+    // Phase 1: take one initial sample per enabled category. Fixed values are stored once;
+    // time-varying values are stored as the first element of their sample container. Only values
+    // whose initial NVML query succeeded hold a value afterwards and are polled again in phase 2.
+
+    this->add_time_point(std::chrono::steady_clock::now());
+
+    double initial_total_power_consumption{};  // initial total energy consumption in J; subtracted from all later energy samples
+
+    // retrieve initial general information
+    if (this->sample_category_enabled(sample_category::general)) {
+        // fixed information -> only retrieved once
+        nvmlDeviceArchitecture_t device_arch{};
+        if (nvmlDeviceGetArchitecture(device, &device_arch) == NVML_SUCCESS) {
+            switch (device_arch) {  // each case is guarded since older NVML headers do not define the newer architecture macros
+#if defined(NVML_DEVICE_ARCH_KEPLER)
+                case NVML_DEVICE_ARCH_KEPLER:
+                    general_samples_.architecture_ = "Kepler";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_MAXWELL)
+                case NVML_DEVICE_ARCH_MAXWELL:
+                    general_samples_.architecture_ = "Maxwell";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_PASCAL)
+                case NVML_DEVICE_ARCH_PASCAL:
+                    general_samples_.architecture_ = "Pascal";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_VOLTA)
+                case NVML_DEVICE_ARCH_VOLTA:
+                    general_samples_.architecture_ = "Volta";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_TURING)
+                case NVML_DEVICE_ARCH_TURING:
+                    general_samples_.architecture_ = "Turing";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_AMPERE)
+                case NVML_DEVICE_ARCH_AMPERE:
+                    general_samples_.architecture_ = "Ampere";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_ADA)
+                case NVML_DEVICE_ARCH_ADA:
+                    general_samples_.architecture_ = "Ada";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_HOPPER)
+                case NVML_DEVICE_ARCH_HOPPER:
+                    general_samples_.architecture_ = "Hopper";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_BLACKWELL)
+                case NVML_DEVICE_ARCH_BLACKWELL:
+                    general_samples_.architecture_ = "Blackwell";
+                    break;
+#endif
+#if defined(NVML_DEVICE_ARCH_T23X)
+                case NVML_DEVICE_ARCH_T23X:
+                    general_samples_.architecture_ = "Orin";
+                    break;
+#endif
+                default:
+                    break;
+            }
+        }
+
+        // the byte order is given by the NVIDIA CUDA guide
+        general_samples_.byte_order_ = "Little Endian";
+
+        // the vendor ID is fixed for NVIDIA GPUs (must be stored in vendor_id_, NOT in byte_order_)
+        general_samples_.vendor_id_ = "NVIDIA";
+
+        std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0');
+        if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) {
+            general_samples_.name_ = name.substr(0, name.find_first_of('\0'));  // cut off the unused, zero-filled rest of the buffer
+        }
+
+        nvmlEnableState_t mode{};
+        if (nvmlDeviceGetPersistenceMode(device, &mode) == NVML_SUCCESS) {
+            general_samples_.persistence_mode_ = mode == NVML_FEATURE_ENABLED;
+        }
+
+        decltype(general_samples_.num_cores_)::value_type num_cores{};
+        if (nvmlDeviceGetNumGpuCores(device, &num_cores) == NVML_SUCCESS) {
+            general_samples_.num_cores_ = num_cores;
+        }
+
+        // queried samples -> retrieved every iteration if available
+        nvmlPstates_t pstate{};
+        if (nvmlDeviceGetPerformanceState(device, &pstate) == NVML_SUCCESS) {
+            general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate) };
+        }
+
+        nvmlUtilization_t util{};
+        if (nvmlDeviceGetUtilizationRates(device, &util) == NVML_SUCCESS) {
+            general_samples_.compute_utilization_ = decltype(general_samples_.compute_utilization_)::value_type{ util.gpu };
+            general_samples_.memory_utilization_ = decltype(general_samples_.memory_utilization_)::value_type{ util.memory };
+        }
+    }
+
+    // retrieve initial clock related information
+    if (this->sample_category_enabled(sample_category::clock)) {
+        // fixed information -> only retrieved once
+        unsigned int adaptive_clock_status{};
+        if (nvmlDeviceGetAdaptiveClockInfoStatus(device, &adaptive_clock_status) == NVML_SUCCESS) {
+            clock_samples_.auto_boosted_clock_enabled_ = adaptive_clock_status == NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED;
+        }
+
+        unsigned int clock_graph_max{};
+        if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph_max) == NVML_SUCCESS) {
+            clock_samples_.clock_frequency_max_ = static_cast<decltype(clock_samples_.clock_frequency_max_)::value_type>(clock_graph_max);
+        }
+
+        unsigned int clock_sm_max{};
+        if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_SM, &clock_sm_max) == NVML_SUCCESS) {
+            clock_samples_.sm_clock_frequency_max_ = static_cast<decltype(clock_samples_.sm_clock_frequency_max_)::value_type>(clock_sm_max);
+        }
+
+        unsigned int clock_mem_max{};
+        if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_MEM, &clock_mem_max) == NVML_SUCCESS) {
+            clock_samples_.memory_clock_frequency_max_ = static_cast<decltype(clock_samples_.memory_clock_frequency_max_)::value_type>(clock_mem_max);
+        }
+
+        {
+            unsigned int clock_count{ 128 };  // in: capacity of the buffer, out: number of reported clocks
+            std::vector<unsigned int> supported_clocks(clock_count);
+            if (nvmlDeviceGetSupportedMemoryClocks(device, &clock_count, supported_clocks.data()) == NVML_SUCCESS && clock_count > 0) {  // an empty result must not be dereferenced below
+                supported_clocks.resize(clock_count);
+                clock_samples_.memory_clock_frequency_min_ = static_cast<decltype(clock_samples_.memory_clock_frequency_min_)::value_type>(*std::min_element(supported_clocks.cbegin(), supported_clocks.cend()));
+
+                decltype(clock_samples_.available_memory_clock_frequencies_)::value_type available_memory_clock_frequencies(supported_clocks.size());
+                // convert unsigned int values to double values
+                std::transform(supported_clocks.cbegin(), supported_clocks.cend(), available_memory_clock_frequencies.begin(), [](const unsigned int c) { return static_cast<decltype(clock_samples_.available_memory_clock_frequencies_)::value_type::value_type>(c); });
+                // we want to report all supported memory clocks in ascending order
+                std::sort(available_memory_clock_frequencies.begin(), available_memory_clock_frequencies.end());
+                clock_samples_.available_memory_clock_frequencies_ = available_memory_clock_frequencies;
+            }
+        }
+
+        {
+            unsigned int clock_count{ 128 };  // in: capacity of the buffer, out: number of reported clocks
+            std::vector<unsigned int> supported_clocks(clock_count);
+            if (clock_samples_.memory_clock_frequency_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, static_cast<unsigned int>(clock_samples_.memory_clock_frequency_min_.value()), &clock_count, supported_clocks.data()) == NVML_SUCCESS && clock_count > 0) {
+                clock_samples_.clock_frequency_min_ = static_cast<decltype(clock_samples_.clock_frequency_min_)::value_type>(*std::min_element(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count));
+            }
+
+            if (clock_samples_.available_memory_clock_frequencies_.has_value()) {
+                for (const auto value : clock_samples_.available_memory_clock_frequencies_.value()) {
+                    clock_count = static_cast<unsigned int>(supported_clocks.size());  // the count was overwritten by the previous query -> reset it to the full buffer capacity
+                    if (nvmlDeviceGetSupportedGraphicsClocks(device, static_cast<unsigned int>(value), &clock_count, supported_clocks.data()) == NVML_SUCCESS) {
+                        decltype(clock_samples_.available_clock_frequencies_)::value_type::mapped_type available_clock_frequencies(clock_count);
+                        // convert unsigned int values to double values
+                        std::transform(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count, available_clock_frequencies.begin(), [](const unsigned int c) { return static_cast<decltype(clock_samples_.available_clock_frequencies_)::value_type::mapped_type::value_type>(c); });
+                        // we want to report all supported graphics clocks in ascending order
+                        std::sort(available_clock_frequencies.begin(), available_clock_frequencies.end());
+                        if (!clock_samples_.available_clock_frequencies_.has_value()) {  // if no map exists, default construct an empty map
+                            clock_samples_.available_clock_frequencies_ = decltype(clock_samples_)::map_type{};
+                        }
+                        clock_samples_.available_clock_frequencies_->emplace(value, available_clock_frequencies);
+                    }
+                }
+            }
+        }
+
+        // queried samples -> retrieved every iteration if available
+        unsigned int clock_graph{};
+        if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph) == NVML_SUCCESS) {
+            clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(clock_graph) };
+        }
+
+        unsigned int clock_sm{};
+        if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &clock_sm) == NVML_SUCCESS) {
+            clock_samples_.sm_clock_frequency_ = decltype(clock_samples_.sm_clock_frequency_)::value_type{ static_cast<decltype(clock_samples_.sm_clock_frequency_)::value_type::value_type>(clock_sm) };
+        }
+
+        unsigned int clock_mem{};
+        if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &clock_mem) == NVML_SUCCESS) {
+            clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(clock_mem) };
+        }
+
+        decltype(clock_samples_.throttle_reason_)::value_type::value_type clock_throttle_reason{};
+        if (nvmlDeviceGetCurrentClocksEventReasons(device, &clock_throttle_reason) == NVML_SUCCESS) {
+            clock_samples_.throttle_reason_ = decltype(clock_samples_.throttle_reason_)::value_type{ clock_throttle_reason };
+            clock_samples_.throttle_reason_string_ = decltype(clock_samples_.throttle_reason_string_)::value_type{ detail::throttle_event_reason_to_string(clock_throttle_reason) };
+        }
+
+        nvmlEnableState_t mode{};
+        nvmlEnableState_t default_mode{};
+        if (nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode) == NVML_SUCCESS) {
+            clock_samples_.auto_boosted_clock_ = decltype(clock_samples_.auto_boosted_clock_)::value_type{ mode == NVML_FEATURE_ENABLED };
+        }
+    }
+
+    // retrieve initial power related information
+    if (this->sample_category_enabled(sample_category::power)) {
+        // fixed information -> only retrieved once
+        nvmlEnableState_t mode{};
+        if (nvmlDeviceGetPowerManagementMode(device, &mode) == NVML_SUCCESS) {
+            power_samples_.power_management_mode_ = mode == NVML_FEATURE_ENABLED;
+        }
+
+        unsigned int power_management_limit{};
+        if (nvmlDeviceGetPowerManagementLimit(device, &power_management_limit) == NVML_SUCCESS) {
+            power_samples_.power_management_limit_ = static_cast<decltype(power_samples_.power_management_limit_)::value_type>(power_management_limit) / 1000.0;
+        }
+
+        unsigned int power_enforced_limit{};
+        if (nvmlDeviceGetEnforcedPowerLimit(device, &power_enforced_limit) == NVML_SUCCESS) {
+            power_samples_.power_enforced_limit_ = static_cast<decltype(power_samples_.power_enforced_limit_)::value_type>(power_enforced_limit) / 1000.0;
+        }
+
+        if (general_samples_.architecture_.has_value()) {  // within this function, architecture_ is only set by the general category block above
+            // based on https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1gf754f109beca3a4a8c8c1cd650d7d66c
+            if (general_samples_.architecture_ == "Kepler" || general_samples_.architecture_ == "Maxwell" || general_samples_.architecture_ == "Pascal" || general_samples_.architecture_ == "Volta" || general_samples_.architecture_ == "Turing") {
+                power_samples_.power_measurement_type_ = "current/instant";
+            } else if (general_samples_.architecture_ == "Ampere" || general_samples_.architecture_ == "Ada" || general_samples_.architecture_ == "Hopper" || general_samples_.architecture_ == "Blackwell" || general_samples_.architecture_ == "Orin") {
+                if (general_samples_.name_.has_value() && general_samples_.name_.value().find("A100") != std::string::npos) {
+                    // GA100 also has instant power draw measurements
+                    power_samples_.power_measurement_type_ = "current/instant";
+                } else {
+                    power_samples_.power_measurement_type_ = "average";
+                }
+            } else {
+                power_samples_.power_measurement_type_ = "invalid/undetected";
+            }
+        }
+
+        decltype(power_samples_.available_power_profiles_)::value_type power_states(17, 32);  // 17 power states, value 32 = unknown
+        std::iota(power_states.begin(), power_states.end() - 1, decltype(power_samples_.available_power_profiles_)::value_type::value_type{ 0 });  // 0, 1, ..., 15; the last entry stays 32
+        power_samples_.available_power_profiles_ = power_states;
+
+        // queried samples -> retrieved every iteration if available
+        unsigned int power_usage{};
+        if (nvmlDeviceGetPowerUsage(device, &power_usage) == NVML_SUCCESS) {
+            power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(power_usage) / 1000.0 };
+        }
+
+        unsigned long long power_total_energy_consumption{};
+        if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) {
+            initial_total_power_consumption = static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(power_total_energy_consumption) / 1000.0;
+            power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 };  // energy is reported relative to this first sample
+        }
+
+        nvmlPstates_t pstate{};
+        if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) {
+            power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ static_cast<decltype(power_samples_.power_profile_)::value_type::value_type>(pstate) };
+        }
+    }
+
+    // retrieve initial memory related information
+    if (this->sample_category_enabled(sample_category::memory)) {
+        // fixed information -> only retrieved once
+        nvmlMemory_t memory_info{};
+        if (nvmlDeviceGetMemoryInfo(device, &memory_info) == NVML_SUCCESS) {
+            memory_samples_.memory_total_ = memory_info.total;
+            // queried samples -> retrieved every iteration if available
+            memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{ memory_info.free };
+            memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_info.used };
+        }
+
+        decltype(memory_samples_.memory_bus_width_)::value_type memory_bus_width{};
+        if (nvmlDeviceGetMemoryBusWidth(device, &memory_bus_width) == NVML_SUCCESS) {
+            memory_samples_.memory_bus_width_ = memory_bus_width;
+        }
+
+        decltype(memory_samples_.num_pcie_lanes_max_)::value_type num_pcie_lanes_max{};
+        if (nvmlDeviceGetMaxPcieLinkWidth(device, &num_pcie_lanes_max) == NVML_SUCCESS) {
+            memory_samples_.num_pcie_lanes_max_ = num_pcie_lanes_max;
+        }
+
+        decltype(memory_samples_.pcie_link_generation_max_)::value_type pcie_link_generation_max{};
+        if (nvmlDeviceGetMaxPcieLinkGeneration(device, &pcie_link_generation_max) == NVML_SUCCESS) {
+            memory_samples_.pcie_link_generation_max_ = pcie_link_generation_max;
+        }
+
+        decltype(memory_samples_.pcie_link_speed_max_)::value_type pcie_link_speed_max{};
+        if (nvmlDeviceGetPcieLinkMaxSpeed(device, &pcie_link_speed_max) == NVML_SUCCESS) {
+            memory_samples_.pcie_link_speed_max_ = pcie_link_speed_max;
+        }
+
+        // queried samples -> retrieved every iteration if available
+        decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type num_pcie_lanes{};
+        if (nvmlDeviceGetCurrPcieLinkWidth(device, &num_pcie_lanes) == NVML_SUCCESS) {
+            memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{ num_pcie_lanes };
+        }
+
+        decltype(memory_samples_.pcie_link_generation_)::value_type::value_type pcie_link_generation{};
+        if (nvmlDeviceGetCurrPcieLinkGeneration(device, &pcie_link_generation) == NVML_SUCCESS) {
+            memory_samples_.pcie_link_generation_ = decltype(memory_samples_.pcie_link_generation_)::value_type{ pcie_link_generation };
+        }
+    }
+
+    // retrieve initial temperature related information
+    if (this->sample_category_enabled(sample_category::temperature)) {
+        // fixed information -> only retrieved once
+        decltype(temperature_samples_.num_fans_)::value_type num_fans{};
+        if (nvmlDeviceGetNumFans(device, &num_fans) == NVML_SUCCESS) {
+            temperature_samples_.num_fans_ = num_fans;
+        }
+
+        if (temperature_samples_.num_fans_.has_value() && temperature_samples_.num_fans_.value() > 0) {  // fan speed limits are only queried if at least one fan was reported
+            decltype(temperature_samples_.fan_speed_min_)::value_type min_fan_speed{};
+            decltype(temperature_samples_.fan_speed_max_)::value_type max_fan_speed{};
+            if (nvmlDeviceGetMinMaxFanSpeed(device, &min_fan_speed, &max_fan_speed) == NVML_SUCCESS) {
+                temperature_samples_.fan_speed_min_ = min_fan_speed;
+                temperature_samples_.fan_speed_max_ = max_fan_speed;
+            }
+        }
+
+        unsigned int temperature_max{};
+        if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_GPU_MAX, &temperature_max) == NVML_SUCCESS) {
+            temperature_samples_.temperature_max_ = static_cast<decltype(temperature_samples_.temperature_max_)::value_type>(temperature_max);
+        }
+
+        unsigned int memory_temperature_max{};
+        if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_MEM_MAX, &memory_temperature_max) == NVML_SUCCESS) {
+            temperature_samples_.memory_temperature_max_ = static_cast<decltype(temperature_samples_.memory_temperature_max_)::value_type>(memory_temperature_max);
+        }
+
+        // queried samples -> retrieved every iteration if available
+        unsigned int fan_speed_percentage{};
+        if (nvmlDeviceGetFanSpeed(device, &fan_speed_percentage) == NVML_SUCCESS) {
+            temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(fan_speed_percentage) };
+        }
+
+        unsigned int temperature{};
+        if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature) == NVML_SUCCESS) {
+            temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{ static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(temperature) };
+        }
+    }
+
+    // Phase 2: poll the time-varying values once per sampling interval until has_sampling_stopped()
+    // returns true; while is_sampling() is false (sampler paused) no values are recorded. A failing
+    // NVML call in this phase is passed to HWS_NVML_ERROR_CHECK instead of being skipped silently.
+
+    while (!this->has_sampling_stopped()) {
+        // only sample values if the sampler currently isn't paused
+        if (this->is_sampling()) {
+            // add current time point
+            this->add_time_point(std::chrono::steady_clock::now());
+
+            // retrieve general samples
+            if (this->sample_category_enabled(sample_category::general)) {
+                if (general_samples_.performance_level_.has_value()) {
+                    nvmlPstates_t pstate{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate))
+                    general_samples_.performance_level_->push_back(static_cast<decltype(general_samples_.performance_level_)::value_type::value_type>(pstate));
+                }
+
+                if (general_samples_.compute_utilization_.has_value() && general_samples_.memory_utilization_.has_value()) {
+                    nvmlUtilization_t util{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util))
+                    general_samples_.compute_utilization_->push_back(util.gpu);
+                    general_samples_.memory_utilization_->push_back(util.memory);
+                }
+            }
+
+            // retrieve clock related samples
+            if (this->sample_category_enabled(sample_category::clock)) {
+                if (clock_samples_.clock_frequency_.has_value()) {
+                    unsigned int value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value))
+                    clock_samples_.clock_frequency_->push_back(static_cast<decltype(clock_samples_.clock_frequency_)::value_type::value_type>(value));
+                }
+
+                if (clock_samples_.sm_clock_frequency_.has_value()) {
+                    unsigned int value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value))
+                    clock_samples_.sm_clock_frequency_->push_back(static_cast<decltype(clock_samples_.sm_clock_frequency_)::value_type::value_type>(value));
+                }
+
+                if (clock_samples_.memory_clock_frequency_.has_value()) {
+                    unsigned int value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value))
+                    clock_samples_.memory_clock_frequency_->push_back(static_cast<decltype(clock_samples_.memory_clock_frequency_)::value_type::value_type>(value));
+                }
+
+                if (clock_samples_.throttle_reason_string_.has_value()) {  // throttle_reason_ and throttle_reason_string_ are always initialized together above
+                    decltype(clock_samples_.throttle_reason_)::value_type::value_type value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value))
+                    clock_samples_.throttle_reason_->push_back(value);
+                    clock_samples_.throttle_reason_string_->push_back(detail::throttle_event_reason_to_string(value));
+                }
+
+                if (clock_samples_.auto_boosted_clock_.has_value()) {
+                    nvmlEnableState_t mode{};
+                    nvmlEnableState_t default_mode{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode))
+                    clock_samples_.auto_boosted_clock_->push_back(mode == NVML_FEATURE_ENABLED);
+                }
+            }
+
+            // retrieve power related information
+            if (this->sample_category_enabled(sample_category::power)) {
+                if (power_samples_.power_profile_.has_value()) {
+                    nvmlPstates_t pstate{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate))
+                    power_samples_.power_profile_->push_back(static_cast<decltype(power_samples_.power_profile_)::value_type::value_type>(pstate));
+                }
+
+                if (power_samples_.power_usage_.has_value()) {
+                    unsigned int value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value))
+                    power_samples_.power_usage_->push_back(static_cast<decltype(power_samples_.power_usage_)::value_type::value_type>(value) / 1000.0);
+                }
+
+                if (power_samples_.power_total_energy_consumption_.has_value()) {
+                    unsigned long long value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value))
+                    power_samples_.power_total_energy_consumption_->push_back((static_cast<decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type>(value) / 1000.0) - initial_total_power_consumption);
+                }
+            }
+
+            // retrieve memory related information
+            if (this->sample_category_enabled(sample_category::memory)) {
+                if (memory_samples_.memory_free_.has_value() && memory_samples_.memory_used_.has_value()) {
+                    nvmlMemory_t memory_info{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info))
+                    memory_samples_.memory_free_->push_back(memory_info.free);
+                    memory_samples_.memory_used_->push_back(memory_info.used);
+                }
+
+                if (memory_samples_.num_pcie_lanes_.has_value()) {
+                    decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value))
+                    memory_samples_.num_pcie_lanes_->push_back(value);
+                }
+
+                if (memory_samples_.pcie_link_generation_.has_value()) {
+                    decltype(memory_samples_.pcie_link_generation_)::value_type::value_type value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkGeneration(device, &value))
+                    memory_samples_.pcie_link_generation_->push_back(value);
+                }
+            }
+
+            // retrieve temperature related information
+            if (this->sample_category_enabled(sample_category::temperature)) {
+                if (temperature_samples_.fan_speed_percentage_.has_value()) {
+                    unsigned int value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value))
+                    temperature_samples_.fan_speed_percentage_->push_back(static_cast<decltype(temperature_samples_.fan_speed_percentage_)::value_type::value_type>(value));
+                }
+
+                if (temperature_samples_.temperature_.has_value()) {
+                    unsigned int value{};
+                    HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value))
+                    temperature_samples_.temperature_->push_back(static_cast<decltype(temperature_samples_.temperature_)::value_type::value_type>(value));
+                }
+            }
+        }
+
+        // wait for the sampling interval to pass to retrieve the next sample
+        std::this_thread::sleep_for(this->sampling_interval());
+    }
+}
+
+std::string gpu_nvidia_hardware_sampler::device_identification() const {  // builds an identifier string for this device from its PCI information
+    nvmlPciInfo_st pcie_info{};
+    HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info))  // a failing query is handled by HWS_NVML_ERROR_CHECK
+    return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.device, pcie_info.bus);  // note the order: the PCI device field comes first, the bus field second
+}
+
+std::string gpu_nvidia_hardware_sampler::samples_only_as_yaml_string() const {  // concatenates the YAML strings of all five sample categories
+    // check whether it's safe to generate the YAML entry: throws a std::runtime_error while is_sampling() is true
+    if (this->is_sampling()) {
+        throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" };
+    }
+
+    return fmt::format("{}{}"  // each pair of placeholders: YAML string of a category + its separator
+                       "{}{}"
+                       "{}{}"
+                       "{}{}"
+                       "{}",   // temperature is the last category -> no separator after it
+                       general_samples_.generate_yaml_string(),
+                       general_samples_.has_samples() ? "\n" : "",  // a newline is only added if the category has samples
+                       clock_samples_.generate_yaml_string(),
+                       clock_samples_.has_samples() ? "\n" : "",
+                       power_samples_.generate_yaml_string(),
+                       power_samples_.has_samples() ? "\n" : "",
+                       memory_samples_.generate_yaml_string(),
+                       memory_samples_.has_samples() ? "\n" : "",
+                       temperature_samples_.generate_yaml_string());
+}
+
+std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &sampler) {  // writes the sampling interval, time points, and all sample categories to out
+    if (sampler.is_sampling()) {
+        out.setstate(std::ios_base::failbit);  // while the sampler is sampling nothing is written; the failbit of the stream is set instead
+        return out;
+    } else {
+        return out << fmt::format("sampling interval: {}\n"
+                                  "time points: [{}]\n\n"
+                                  "general samples:\n{}\n\n"
+                                  "clock samples:\n{}\n\n"
+                                  "power samples:\n{}\n\n"
+                                  "memory samples:\n{}\n\n"
+                                  "temperature samples:\n{}",
+                                  sampler.sampling_interval(),
+                                  fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "),  // time points are converted via time_points_to_epoch and comma separated
+                                  sampler.general_samples(),
+                                  sampler.clock_samples(),
+                                  sampler.power_samples(),
+                                  sampler.memory_samples(),
+                                  sampler.temperature_samples());
+    }
+}
+
+}  // namespace hws
diff --git a/src/hws/gpu_nvidia/nvml_samples.cpp b/src/hws/gpu_nvidia/nvml_samples.cpp
new file mode 100644
index 0000000..3ce65c3
--- /dev/null
+++ b/src/hws/gpu_nvidia/nvml_samples.cpp
@@ -0,0 +1,587 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/gpu_nvidia/nvml_samples.hpp"
+
+#include "hws/utility.hpp"  // hws::detail::{value_or_default, map_entry_to_string, quote}
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+
+#include <ostream>  // std::ostream
+#include <string>   // std::string
+
+namespace hws {
+
+//*************************************************************************************************************************************//
+//                                                           general samples                                                           //
+//*************************************************************************************************************************************//
+
+bool nvml_general_samples::has_samples() const {  // true as soon as at least one of the general sample fields holds a value
+    return this->architecture_.has_value() || this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value()
+           || this->persistence_mode_.has_value() || this->num_cores_.has_value() || this->compute_utilization_.has_value()
+           || this->memory_utilization_.has_value() || this->performance_level_.has_value();
+}
+
+std::string nvml_general_samples::generate_yaml_string() const {  // builds the YAML "general:" section from the fields that hold a value
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
+    std::string str{ "general:\n" };  // section header; every available field appends one indented entry with "unit" and "values"
+
+    // device architecture
+    if (this->architecture_.has_value()) {
+        str += fmt::format("  architecture:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->architecture_.value());
+    }
+    // device byte order
+    if (this->byte_order_.has_value()) {
+        str += fmt::format("  byte_order:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->byte_order_.value());
+    }
+    // the vendor specific ID
+    if (this->vendor_id_.has_value()) {
+        str += fmt::format("  vendor_id:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->vendor_id_.value());
+    }
+    // device name
+    if (this->name_.has_value()) {
+        str += fmt::format("  name:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->name_.value());
+    }
+    // persistence mode enabled
+    if (this->persistence_mode_.has_value()) {
+        str += fmt::format("  persistence_mode:\n"
+                           "    unit: \"bool\"\n"
+                           "    values: {}\n",
+                           this->persistence_mode_.value());
+    }
+    // number of cores
+    if (this->num_cores_.has_value()) {
+        str += fmt::format("  num_cores:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->num_cores_.value());
+    }
+
+    // device compute utilization
+    if (this->compute_utilization_.has_value()) {
+        str += fmt::format("  compute_utilization:\n"
+                           "    unit: \"percentage\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->compute_utilization_.value(), ", "));
+    }
+
+    // device memory utilization
+    if (this->memory_utilization_.has_value()) {
+        str += fmt::format("  memory_utilization:\n"
+                           "    unit: \"percentage\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_utilization_.value(), ", "));
+    }
+    // performance state
+    if (this->performance_level_.has_value()) {
+        str += fmt::format("  performance_level:\n"
+                           "    unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->performance_level_.value(), ", "));
+    }
+
+    return str;
+}
+
+std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) {  // prints one "name [unit]: value" line per general field, without a trailing newline
+    return out << fmt::format("architecture [string]: {}\n"
+                              "byte_order [string]: {}\n"
+                              "num_cores [int]: {}\n"  // NOTE(review): num_cores is printed third here but sixth in generate_yaml_string; labels and arguments still match
+                              "vendor_id [string]: {}\n"
+                              "name [string]: {}\n"
+                              "persistence_mode [bool]: {}\n"
+                              "compute_utilization [%]: [{}]\n"
+                              "memory_utilization [%]: [{}]\n"
+                              "performance_level [int]: [{}]",
+                              detail::value_or_default(samples.get_architecture()),  // presumably yields a default value for fields that were not sampled; confirm in hws/utility.hpp
+                              detail::value_or_default(samples.get_byte_order()),
+                              detail::value_or_default(samples.get_num_cores()),
+                              detail::value_or_default(samples.get_vendor_id()),
+                              detail::value_or_default(samples.get_name()),
+                              detail::value_or_default(samples.get_persistence_mode()),
+                              fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_utilization()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_performance_level()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                            clock samples                                                            //
+//*************************************************************************************************************************************//
+
+bool nvml_clock_samples::has_samples() const {  // true as soon as at least one of the clock sample fields holds a value
+    return this->auto_boosted_clock_enabled_.has_value() || this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value()
+           || this->memory_clock_frequency_min_.has_value() || this->memory_clock_frequency_max_.has_value() || this->sm_clock_frequency_max_.has_value()
+           || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value()
+           || this->memory_clock_frequency_.has_value() || this->sm_clock_frequency_.has_value() || this->throttle_reason_.has_value()
+           || this->throttle_reason_string_.has_value() || this->auto_boosted_clock_.has_value();
+}
+
+std::string nvml_clock_samples::generate_yaml_string() const {  // builds the YAML "clock:" section from the fields that hold a value
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
+    std::string str{ "clock:\n" };  // section header; every available field appends one indented entry with "unit" and "values"
+
+    // adaptive clock status
+    if (this->auto_boosted_clock_enabled_.has_value()) {
+        str += fmt::format("  auto_boosted_clock_enabled:\n"
+                           "    unit: \"bool\"\n"
+                           "    values: {}\n",
+                           this->auto_boosted_clock_enabled_.value());
+    }
+    // minimum graph clock
+    if (this->clock_frequency_min_.has_value()) {
+        str += fmt::format("  clock_frequency_min:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->clock_frequency_min_.value());
+    }
+    // maximum graph clock
+    if (this->clock_frequency_max_.has_value()) {
+        str += fmt::format("  clock_frequency_max:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->clock_frequency_max_.value());
+    }
+    // minimum memory clock
+    if (this->memory_clock_frequency_min_.has_value()) {
+        str += fmt::format("  memory_clock_frequency_min:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->memory_clock_frequency_min_.value());
+    }
+    // maximum memory clock
+    if (this->memory_clock_frequency_max_.has_value()) {
+        str += fmt::format("  memory_clock_frequency_max:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->memory_clock_frequency_max_.value());
+    }
+    // maximum SM clock
+    if (this->sm_clock_frequency_max_.has_value()) {
+        str += fmt::format("  sm_clock_frequency_max:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: {}\n",
+                           this->sm_clock_frequency_max_.value());
+    }
+    // the available clock frequencies
+    if (this->available_clock_frequencies_.has_value()) {
+        str += fmt::format("  available_clock_frequencies:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values:\n");
+        for (const auto &[key, value] : this->available_clock_frequencies_.value()) {  // nested mapping: one list per entry, keyed "memory_clock_frequency_<key>"
+            str += fmt::format("      memory_clock_frequency_{}: [{}]\n", key, fmt::join(value, ", "));
+        }
+    }
+    // the available memory clock frequencies
+    if (this->available_memory_clock_frequencies_.has_value()) {
+        str += fmt::format("  available_memory_clock_frequencies:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->available_memory_clock_frequencies_.value(), ", "));
+    }
+
+    // graph clock
+    if (this->clock_frequency_.has_value()) {
+        str += fmt::format("  clock_frequency:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->clock_frequency_.value(), ", "));
+    }
+    // memory clock
+    if (this->memory_clock_frequency_.has_value()) {
+        str += fmt::format("  memory_clock_frequency:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_clock_frequency_.value(), ", "));
+    }
+    // SM clock
+    if (this->sm_clock_frequency_.has_value()) {
+        str += fmt::format("  sm_clock_frequency:\n"
+                           "    unit: \"MHz\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->sm_clock_frequency_.value(), ", "));
+    }
+    // clock throttle reason as bitmask
+    if (this->throttle_reason_.has_value()) {
+        str += fmt::format("  throttle_reason:\n"
+                           "    unit: \"bitmask\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(detail::quote(this->throttle_reason_.value()), ", "));  // values are passed through detail::quote before being joined
+    }
+    // clock throttle reason as string
+    if (this->throttle_reason_string_.has_value()) {
+        str += fmt::format("  throttle_reason_string:\n"
+                           "    unit: \"string\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(detail::quote(this->throttle_reason_string_.value()), ", "));  // values are passed through detail::quote before being joined
+    }
+    // clock is auto-boosted
+    if (this->auto_boosted_clock_.has_value()) {
+        str += fmt::format("  auto_boosted_clock:\n"
+                           "    unit: \"bool\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->auto_boosted_clock_.value(), ", "));
+    }
+
+    return str;
+}
+
+std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) {  // prints one "name [unit]: value" line per clock field, without a trailing newline
+    return out << fmt::format("auto_boosted_clock_enabled [bool]: {}\n"
+                              "clock_frequency_min [MHz]: {}\n"
+                              "clock_frequency_max [MHz]: {}\n"
+                              "memory_clock_frequency_min [MHz]: {}\n"
+                              "memory_clock_frequency_max [MHz]: {}\n"
+                              "sm_clock_frequency_max [MHz]: {}\n"
+                              "available_clock_frequencies [MHz]: [{}]\n"
+                              "available_memory_clock_frequencies [MHz]: [{}]\n"
+                              "clock_frequency [MHz]: [{}]\n"
+                              "memory_clock_frequency [MHz]: [{}]\n"
+                              "sm_clock_frequency [MHz]: [{}]\n"
+                              "throttle_reason [bitmask]: [{}]\n"
+                              "throttle_reason_string [string]: [{}]\n"
+                              "auto_boosted_clock [bool]: [{}]",
+                              detail::value_or_default(samples.get_auto_boosted_clock_enabled()),  // presumably yields a default value for fields that were not sampled; confirm in hws/utility.hpp
+                              detail::value_or_default(samples.get_clock_frequency_min()),
+                              detail::value_or_default(samples.get_clock_frequency_max()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_min()),
+                              detail::value_or_default(samples.get_memory_clock_frequency_max()),
+                              detail::value_or_default(samples.get_sm_clock_frequency_max()),
+                              detail::map_entry_to_string(samples.get_available_clock_frequencies()),  // the only field stringified via detail::map_entry_to_string instead of fmt::join
+                              fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_throttle_reason_string()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                            power samples                                                            //
+//*************************************************************************************************************************************//
+
+bool nvml_power_samples::has_samples() const {  // true as soon as at least one of the power sample fields holds a value
+    return this->power_management_limit_.has_value() || this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value()
+           || this->power_management_mode_.has_value() || this->available_power_profiles_.has_value() || this->power_usage_.has_value()
+           || this->power_total_energy_consumption_.has_value() || this->power_profile_.has_value();
+}
+
+std::string nvml_power_samples::generate_yaml_string() const {  // builds the YAML "power:" section from the fields that hold a value
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
+    std::string str{ "power:\n" };  // section header; every available field appends one indented entry with "unit" and "values"
+
+    // power management limit
+    if (this->power_management_limit_.has_value()) {
+        str += fmt::format("  power_management_limit:\n"
+                           "    unit: \"W\"\n"
+                           "    values: {}\n",
+                           this->power_management_limit_.value());
+    }
+    // power enforced limit
+    if (this->power_enforced_limit_.has_value()) {
+        str += fmt::format("  power_enforced_limit:\n"
+                           "    unit: \"W\"\n"
+                           "    values: {}\n",
+                           this->power_enforced_limit_.value());
+    }
+    // power measurement type
+    if (this->power_measurement_type_.has_value()) {
+        str += fmt::format("  power_measurement_type:\n"
+                           "    unit: \"string\"\n"
+                           "    values: \"{}\"\n",
+                           this->power_measurement_type_.value());
+    }
+    // the power management mode
+    if (this->power_management_mode_.has_value()) {
+        str += fmt::format("  power_management_mode:\n"
+                           "    unit: \"bool\"\n"
+                           "    values: {}\n",
+                           this->power_management_mode_.value());
+    }
+    // available power profiles
+    if (this->available_power_profiles_.has_value()) {
+        str += fmt::format("  available_power_profiles:\n"
+                           "    unit: \"int\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->available_power_profiles_.value(), ", "));
+    }
+
+    // current power usage
+    if (this->power_usage_.has_value()) {
+        str += fmt::format("  power_usage:\n"
+                           "    unit: \"W\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->power_usage_.value(), ", "));
+    }
+    // total energy consumed
+    if (this->power_total_energy_consumption_.has_value()) {
+        str += fmt::format("  power_total_energy_consumed:\n"  // NOTE(review): key says "consumed" while the member, getter and operator<< say "consumption"; confirm this is intended
+                           "    unit: \"J\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->power_total_energy_consumption_.value(), ", "));
+    }
+    // power profile
+    if (this->power_profile_.has_value()) {
+        str += fmt::format("  power_profile:\n"
+                           "    unit: \"int\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->power_profile_.value(), ", "));
+    }
+
+    return str;
+}
+
+std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) {  // prints one "name [unit]: value" line per power field, without a trailing newline
+    return out << fmt::format("power_management_limit [W]: {}\n"
+                              "power_enforced_limit [W]: {}\n"
+                              "power_measurement_type [string]: {}\n"
+                              "power_management_mode [bool]: {}\n"
+                              "available_power_profiles [int]: [{}]\n"
+                              "power_usage [W]: [{}]\n"
+                              "power_total_energy_consumption [J]: [{}]\n"  // every entry except the last ends in '\n', so no two fields share a line
+                              "power_profile [int]: [{}]",  // last entry carries no newline, matching the other sample operator<< overloads
+                              detail::value_or_default(samples.get_power_management_limit()),
+                              detail::value_or_default(samples.get_power_enforced_limit()),
+                              detail::value_or_default(samples.get_power_measurement_type()),
+                              detail::value_or_default(samples.get_power_management_mode()),
+                              fmt::join(detail::value_or_default(samples.get_available_power_profiles()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_usage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_power_profile()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                            memory samples                                                           //
+//*************************************************************************************************************************************//
+
+bool nvml_memory_samples::has_samples() const {  // true as soon as at least one of the memory sample fields holds a value
+    return this->memory_total_.has_value() || this->pcie_link_speed_max_.has_value() || this->pcie_link_generation_max_.has_value()
+           || this->num_pcie_lanes_max_.has_value() || this->memory_bus_width_.has_value() || this->memory_used_.has_value()
+           || this->memory_free_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_generation_.has_value()
+           || this->pcie_link_speed_.has_value();
+}
+
+std::string nvml_memory_samples::generate_yaml_string() const {  // builds the YAML "memory:" section from the fields that hold a value
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
+    std::string str{ "memory:\n" };  // section header; every available field appends one indented entry with "unit" and "values"
+
+    // total memory size
+    if (this->memory_total_.has_value()) {
+        str += fmt::format("  memory_total:\n"
+                           "    unit: \"B\"\n"
+                           "    values: {}\n",
+                           this->memory_total_.value());
+    }
+    // maximum PCIe link speed
+    if (this->pcie_link_speed_max_.has_value()) {
+        str += fmt::format("  pcie_link_speed_max:\n"
+                           "    unit: \"MBPS\"\n"
+                           "    values: {}\n",
+                           this->pcie_link_speed_max_.value());
+    }
+    // maximum PCIe link generation
+    if (this->pcie_link_generation_max_.has_value()) {
+        str += fmt::format("  pcie_link_generation_max:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->pcie_link_generation_max_.value());
+    }
+    // maximum number of available PCIe lanes
+    if (this->num_pcie_lanes_max_.has_value()) {
+        str += fmt::format("  num_pcie_lanes_max:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->num_pcie_lanes_max_.value());
+    }
+    // memory bus width
+    if (this->memory_bus_width_.has_value()) {
+        str += fmt::format("  memory_bus_width:\n"
+                           "    unit: \"Bit\"\n"
+                           "    values: {}\n",
+                           this->memory_bus_width_.value());
+    }
+
+    // used memory size
+    if (this->memory_used_.has_value()) {
+        str += fmt::format("  memory_used:\n"
+                           "    unit: \"B\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_used_.value(), ", "));
+    }
+    // free memory size
+    if (this->memory_free_.has_value()) {
+        str += fmt::format("  memory_free:\n"
+                           "    unit: \"B\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->memory_free_.value(), ", "));
+    }
+    // PCIe link width
+    if (this->num_pcie_lanes_.has_value()) {
+        str += fmt::format("  num_pcie_lanes:\n"
+                           "    unit: \"int\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->num_pcie_lanes_.value(), ", "));
+    }
+    // PCIe link generation
+    if (this->pcie_link_generation_.has_value()) {
+        str += fmt::format("  pcie_link_generation:\n"
+                           "    unit: \"int\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->pcie_link_generation_.value(), ", "));
+    }
+    // PCIe link speed
+    if (this->pcie_link_speed_.has_value()) {
+        str += fmt::format("  pcie_link_speed:\n"
+                           "    unit: \"MBPS\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->pcie_link_speed_.value(), ", "));
+    }
+
+    return str;
+}
+
+std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) {  // prints one "name [unit]: value" line per memory field, without a trailing newline
+    return out << fmt::format("memory_total [B]: {}\n"
+                              "pcie_link_speed_max [MBPS]: {}\n"
+                              "pcie_link_generation_max [int]: {}\n"
+                              "num_pcie_lanes_max [int]: {}\n"
+                              "memory_bus_width [Bit]: {}\n"
+                              "memory_used [B]: [{}]\n"
+                              "memory_free [B]: [{}]\n"
+                              "num_pcie_lanes [int]: [{}]\n"
+                              "pcie_link_generation [int]: [{}]\n"
+                              "pcie_link_speed [MBPS]: [{}]",
+                              detail::value_or_default(samples.get_memory_total()),  // presumably yields a default value for fields that were not sampled; confirm in hws/utility.hpp
+                              detail::value_or_default(samples.get_pcie_link_speed_max()),
+                              detail::value_or_default(samples.get_pcie_link_generation_max()),
+                              detail::value_or_default(samples.get_num_pcie_lanes_max()),
+                              detail::value_or_default(samples.get_memory_bus_width()),
+                              fmt::join(detail::value_or_default(samples.get_memory_used()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_memory_free()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "));
+}
+
+//*************************************************************************************************************************************//
+//                                                         temperature samples                                                         //
+//*************************************************************************************************************************************//
+
+bool nvml_temperature_samples::has_samples() const {  // true as soon as at least one of the temperature sample fields holds a value
+    return this->num_fans_.has_value() || this->fan_speed_min_.has_value() || this->fan_speed_max_.has_value() || this->temperature_max_.has_value()
+           || this->memory_temperature_max_.has_value() || this->fan_speed_percentage_.has_value() || this->temperature_.has_value();
+}
+
+std::string nvml_temperature_samples::generate_yaml_string() const {  // builds the YAML "temperature:" section from the fields that hold a value
+    // if no samples are available, return an empty string
+    if (!this->has_samples()) {
+        return "";
+    }
+
+    std::string str{ "temperature:\n" };  // section header; every available field appends one indented entry with "unit" and "values"
+
+    // number of fans
+    if (this->num_fans_.has_value()) {
+        str += fmt::format("  num_fans:\n"
+                           "    unit: \"int\"\n"
+                           "    values: {}\n",
+                           this->num_fans_.value());
+    }
+    // min fan speed
+    if (this->fan_speed_min_.has_value()) {
+        str += fmt::format("  fan_speed_min:\n"
+                           "    unit: \"percentage\"\n"
+                           "    values: {}\n",
+                           this->fan_speed_min_.value());
+    }
+    // max fan speed
+    if (this->fan_speed_max_.has_value()) {
+        str += fmt::format("  fan_speed_max:\n"
+                           "    unit: \"percentage\"\n"
+                           "    values: {}\n",
+                           this->fan_speed_max_.value());
+    }
+    // temperature threshold GPU max
+    if (this->temperature_max_.has_value()) {
+        str += fmt::format("  temperature_max:\n"
+                           "    unit: \"°C\"\n"
+                           "    values: {}\n",
+                           this->temperature_max_.value());
+    }
+    // temperature threshold memory max
+    if (this->memory_temperature_max_.has_value()) {
+        str += fmt::format("  memory_temperature_max:\n"
+                           "    unit: \"°C\"\n"
+                           "    values: {}\n",
+                           this->memory_temperature_max_.value());
+    }
+
+    // fan speed
+    if (this->fan_speed_percentage_.has_value()) {
+        str += fmt::format("  fan_speed_percentage:\n"
+                           "    unit: \"percentage\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->fan_speed_percentage_.value(), ", "));
+    }
+    // temperature GPU
+    if (this->temperature_.has_value()) {
+        str += fmt::format("  temperature:\n"
+                           "    unit: \"°C\"\n"
+                           "    values: [{}]\n",
+                           fmt::join(this->temperature_.value(), ", "));
+    }
+
+    return str;
+}
+
+std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samples) {  // prints one "name [unit]: value" line per temperature field, without a trailing newline
+    return out << fmt::format("num_fans [int]: {}\n"
+                              "fan_speed_min [%]: {}\n"  // labels follow the getter / YAML key names (fan_speed_min, fan_speed_max, temperature_max)
+                              "fan_speed_max [%]: {}\n"
+                              "temperature_max [°C]: {}\n"
+                              "memory_temperature_max [°C]: {}\n"
+                              "fan_speed_percentage [%]: [{}]\n"
+                              "temperature [°C]: [{}]",
+                              detail::value_or_default(samples.get_num_fans()),
+                              detail::value_or_default(samples.get_fan_speed_min()),
+                              detail::value_or_default(samples.get_fan_speed_max()),
+                              detail::value_or_default(samples.get_temperature_max()),
+                              detail::value_or_default(samples.get_memory_temperature_max()),
+                              fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "),
+                              fmt::join(detail::value_or_default(samples.get_temperature()), ", "));
+}
+
+}  // namespace hws
diff --git a/src/hws/gpu_nvidia/utility.cpp b/src/hws/gpu_nvidia/utility.cpp
new file mode 100644
index 0000000..7c1b9f7
--- /dev/null
+++ b/src/hws/gpu_nvidia/utility.cpp
@@ -0,0 +1,55 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/gpu_nvidia/utility.hpp"
+
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
+#include "nvml.h"        // NVML runtime functions
+
+#include <string>  // std::string
+#include <vector>  // std::vector
+
+namespace hws::detail {
+
+std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) {  // turns a clocks-event-reason bitmask into a '|'-separated list of reason names
+    if (clocks_event_reasons == 0ull) {
+        return "None";  // an empty bitmask is reported as "None"
+    } else {
+        std::vector<std::string> reasons{};  // names of all bits found in the mask, in the order they are tested below
+        if ((clocks_event_reasons & nvmlClocksEventReasonApplicationsClocksSetting) != 0ull) {
+            reasons.emplace_back("ApplicationsClocksSetting");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonDisplayClockSetting) != 0ull) {
+            reasons.emplace_back("DisplayClockSetting");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonGpuIdle) != 0ull) {
+            reasons.emplace_back("GpuIdle");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonSwPowerCap) != 0ull) {
+            reasons.emplace_back("SwPowerCap");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonSwThermalSlowdown) != 0ull) {
+            reasons.emplace_back("SwThermalSlowdown");
+        }
+        if ((clocks_event_reasons & nvmlClocksEventReasonSyncBoost) != 0ull) {
+            reasons.emplace_back("SyncBoost");
+        }
+        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwPowerBrakeSlowdown) != 0ull) {  // NOTE(review): the three Hw* checks use nvmlClocksThrottleReason* names, the others nvmlClocksEventReason*; confirm this mix is intended
+            reasons.emplace_back("HwPowerBrakeSlowdown");
+        }
+        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwSlowdown) != 0ull) {
+            reasons.emplace_back("HwSlowdown");
+        }
+        if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) {
+            reasons.emplace_back("HwThermalSlowdown");
+        }
+        return fmt::format("{}", fmt::join(reasons, "|"));  // a non-zero mask that matches none of the tested bits yields an empty string
+    }
+}
+
+}  // namespace hws::detail
diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hws/hardware_sampler.cpp
similarity index 58%
rename from src/hardware_sampling/hardware_sampler.cpp
rename to src/hws/hardware_sampler.cpp
index c6554cd..abd907d 100644
--- a/src/hardware_sampling/hardware_sampler.cpp
+++ b/src/hws/hardware_sampler.cpp
@@ -5,15 +5,19 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/hardware_sampler.hpp"
+#include "hws/hardware_sampler.hpp"
 
-#include "hardware_sampling/event.hpp"    // hws::event
-#include "hardware_sampling/utility.hpp"  // hws::detail::{durations_from_reference_time, join}
+#include "hws/event.hpp"    // hws::event
+#include "hws/utility.hpp"  // hws::detail::durations_from_reference_time
+#include "hws/version.hpp"  // hws::version::version
+
+#include "fmt/chrono.h"  // direct formatting of std::chrono types
+#include "fmt/format.h"  // fmt::format
+#include "fmt/ranges.h"  // fmt::join
 
 #include <chrono>     // std::chrono::{system_clock, steady_clock, duration_cast, milliseconds}
 #include <cstddef>    // std::size_t
 #include <exception>  // std::exception
-#include <format>     // std::format
 #include <fstream>    // std::ofstream
 #include <iostream>   // std::cerr, std::endl
 #include <stdexcept>  // std::runtime_error, std::out_of_range
@@ -22,8 +26,13 @@
 
 namespace hws {
 
-hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval) :
-    sampling_interval_{ sampling_interval } { }
+hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) :
+    sampling_interval_{ sampling_interval },  // time between two samples
+    sample_category_{ category } {            // categories that should be sampled
+    if (sampling_interval <= std::chrono::milliseconds{ 0 }) {  // std::chrono::milliseconds is signed: reject negative values too, not only 0ms
+        throw std::invalid_argument{ "The sampling interval must be larger than 0ms!" };
+    }
+}
 
 hardware_sampler::~hardware_sampler() = default;
 
@@ -109,13 +118,13 @@ void hardware_sampler::add_event(decltype(event::name) name) {
 
 event hardware_sampler::get_event(const std::size_t idx) const {
     if (idx >= this->num_events()) {
-        throw std::out_of_range{ std::format("The index {} is out-of-range for the number of events {}!", idx, this->num_events()) };
+        throw std::out_of_range{ fmt::format("The index {} is out-of-range for the number of events {}!", idx, this->num_events()) };
     }
 
     return events_[idx];
 }
 
-void hardware_sampler::dump_yaml(const char *filename) {
+void hardware_sampler::dump_yaml(const char *filename) const {
     if (!this->has_sampling_stopped()) {
         throw std::runtime_error{ "Can dump samples to the YAML file only after the sampling has been stopped!" };
     }
@@ -123,42 +132,68 @@ void hardware_sampler::dump_yaml(const char *filename) {
     std::ofstream file{ filename, std::ios_base::app };
 
     // begin a new YAML document (only with "---" multiple YAML documents in a single file are allowed)
-    file << "---\n\n";
+    file << "---\n\n"
+         << this->as_yaml_string();
+}
 
-    // set the device identification
-    file << std::format("device_identification: {}\n\n", this->device_identification());
+void hardware_sampler::dump_yaml(const std::string &filename) const {
+    dump_yaml(filename.c_str());  // forward to the C-string overload, which opens the file and writes the YAML document
+}
+
+void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const {
+    dump_yaml(filename.string());  // convert the path to a std::string; the std::string overload forwards to the C-string overload
+}
 
-    // output the start date time of this hardware sampling
-    file << std::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", std::chrono::current_zone()->to_local(start_date_time_));
+std::string hardware_sampler::as_yaml_string() const {
+    if (!this->has_sampling_stopped()) {
+        throw std::runtime_error{ "Can return samples as string only after the sampling has been stopped!" };
+    }
 
-    // output the event information
+    // generate the event information
     std::vector<decltype(event::time_point)> event_time_points{};
     std::vector<decltype(event::name)> event_names{};
     for (const auto &[time_point, name] : events_) {
         event_time_points.push_back(time_point);
-        event_names.push_back(name);
+        event_names.push_back(fmt::format("\"{}\"", name));
     }
-    file << std::format("events:\n"
-                        "  time_points: [{}]\n"
-                        "  names: [{}]\n\n",
-                        detail::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "),
-                        detail::join(event_names, ", "));
-
-    // output the sampling information
-    file << std::format("sampling_interval: {}\n"
-                        "time_points: [{}]\n"
-                        "{}\n\n",
-                        this->sampling_interval(),
-                        detail::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),
-                        this->generate_yaml_string());
-}
-
-void hardware_sampler::dump_yaml(const std::string &filename) {
-    this->dump_yaml(filename.c_str());
+
+    return fmt::format("device_identification: \"{}\"\n"
+                       "\n"
+                       "version: \"{}\"\n"
+                       "\n"
+                       "start_time: \"{:%Y-%m-%d %X}\"\n"
+                       "\n"
+                       "events:\n"
+                       "  time_points:\n"
+                       "    unit: \"s\"\n"
+                       "    values: [{}]\n"
+                       "  names: [{}]\n"
+                       "\n"
+                       "sampling_interval:\n"
+                       "  unit: \"ms\"\n"
+                       "  values: {}\n"
+                       "\n"
+                       "time_points:\n"
+                       "  unit: \"s\"\n"
+                       "  values: [{}]\n"
+                       "\n"
+                       "{}\n",
+                       this->device_identification(),
+                       version::version,
+                       start_date_time_,
+                       fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "),
+                       fmt::join(event_names, ", "),
+                       this->sampling_interval().count(),
+                       fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "),
+                       this->samples_only_as_yaml_string());
 }
 
-void hardware_sampler::dump_yaml(const std::filesystem::path &filename) {
-    this->dump_yaml(filename.string().c_str());
+void hardware_sampler::add_time_point(const std::chrono::steady_clock::time_point time_point) {
+    this->time_points_.emplace_back(time_point);  // append the new time point to the end of the recorded time points
+}
+
+bool hardware_sampler::sample_category_enabled(const sample_category category) const noexcept {
+    return 0 != static_cast<int>(sample_category_ & category);  // enabled if combining the stored and the requested category via operator& is non-zero
+}
 
 }  // namespace hws
diff --git a/src/hws/system_hardware_sampler.cpp b/src/hws/system_hardware_sampler.cpp
new file mode 100644
index 0000000..1c08762
--- /dev/null
+++ b/src/hws/system_hardware_sampler.cpp
@@ -0,0 +1,212 @@
+/**
+ * @author Marcel Breyer
+ * @copyright 2024-today All Rights Reserved
+ * @license This file is released under the MIT license.
+ *          See the LICENSE.md file in the project root for full license information.
+ */
+
+#include "hws/system_hardware_sampler.hpp"
+
+#include "hws/event.hpp"            // hws::event
+#include "hws/sample_category.hpp"  // hws::sample_category
+
+#if defined(HWS_FOR_CPUS_ENABLED)
+    #include "hws/cpu/hardware_sampler.hpp"  // hws::cpu_hardware_sampler
+#endif
+#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
+    #include "hws/gpu_nvidia/hardware_sampler.hpp"  // hws::gpu_nvidia_hardware_sampler
+    #include "hws/gpu_nvidia/utility.hpp"           // HWS_CUDA_ERROR_CHECK
+
+    #include "cuda_runtime.h"  // cudaGetDeviceCount
+#endif
+#if defined(HWS_FOR_AMD_GPUS_ENABLED)
+    #include "hws/gpu_amd/hardware_sampler.hpp"  // hws::gpu_amd_hardware_sampler
+    #include "hws/gpu_amd/utility.hpp"           // HWS_HIP_ERROR_CHECK
+
+    #include "hip/hip_runtime.h"  // hipGetDeviceCount
+#endif
+#if defined(HWS_FOR_INTEL_GPUS_ENABLED)
+    #include "hws/gpu_intel/hardware_sampler.hpp"  // hws::gpu_intel_hardware_sampler
+    #include "hws/gpu_intel/utility.hpp"           // HWS_LEVEL_ZERO_ERROR_CHECK
+#endif
+
+#include "fmt/format.h"  // fmt::format
+
+#include <algorithm>  // std::for_each, std::all_of
+#include <chrono>     // std::chrono::milliseconds
+#include <cstddef>    // std::size_t
+#include <cstdint>    // std::uint32_t
+#include <memory>     // std::unique_ptr, std::make_unique
+#include <numeric>    // std::accumulate
+#include <stdexcept>  // std::out_of_range
+#include <vector>     // std::vector
+
+namespace hws {
+
+system_hardware_sampler::system_hardware_sampler(const sample_category category) :  // category-only constructor
+    system_hardware_sampler{ HWS_SAMPLING_INTERVAL, category } { }  // delegate to the (interval, category) constructor, using the HWS_SAMPLING_INTERVAL macro as interval
+
+system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds sampling_interval, sample_category category) {
+    // create the samplers: which backends are considered is decided at compile time via the HWS_FOR_*_ENABLED macros; all samplers share the same interval and category
+#if defined(HWS_FOR_CPUS_ENABLED)
+    {  // a single CPU sampler is added
+        samplers_.push_back(std::make_unique<cpu_hardware_sampler>(sampling_interval, category));
+    }
+#endif
+#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
+    {  // one sampler per device reported by cudaGetDeviceCount
+        int device_count{};
+        HWS_CUDA_ERROR_CHECK(cudaGetDeviceCount(&device_count));  // presumably reports a failed CUDA call - see hws/gpu_nvidia/utility.hpp
+        for (int device = 0; device < device_count; ++device) {
+            samplers_.push_back(std::make_unique<gpu_nvidia_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval, category));
+        }
+    }
+#endif
+#if defined(HWS_FOR_AMD_GPUS_ENABLED)
+    {  // one sampler per device reported by hipGetDeviceCount
+        int device_count{};
+        HWS_HIP_ERROR_CHECK(hipGetDeviceCount(&device_count));  // presumably reports a failed HIP call - see hws/gpu_amd/utility.hpp
+        for (int device = 0; device < device_count; ++device) {
+            samplers_.push_back(std::make_unique<gpu_amd_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval, category));
+        }
+    }
+#endif
+#if defined(HWS_FOR_INTEL_GPUS_ENABLED)
+    {
+        // initialize the Level Zero driver (GPU only); the check macro is used without a trailing semicolon throughout this block
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY))
+
+        // first zeDriverGet call with a nullptr handle: only queries the number of drivers
+        std::uint32_t driver_count{ 0 };
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr))
+
+        // more than one driver is rejected; NOTE(review): driver_count == 0 passes this check and would leave `driver` below value-initialized - confirm this case is handled
+        if (driver_count > 1) {
+            throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) };
+        }
+
+        // second zeDriverGet call: retrieve the driver handle itself
+        ze_driver_handle_t driver{};
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver))
+
+        // query the number of devices of the driver (nullptr handle array) and add one sampler per device index
+        std::uint32_t device_count{ 0 };
+        HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr))
+        for (std::uint32_t device = 0; device < device_count; ++device) {
+            samplers_.push_back(std::make_unique<gpu_intel_hardware_sampler>(static_cast<std::size_t>(device), sampling_interval, category));
+        }
+    }
+#endif
+}
+
+void system_hardware_sampler::start_sampling() {
+    for (auto &sampler_ptr : samplers_) { sampler_ptr->start_sampling(); }  // start every managed sampler, in container order
+}
+
+void system_hardware_sampler::stop_sampling() {
+    for (auto &sampler_ptr : samplers_) { sampler_ptr->stop_sampling(); }  // stop every managed sampler, in container order
+}
+
+void system_hardware_sampler::pause_sampling() {
+    for (auto &sampler_ptr : samplers_) { sampler_ptr->pause_sampling(); }  // pause every managed sampler, in container order
+}
+
+void system_hardware_sampler::resume_sampling() {
+    for (auto &sampler_ptr : samplers_) { sampler_ptr->resume_sampling(); }  // resume every managed sampler, in container order
+}
+
+bool system_hardware_sampler::has_sampling_started() const noexcept {
+    return std::none_of(samplers_.cbegin(), samplers_.cend(), [](const auto &sampler_ptr) { return !sampler_ptr->has_sampling_started(); });  // true if no sampler is still unstarted (also true without any sampler)
+}
+
+bool system_hardware_sampler::is_sampling() const noexcept {
+    return std::none_of(samplers_.cbegin(), samplers_.cend(), [](const auto &sampler_ptr) { return !sampler_ptr->is_sampling(); });  // true if no sampler reports that it is not sampling (also true without any sampler)
+}
+
+bool system_hardware_sampler::has_sampling_stopped() const noexcept {
+    return std::none_of(samplers_.cbegin(), samplers_.cend(), [](const auto &sampler_ptr) { return !sampler_ptr->has_sampling_stopped(); });  // true if no sampler is still unstopped (also true without any sampler)
+}
+
+void system_hardware_sampler::add_event(event e) {
+    for (auto &sampler_ptr : samplers_) { sampler_ptr->add_event(e); }  // register the same event with every managed sampler
+}
+
+void system_hardware_sampler::add_event(decltype(event::time_point) time_point, decltype(event::name) name) {
+    for (auto &sampler_ptr : samplers_) { sampler_ptr->add_event(time_point, name); }  // register the same (time point, name) pair with every managed sampler
+}
+
+void system_hardware_sampler::add_event(decltype(event::name) name) {
+    for (auto &sampler_ptr : samplers_) { sampler_ptr->add_event(name); }  // register an event with this name on every managed sampler
+}
+
+std::vector<std::size_t> system_hardware_sampler::num_events() const {
+    std::vector<std::size_t> counts(samplers_.size());  // one entry per sampler, same order as samplers_
+    for (std::size_t i = 0; i < counts.size(); ++i) { counts[i] = samplers_[i]->num_events(); }
+    return counts;
+}
+
+std::vector<std::vector<event>> system_hardware_sampler::get_events() const {
+    std::vector<std::vector<event>> all_events(samplers_.size());  // one event list per sampler, same order as samplers_
+    for (std::size_t i = 0; i < all_events.size(); ++i) { all_events[i] = samplers_[i]->get_events(); }
+    return all_events;
+}
+
+std::vector<std::vector<std::chrono::steady_clock::time_point>> system_hardware_sampler::sampling_time_points() const {
+    std::vector<std::vector<std::chrono::steady_clock::time_point>> all_time_points(samplers_.size());  // one time point list per sampler, same order as samplers_
+    for (std::size_t i = 0; i < all_time_points.size(); ++i) { all_time_points[i] = samplers_[i]->sampling_time_points(); }
+    return all_time_points;
+}
+
+std::vector<std::chrono::milliseconds> system_hardware_sampler::sampling_interval() const {
+    std::vector<std::chrono::milliseconds> intervals(samplers_.size());  // one interval per sampler, same order as samplers_
+    for (std::size_t i = 0; i < intervals.size(); ++i) { intervals[i] = samplers_[i]->sampling_interval(); }
+    return intervals;
+}
+
+std::size_t system_hardware_sampler::num_samplers() const noexcept {
+    return this->samplers_.size();  // number of samplers created by the constructor
+}
+
+std::vector<std::unique_ptr<hardware_sampler>> &system_hardware_sampler::samplers() noexcept {
+    return this->samplers_;  // mutable access to the container owning all samplers
+}
+
+const std::vector<std::unique_ptr<hardware_sampler>> &system_hardware_sampler::samplers() const noexcept {
+    return this->samplers_;  // read-only access to the container owning all samplers
+}
+
+std::unique_ptr<hardware_sampler> &system_hardware_sampler::sampler(const std::size_t idx) {
+    if (idx < samplers_.size()) {
+        return samplers_[idx];  // valid index: hand out a reference to the owning pointer
+    }
+    throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) };
+}
+
+const std::unique_ptr<hardware_sampler> &system_hardware_sampler::sampler(const std::size_t idx) const {
+    if (idx < samplers_.size()) {
+        return samplers_[idx];  // valid index: hand out a read-only reference to the owning pointer
+    }
+    throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) };
+}
+
+void system_hardware_sampler::dump_yaml(const char *filename) const {
+    for (const auto &sampler_ptr : samplers_) { sampler_ptr->dump_yaml(filename); }  // every sampler dumps to the same file name
+}
+
+void system_hardware_sampler::dump_yaml(const std::string &filename) const {
+    for (const auto &sampler_ptr : samplers_) { sampler_ptr->dump_yaml(filename); }  // every sampler dumps to the same file name
+}
+
+void system_hardware_sampler::dump_yaml(const std::filesystem::path &filename) const {
+    for (const auto &sampler_ptr : samplers_) { sampler_ptr->dump_yaml(filename); }  // every sampler dumps to the same path
+}
+
+std::string system_hardware_sampler::as_yaml_string() const {
+    return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](std::string joined, const auto &sampler_ptr) { joined += sampler_ptr->as_yaml_string(); return joined; });  // concatenate the YAML strings of all samplers, in container order
+}
+
+std::string system_hardware_sampler::samples_only_as_yaml_string() const {
+    return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](std::string joined, const auto &sampler_ptr) { joined += sampler_ptr->samples_only_as_yaml_string(); return joined; });  // concatenate the samples-only YAML strings of all samplers, in container order
+}
+
+}  // namespace hws
diff --git a/src/hardware_sampling/utility.cpp b/src/hws/utility.cpp
similarity index 91%
rename from src/hardware_sampling/utility.cpp
rename to src/hws/utility.cpp
index 58e604c..6651763 100644
--- a/src/hardware_sampling/utility.cpp
+++ b/src/hws/utility.cpp
@@ -5,7 +5,7 @@
  *          See the LICENSE.md file in the project root for full license information.
  */
 
-#include "hardware_sampling/utility.hpp"
+#include "hws/utility.hpp"
 
 #include <algorithm>    // std::min, std::transform
 #include <cctype>       // std::tolower
@@ -15,6 +15,10 @@
 
 namespace hws::detail {
 
+bool starts_with(const std::string_view sv, const std::string_view start) noexcept {
+    return sv.size() >= start.size() && sv.compare(0, start.size(), start) == 0;  // sv must be at least as long as start and its leading characters must equal start
+}
+
 std::string_view trim(std::string_view str) noexcept {
     // trim right
     {