diff --git a/.clang-format b/.clang-format index 9fc54fe..84b7fa5 100644 --- a/.clang-format +++ b/.clang-format @@ -77,9 +77,9 @@ ForEachMacros: [ 'foreach', 'Q_FOREACH', 'BOOST_FOREACH' ] IfMacros: [ ] IncludeBlocks: Regroup IncludeCategories: - - Regex: '^"hardware_sampling/' + - Regex: '^"hws/' Priority: 1 - - Regex: '^"(pybind|nvml|rocm_smi|level_zero|subprocess)' + - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess|fmt)' Priority: 2 - Regex: '^.*' Priority: 3 diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 0000000..7f41715 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,43 @@ +name: Generate documentation + +# only trigger this action on specific events +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build-documentation: + runs-on: ubuntu-latest + steps: + # checkout repository + - name: Checkout hws + uses: actions/checkout@v4.2.0 + with: + path: hardware_sampling + # install dependencies + - name: Dependencies + run: | + sudo apt update + sudo apt-get install -y doxygen graphviz + # configure project via CMake + - name: Configure + run: | + cd hardware_sampling + mkdir build + cd build + cmake -DHWS_ENABLE_DOCUMENTATION=ON .. 
+ # build project + - name: Generate + run: | + cd hardware_sampling/build + make doc + # deploy generated documentation using github.io + - name: Deploy + uses: peaceiris/actions-gh-pages@v4 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./hardware_sampling/docs/html \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9e03206..9f74de0 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,8 @@ Prerequisites # CMake ================================ bin/ build*/ +docs/html +install*/ cmake-build*/ CMakeLists.txt.user CMakeCache.txt @@ -53,4 +55,7 @@ CTestTestfile.cmake # IDEs ================================ .idea/ .vscode/ -.vs/ \ No newline at end of file +.vs/ + +# auto-generated version header +include/hws/version.hpp \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 272d1c0..97ccbe1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,27 +6,29 @@ cmake_minimum_required(VERSION 3.22) -project("HWS - Hardware Sampling for GPUs and CPUs" +project("hws - Hardware Sampling for GPUs and CPUs" VERSION 1.0.0 LANGUAGES CXX - DESCRIPTION "Hardware sampling (e.g., clock frequencies, memory consumption, temperatures, or energy draw) for CPUs, and GPUS.") + DESCRIPTION "Hardware sampling (e.g., clock frequencies, memory consumption, temperatures, or energy draw) for CPUs and GPUS.") # explicitly set library source files set(HWS_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/event.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/hardware_sampler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/src/hardware_sampling/utility.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/system_hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/utility.cpp ) # create hardware sampling library -set(HWS_LIBRARY_NAME hardware_sampling) +set(HWS_LIBRARY_NAME hws) add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES}) 
+add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME}) # set install target set(HWS_TARGETS_TO_INSTALL ${HWS_LIBRARY_NAME}) -# use C++20 -target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_20) +# use C++17 +target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_17) # add target include directory target_include_directories(${HWS_LIBRARY_NAME} PUBLIC @@ -58,6 +60,44 @@ endif () message(STATUS "Setting the hardware sampler interval to ${HWS_SAMPLING_INTERVAL}ms.") target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_SAMPLING_INTERVAL=${HWS_SAMPLING_INTERVAL}ms) +# install fmt as dependency +include(FetchContent) +set(HWS_fmt_VERSION 11.0.2) +find_package(fmt 11.0.2 QUIET) +if (fmt_FOUND) + message(STATUS "Found package fmt.") +else () + message(STATUS "Couldn't find package fmt. Building version ${HWS_fmt_VERSION} from source.") + set(FMT_PEDANTIC OFF CACHE INTERNAL "" FORCE) + set(FMT_WERROR OFF CACHE INTERNAL "" FORCE) + set(FMT_DOC OFF CACHE INTERNAL "" FORCE) + set(FMT_INSTALL ON CACHE INTERNAL "" FORCE) # let {fmt} handle the install target + set(FMT_TEST OFF CACHE INTERNAL "" FORCE) + set(FMT_FUZZ OFF CACHE INTERNAL "" FORCE) + set(FMT_CUDA_TEST OFF CACHE INTERNAL "" FORCE) + set(FMT_MODULE OFF CACHE INTERNAL "" FORCE) + set(FMT_SYSTEM_HEADERS ON CACHE INTERNAL "" FORCE) + # fetch string formatting library fmt + FetchContent_Declare(fmt + GIT_REPOSITORY https://github.com/fmtlib/fmt.git + GIT_TAG ${HWS_fmt_VERSION} + QUIET + ) + FetchContent_MakeAvailable(fmt) + set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) + add_dependencies(${HWS_LIBRARY_NAME} fmt) +endif () +target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt) + +######################################################################################################################## +## configure version header ## +######################################################################################################################## +message(STATUS "Configuring version 
information.") +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp.in + ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp + @ONLY +) #################################################################################################################### ## CPU measurements ## @@ -148,9 +188,9 @@ if (HWS_LSCPU_FOUND OR HWS_FREE_FOUND OR HWS_TURBOSTAT_EXECUTION_TYPE) # add source file to source file list target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definitions @@ -166,15 +206,16 @@ endif () # find libraries necessary for NVML and link against them find_package(CUDAToolkit QUIET) if (CUDAToolkit_FOUND) - target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE CUDA::nvml) + target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE CUDA::nvml CUDA::cudart) message(STATUS "Enable sampling of NVIDIA GPU information using NVML.") # add source file to source file list target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definition @@ -190,7 +231,8 @@ endif () ## try finding ROCm SMI find_package(rocm_smi QUIET) if (rocm_smi_FOUND) - target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE -lrocm_smi64) + find_package(HIP REQUIRED) + target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE -lrocm_smi64 hip::host) target_include_directories(${HWS_LIBRARY_NAME} PRIVATE ${ROCM_SMI_INCLUDE_DIR}) message(STATUS "Enable sampling of AMD GPU information using ROCm SMI.") @@ -198,8 +240,9 @@ if (rocm_smi_FOUND) # add source file to source file list target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definition @@ -222,9 +265,9 @@ if (level_zero_FOUND) # add source file to source file list target_sources(${HWS_LIBRARY_NAME} PRIVATE $) # add compile definition @@ -238,19 +281,27 @@ endif () ## enable Python bindings ## #################################################################################################################### option(HWS_ENABLE_PYTHON_BINDINGS "Build language bindings for Python." 
ON) - if (HWS_ENABLE_PYTHON_BINDINGS) add_subdirectory(bindings) endif () +######################################################################################################################## +## add documentation ## +######################################################################################################################## +option(HWS_ENABLE_DOCUMENTATION "Add documentation using Doxygen." OFF) +if (HWS_ENABLE_DOCUMENTATION) + add_subdirectory(docs) +endif () + + ######################################################################################################################## ## add support for `make install` ## ######################################################################################################################## include(GNUInstallDirs) ## install all necessary library targets install(TARGETS ${HWS_TARGETS_TO_INSTALL} - EXPORT hardware_sampling_Targets + EXPORT hws_Targets ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all files that are neither executables, shared lib or headers LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all shared lib files RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" # all executables @@ -264,28 +315,28 @@ install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/" ## manage version comparison include(CMakePackageConfigHelpers) write_basic_package_version_file( - "hardware_samplingConfigVersion.cmake" + "hwsConfigVersion.cmake" VERSION ${PROJECT_VERSION} COMPATIBILITY SameMajorVersion ) ## generate configuration file configure_package_config_file( - "${CMAKE_CURRENT_SOURCE_DIR}/cmake/hardware_samplingConfig.cmake.in" - "${PROJECT_BINARY_DIR}/hardware_samplingConfig.cmake" - INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/hwsConfig.cmake.in" + "${PROJECT_BINARY_DIR}/hwsConfig.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake ) ## create and copy install-targets file -install(EXPORT hardware_sampling_Targets - FILE 
hardware_samplingTargets.cmake +install(EXPORT hws_Targets + FILE hwsTargets.cmake NAMESPACE hws:: - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake ) ## create file containing the build configuration and version information install(FILES - "${PROJECT_BINARY_DIR}/hardware_samplingConfig.cmake" - "${PROJECT_BINARY_DIR}/hardware_samplingConfigVersion.cmake" - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hardware_sampling/cmake + "${PROJECT_BINARY_DIR}/hwsConfig.cmake" + "${PROJECT_BINARY_DIR}/hwsConfigVersion.cmake" + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake ) \ No newline at end of file diff --git a/README.md b/README.md index 39c2c3d..f21b0eb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # hws - Hardware Sampling for CPUs and GPUs -The Hardware Sampling (hws) library can be used to track hardware performance like clock frequency, memory usage, temperatures, or power draw. +The Hardware Sampling (hws) library can be used to track hardware performance like clock frequency, memory usage, +temperatures, or power draw. It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel. ## Getting Started @@ -9,15 +10,24 @@ It currently supports CPUs as well as GPUs from NVIDIA, AMD, and Intel. 
General dependencies: -- a C++20 capable compiler supporting `std::format` (tested with GCC 14.1.0) -- [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call) +- a C++17 capable compiler +- [{fmt} >= 11.0.2](https://github.com/fmtlib/fmt) for string formatting (automatically built during the CMake + configuration if it couldn't be found using the respective `find_package` call) +- [Pybind11 > v2.13.1](https://github.com/pybind/pybind11) if Python bindings are enabled (automatically built during + the CMake configuration if it couldn't be found using the respective `find_package` call) Dependencies based on the hardware to sample: -- if a CPU should be targeted: at least one of [`turbostat`](https://www.linux.org/docs/man8/turbostat.html) (may require root privileges), [`lscpu`](https://man7.org/linux/man-pages/man1/lscpu.1.html), or [`free`](https://man7.org/linux/man-pages/man1/free.1.html) and the [`subprocess.h`](https://github.com/sheredom/subprocess.h) library (automatically build during the CMake configuration if it couldn't be found using the respective `find_package` call) +- if a CPU should be targeted: at least one of [`turbostat`](https://www.linux.org/docs/man8/turbostat.html) (may + require root privileges), [`lscpu`](https://man7.org/linux/man-pages/man1/lscpu.1.html), or [ + `free`](https://man7.org/linux/man-pages/man1/free.1.html) and the [ + `subprocess.h`](https://github.com/sheredom/subprocess.h) library (automatically built during the CMake configuration + if it couldn't be found using the respective `find_package` call) - if an NVIDIA GPU should be targeted: NVIDIA's Management Library [`NVML`](https://docs.nvidia.com/deploy/nvml-api/) -- if an AMD GPU should be targeted: AMD's ROCm SMI library [`rocm_smi_lib`](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/modules.html) -- 
if an Intel GPU should be targeted: Intel's [`Level Zero library`](https://spec.oneapi.io/level-zero/latest/core/INTRO.html) +- if an AMD GPU should be targeted: AMD's ROCm SMI library [ + `rocm_smi_lib`](https://rocm.docs.amd.com/projects/rocm_smi_lib/en/latest/doxygen/html/modules.html) +- if an Intel GPU should be targeted: Intel's [ + `Level Zero library`](https://spec.oneapi.io/level-zero/latest/core/INTRO.html) ### Building hws @@ -40,7 +50,8 @@ cmake --build . -j The `[optional_options]` can be one or multiple of: -- `HWS_ENABLE_ERROR_CHECKS=ON|OFF` (default: `OFF`): enable sanity checks during hardware sampling, may be problematic with smaller sample intervals +- `HWS_ENABLE_ERROR_CHECKS=ON|OFF` (default: `OFF`): enable sanity checks during hardware sampling, may be problematic + with smaller sample intervals - `HWS_SAMPLING_INTERVAL=100ms` (default: `100ms`): set the sampling interval in milliseconds - `HWS_ENABLE_PYTHON_BINDINGS=ON|OFF` (default: `ON`): enable Python bindings @@ -55,23 +66,193 @@ cmake --install . --prefix "/home/myuser/installdir" Afterward, the necessary exports should be performed: ```bash -export CMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/share/hardware_sampling/cmake:${CMAKE_PREFIX_PATH} +export CMAKE_PREFIX_PATH=${CMAKE_INSTALL_PREFIX}/share/hws/cmake:${CMAKE_PREFIX_PATH} export LD_LIBRARY_PATH=${CMAKE_INSTALL_PREFIX}/lib:${LD_LIBRARY_PATH} export CPLUS_INCLUDE_PATH=${CMAKE_INSTALL_PREFIX}/include:${CPLUS_INCLUDE_PATH} export PYTHONPATH=${CMAKE_INSTALL_PREFIX}/lib:${PYTHONPATH} ``` +Note: when using Intel GPUs and segmentation faults are encountered in calls to `zes` functions, it may be necessary to set `export ZES_ENABLE_SYSMAN=1`. + +## Available samples + +The sampling type `fixed` denotes samples that are gathered once per hardware sampling process, like maximum clock frequencies or +temperatures or the total available memory. 
+The sampling type `sampled` denotes samples that are gathered during the whole hardware sampling process like the +current clock frequencies, temperatures, or memory consumption. + +### General samples + +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:--------------------|:-----------:|:-----------:|:-----------:|:---------:|:-------------:| +| architecture | fixed | str | str | str | - | +| byte_order | fixed | str | str (fix) | str (fix) | str (fix) | +| num_cores | fixed | int | int | - | - | +| num_threads | fixed | int | - | - | - | +| threads_per_core | fixed | int | - | - | - | +| cores_per_socket | fixed | int | - | - | - | +| num_sockets | fixed | int | - | - | - | +| numa_nodes | fixed | int | - | - | - | +| vendor_id | fixed | str | str (fix) | str | str (PCIe ID) | +| name | fixed | str | str | str | str | +| flags | fixed | list of str | - | - | list of str | +| persistence_mode | fixed | - | bool | - | - | +| standby_mode | fixed | - | - | - | str | +| num_threads_per_eu | fixed | - | - | - | int | +| eu_simd_width | fixed | - | - | - | int | +| compute_utilization | sampled | % | % | % | - | +| memory_utilization | sampled | - | % | % | - | +| ipc | sampled | float | - | - | - | +| irq | sampled | int | - | - | - | +| smi | sampled | int | - | - | - | +| poll | sampled | int | - | - | - | +| poll_percent | sampled | % | - | - | - | +| performance_level | sampled | - | int | str | - | + +### clock-related samples + +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-----------------------------------|:-----------:|:----:|:-----------:|:-----------:|:-----------:| +| auto_boosted_clock_enabled | fixed | bool | bool | - | - | +| clock_frequency_min | fixed | MHz | MHz | MHz | MHz | +| clock_frequency_max | fixed | MHz | MHz | MHz | MHz | +| memory_clock_frequency_min | fixed | - | MHz | MHz | MHz | +| memory_clock_frequency_max | fixed | - | MHz | MHz | MHz | +| socket_clock_frequency_min | fixed | - | - 
| MHz | - | +| socket_clock_frequency_max | fixed | - | - | MHz | - | +| sm_clock_frequency_max | fixed | - | MHz | - | - | +| available_clock_frequencies | fixed | - | map of MHz | list of MHz | list of MHz | +| available_memory_clock_frequencies | fixed | - | list of MHz | list of MHz | list of MHz | +| clock_frequency | sampled | MHz | MHz | MHz | MHz | +| average_non_idle_clock_frequency | sampled | MHz | - | - | - | +| time_stamp_counter | sampled | MHz | - | - | - | +| memory_clock_frequency | sampled | - | MHz | MHz | MHz | +| socket_clock_frequency | sampled | - | - | MHz | - | +| sm_clock_frequency | sampled | - | MHz | - | - | +| overdrive_level | sampled | - | - | % | - | +| memory_overdrive_level | sampled | - | - | % | - | +| throttle_reason | sampled | - | bitmask | - | bitmask | +| throttle_reason_string | sampled | - | str | - | str | +| memory_throttle_reason | sampled | - | - | - | bitmask | +| memory_throttle_reason_string | sampled | - | - | - | str | +| auto_boosted_clock | sampled | - | bool | - | - | +| frequency_limit_tdp | sampled | - | - | - | MHz | +| memory_frequency_limit_tdp | sampled | - | - | - | MHz | + +### power-related samples + +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:-------------------------------|:-----------:|:---------------------------------:|:-----------:|:--------------------------------------------------------------------------------------:|:----------------------------------------------------:| +| power_management_limit | fixed | - | W | W | - | +| power_enforced_limit | fixed | - | W | W | W | +| power_measurement_type | fixed | str (fix) | str | str | str | +| power_management_mode | fixed | - | bool | - | bool | +| available_power_profiles | fixed | - | list of int | list of str | - | +| power_usage | sampled | W | W | W | W
(calculated via power_total_energy_consumption) | +| core_watt | sampled | W | - | - | - | +| dram_watt | sampled | W | - | - | - | +| package_rapl_throttling | sampled | % | - | - | - | +| dram_rapl_throttling | sampled | % | - | - | - | +| power_total_energy_consumption | sampled | J
(calculated via power_usage) | J | J
(calculated via power_usage if
power_total_energy_consumption isn't available) | J | +| power_profile | sampled | - | int | str | - | + +### memory-related samples + +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:----------------------------|:-----------:|:----:|:-----------:|:--------:|:------------------------------:| +| cache_size_L1d | fixed | str | - | - | - | +| cache_size_L1i | fixed | str | - | - | - | +| cache_size_L2 | fixed | str | - | - | - | +| cache_size_L3 | fixed | str | - | - | - | +| memory_total | fixed | B | B | B | B
(map of memory modules) | +| visible_memory_total | fixed | - | - | B | B
(map of memory modules) | +| swap_memory_total | fixed | B | - | - | - | +| num_pcie_lanes_min | fixed | - | - | int | - | +| num_pcie_lanes_max | fixed | - | int | int | int | +| pcie_link_generation_max | fixed | - | int | - | int | +| pcie_link_speed_max | fixed | - | MBPS | - | MBPS | +| pcie_link_transfer_rate_min | fixed | - | - | MT/s | - | +| pcie_link_transfer_rate_max | fixed | - | - | MT/s | - | +| memory_bus_width | fixed | - | Bit | - | Bit
(map of memory modules) | +| memory_num_channels | fixed | - | - | - | int
(map of memory modules) | +| memory_used | sampled | B | B | B | B
(map of memory modules) | +| memory_free | sampled | B | B | B | B
(map of memory modules) | +| swap_memory_used | sampled | B | - | - | - | +| swap_memory_free | sampled | B | - | - | - | +| num_pcie_lanes | sampled | - | int | int | int | +| pcie_link_generation | sampled | - | int | - | int | +| pcie_link_speed | sampled | - | MBPS | - | MBPS | +| pcie_link_transfer_rate | sampled | - | - | T/s | - | + +### temperature-related samples + +| sample | sample type | CPUs | NVIDIA GPUs | AMD GPUs | Intel GPUs | +|:------------------------|:-----------:|:----:|:-----------:|:--------:|:----------:| +| num_fans | fixed | - | int | int | int | +| fan_speed_min | fixed | - | % | - | - | +| fan_speed_max | fixed | - | % | RPM | RPM | +| temperature_min | fixed | - | - | °C | - | +| temperature_max | fixed | - | °C | °C | °C | +| memory_temperature_min | fixed | - | - | °C | - | +| memory_temperature_max | fixed | - | °C | °C | °C | +| hotspot_temperature_min | fixed | - | - | °C | - | +| hotspot_temperature_max | fixed | - | - | °C | - | +| hbm_0_temperature_min | fixed | - | - | °C | - | +| hbm_0_temperature_max | fixed | - | - | °C | - | +| hbm_1_temperature_min | fixed | - | - | °C | - | +| hbm_1_temperature_max | fixed | - | - | °C | - | +| hbm_2_temperature_min | fixed | - | - | °C | - | +| hbm_2_temperature_max | fixed | - | - | °C | - | +| hbm_3_temperature_min | fixed | - | - | °C | - | +| hbm_3_temperature_max | fixed | - | - | °C | - | +| global_temperature_max | fixed | - | - | °C | °C | +| fan_speed_percentage | sampled | - | % | % | % | +| temperature | sampled | °C | °C | °C | °C | +| memory_temperature | sampled | - | - | °C | °C | +| hotspot_temperature | sampled | - | - | °C | - | +| hbm_0_temperature | sampled | - | - | °C | - | +| hbm_1_temperature | sampled | - | - | °C | - | +| hbm_2_temperature | sampled | - | - | °C | - | +| hbm_3_temperature | sampled | - | - | °C | - | +| global_temperature | sampled | - | - | - | °C | +| psu_temperature | sampled | - | - | - | °C | +| core_temperature | sampled | °C | - | - | - 
| +| core_throttle_percent | sampled | % | - | - | - | + +### gfx-related (iGPU) samples + +| sample | sample type | CPUs | +|:--------------------------|:-----------:|:----:| +| gfx_render_state_percent | sampled | % | +| gfx_frequency | sampled | MHz | +| average_gfx_frequency | sampled | MHz | +| gfx_state_c0_percent | sampled | % | +| cpu_works_for_gpu_percent | sampled | % | +| gfx_watt | sampled | W | + +### "idle states"-related samples + +| sample | sample type | CPUs | +|:-------------------------------------|:-----------:|:-------------:| +| idle_states | fixed | map of values | +| all_cpus_state_c0_percent | sampled | % | +| any_cpu_state_c0_percent | sampled | % | +| low_power_idle_state_percent | sampled | % | +| system_low_power_idle_state_percent | sampled | % | +| package_low_power_idle_state_percent | sampled | % | + ## Example Python usage ```python -import HardwareSampling +import HardwareSampling as hws import numpy as np import matplotlib.pyplot as plt +import matplotlib.dates as mdates import datetime -sampler = HardwareSampling.CpuHardwareSampler() +sampler = hws.CpuHardwareSampler() # could also be, e.g., -# sampler = HardwareSampling.GpuNvidiaHardwareSampler() +# sampler = hws.GpuNvidiaHardwareSampler() sampler.start() sampler.add_event("init") @@ -85,19 +266,18 @@ sampler.stop() sampler.dump_yaml("track.yaml") # plot the results -time_points = sampler.time_points() -relative_time_points = [(t - time_points[0]) / datetime.timedelta(milliseconds=1) for t in time_points] +time_points = sampler.relative_time_points() -plt.plot(relative_time_points, sampler.clock_samples().get_average_frequency(), label="average") -plt.plot(relative_time_points, sampler.clock_samples().get_average_non_idle_frequency(), label="average non-idle") +plt.plot(time_points, sampler.clock_samples().get_clock_frequency(), label="average") +plt.plot(time_points, sampler.clock_samples().get_average_non_idle_clock_frequency(), label="average non-idle") axes = 
plt.gcf().axes[0] x_bounds = axes.get_xlim() -for event in sampler.get_events()[1:-1]: - tp = (event.time_point - time_points[0]) / datetime.timedelta(milliseconds=1) - - axes.axvline(x=tp, color='r') - axes.annotate(text=event.name, xy=(((tp - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), xycoords='axes fraction', rotation=270) +for event in sampler.get_relative_events()[1:-1]: + axes.axvline(x=event.relative_time_point, color='r') + axes.annotate(text=event.name, + xy=(((event.relative_time_point - x_bounds[0]) / (x_bounds[1] - x_bounds[0])), 1.025), + xycoords='axes fraction', rotation=270) plt.xlabel("runtime [ms]") plt.ylabel("clock frequency [MHz]") @@ -111,4 +291,5 @@ plt.show() ## License -The hws library is distributed under the [MIT license](https://github.com/SC-SGS/hardware_sampling/blob/main/LICENSE.md). \ No newline at end of file +The hws library is distributed under +the [MIT license](https://github.com/SC-SGS/hardware_sampling/blob/main/LICENSE.md). \ No newline at end of file diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt index 95f6a2b..f2ef8d8 100644 --- a/bindings/CMakeLists.txt +++ b/bindings/CMakeLists.txt @@ -4,7 +4,7 @@ ## See the LICENSE.md file in the project root for full license information. 
######################################################################################################################## -message(STATUS "Building Python language bindings for PLSSVM.") +message(STATUS "Building Python language bindings.") find_package(Python COMPONENTS Interpreter Development) @@ -32,7 +32,11 @@ endif () # set source files that are always used set(HWS_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/relative_event.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sample_category.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/system_hardware_sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/version.cpp ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp ) diff --git a/bindings/cpu_hardware_sampler.cpp b/bindings/cpu_hardware_sampler.cpp index 75f0a01..8e3e104 100644 --- a/bindings/cpu_hardware_sampler.cpp +++ b/bindings/cpu_hardware_sampler.cpp @@ -5,24 +5,27 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} -#include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} +#include "hws/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds #include "pybind11/pybind11.h" // py::module_ #include "pybind11/stl.h" // bind STL types #include // std::chrono::milliseconds -#include // std::format namespace py = pybind11; void init_cpu_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "CpuGeneralSamples") + .def("has_samples", &hws::cpu_general_samples::has_samples, "true if any sample is available, false otherwise") .def("get_architecture", &hws::cpu_general_samples::get_architecture, "the CPU architecture (e.g., x86_64)") .def("get_byte_order", &hws::cpu_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") + .def("get_num_cores", &hws::cpu_general_samples::get_num_cores, "the total number of cores of the CPU(s)") .def("get_num_threads", &hws::cpu_general_samples::get_num_threads, "the number of threads of the CPU(s) including potential hyper-threads") .def("get_threads_per_core", &hws::cpu_general_samples::get_threads_per_core, "the number of hyper-threads per core") .def("get_cores_per_socket", &hws::cpu_general_samples::get_cores_per_socket, "the number of physical cores per socket") @@ -31,45 +34,50 @@ void init_cpu_hardware_sampler(py::module_ 
&m) { .def("get_vendor_id", &hws::cpu_general_samples::get_vendor_id, "the vendor ID (e.g. GenuineIntel)") .def("get_name", &hws::cpu_general_samples::get_name, "the name of the CPU") .def("get_flags", &hws::cpu_general_samples::get_flags, "potential CPU flags (e.g., sse4_1, avx, avx, etc)") - .def("get_busy_percent", &hws::cpu_general_samples::get_busy_percent, "the percent the CPU was busy doing work") + .def("get_compute_utilization", &hws::cpu_general_samples::get_compute_utilization, "the percent the CPU was busy doing work") .def("get_ipc", &hws::cpu_general_samples::get_ipc, "the instructions-per-cycle count") .def("get_irq", &hws::cpu_general_samples::get_irq, "the number of interrupts") .def("get_smi", &hws::cpu_general_samples::get_smi, "the number of system management interrupts") .def("get_poll", &hws::cpu_general_samples::get_poll, "the number of times the CPU was in the polling state") .def("get_poll_percent", &hws::cpu_general_samples::get_poll_percent, "the percent of the CPU was in the polling state") .def("__repr__", [](const hws::cpu_general_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the clock samples py::class_(m, "CpuClockSamples") - .def("get_frequency_boost", &hws::cpu_clock_samples::get_frequency_boost, "true if frequency boosting is enabled") - .def("get_min_frequency", &hws::cpu_clock_samples::get_min_frequency, "the minimum possible CPU frequency in MHz") - .def("get_max_frequency", &hws::cpu_clock_samples::get_max_frequency, "the maximum possible CPU frequency in MHz") - .def("get_average_frequency", &hws::cpu_clock_samples::get_average_frequency, "the average CPU frequency in MHz including idle cores") - .def("get_average_non_idle_frequency", &hws::cpu_clock_samples::get_average_non_idle_frequency, "the average CPU frequency in MHz excluding idle cores") + .def("has_samples", &hws::cpu_clock_samples::has_samples, "true if any sample is available, false otherwise") + 
.def("get_auto_boosted_clock_enabled", &hws::cpu_clock_samples::get_auto_boosted_clock_enabled, "true if frequency boosting is enabled") + .def("get_clock_frequency_min", &hws::cpu_clock_samples::get_clock_frequency_min, "the minimum possible CPU frequency in MHz") + .def("get_clock_frequency_max", &hws::cpu_clock_samples::get_clock_frequency_max, "the maximum possible CPU frequency in MHz") + .def("get_clock_frequency", &hws::cpu_clock_samples::get_clock_frequency, "the average CPU frequency in MHz including idle cores") + .def("get_average_non_idle_clock_frequency", &hws::cpu_clock_samples::get_average_non_idle_clock_frequency, "the average CPU frequency in MHz excluding idle cores") .def("get_time_stamp_counter", &hws::cpu_clock_samples::get_time_stamp_counter, "the time stamp counter") .def("__repr__", [](const hws::cpu_clock_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the power samples py::class_(m, "CpuPowerSamples") - .def("get_package_watt", &hws::cpu_power_samples::get_package_watt, "the currently consumed power of the package of the CPU in W") + .def("has_samples", &hws::cpu_power_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_power_measurement_type", &hws::cpu_power_samples::get_power_measurement_type, "the type of the power readings: always \"instant/current\"") + .def("get_power_usage", &hws::cpu_power_samples::get_power_usage, "the currently consumed power of the package of the CPU in W") + .def("get_power_total_energy_consumed", &hws::cpu_power_samples::get_power_total_energy_consumption, "the total power consumption in J") .def("get_core_watt", &hws::cpu_power_samples::get_core_watt, "the currently consumed power of the core part of the CPU in W") .def("get_ram_watt", &hws::cpu_power_samples::get_ram_watt, "the currently consumed power of the RAM part of the CPU in W") .def("get_package_rapl_throttle_percent", 
&hws::cpu_power_samples::get_package_rapl_throttle_percent, "the percent of time the package throttled due to RAPL limiters") .def("get_dram_rapl_throttle_percent", &hws::cpu_power_samples::get_dram_rapl_throttle_percent, "the percent of time the DRAM throttled due to RAPL limiters") .def("__repr__", [](const hws::cpu_power_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the memory samples py::class_(m, "CpuMemorySamples") - .def("get_l1d_cache", &hws::cpu_memory_samples::get_l1d_cache, "the size of the L1 data cache") - .def("get_l1i_cache", &hws::cpu_memory_samples::get_l1i_cache, "the size of the L1 instruction cache") - .def("get_l2_cache", &hws::cpu_memory_samples::get_l2_cache, "the size of the L2 cache") - .def("get_l3_cache", &hws::cpu_memory_samples::get_l3_cache, "the size of the L2 cache") + .def("has_samples", &hws::cpu_memory_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_cache_size_L1d", &hws::cpu_memory_samples::get_cache_size_L1d, "the size of the L1 data cache") + .def("get_cache_size_L1i", &hws::cpu_memory_samples::get_cache_size_L1i, "the size of the L1 instruction cache") + .def("get_cache_size_L2", &hws::cpu_memory_samples::get_cache_size_L2, "the size of the L2 cache") + .def("get_cache_size_L3", &hws::cpu_memory_samples::get_cache_size_L3, "the size of the L2 cache") .def("get_memory_total", &hws::cpu_memory_samples::get_memory_total, "the total available memory in Byte") .def("get_swap_memory_total", &hws::cpu_memory_samples::get_swap_memory_total, "the total available swap memory in Byte") .def("get_memory_used", &hws::cpu_memory_samples::get_memory_used, "the currently used memory in Byte") @@ -77,20 +85,22 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("get_swap_memory_used", &hws::cpu_memory_samples::get_swap_memory_used, "the currently used swap memory in Byte") .def("get_swap_memory_free", &hws::cpu_memory_samples::get_swap_memory_free, "the 
currently free swap memory in Byte") .def("__repr__", [](const hws::cpu_memory_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the temperature samples py::class_(m, "CpuTemperatureSamples") + .def("has_samples", &hws::cpu_temperature_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_temperature", &hws::cpu_temperature_samples::get_temperature, "the current temperature of the whole package in °C") .def("get_core_temperature", &hws::cpu_temperature_samples::get_core_temperature, "the current temperature of the core part of the CPU in °C") .def("get_core_throttle_percent", &hws::cpu_temperature_samples::get_core_throttle_percent, "the percent of time the CPU has throttled") - .def("get_package_temperature", &hws::cpu_temperature_samples::get_package_temperature, "the current temperature of the whole package in °C") .def("__repr__", [](const hws::cpu_temperature_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the gfx samples py::class_(m, "CpuGfxSamples") + .def("has_samples", &hws::cpu_gfx_samples::has_samples, "true if any sample is available, false otherwise") .def("get_gfx_render_state_percent", &hws::cpu_gfx_samples::get_gfx_render_state_percent, "the percent of time the iGPU was in the render state") .def("get_gfx_frequency", &hws::cpu_gfx_samples::get_gfx_frequency, "the current iGPU power consumption in W") .def("get_average_gfx_frequency", &hws::cpu_gfx_samples::get_average_gfx_frequency, "the average iGPU frequency in MHz") @@ -98,11 +108,12 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("get_cpu_works_for_gpu_percent", &hws::cpu_gfx_samples::get_cpu_works_for_gpu_percent, "the percent of time the CPU was doing work for the iGPU") .def("get_gfx_watt", &hws::cpu_gfx_samples::get_gfx_watt, "the currently consumed power of the iGPU of the CPU in W") .def("__repr__", [](const hws::cpu_gfx_samples &self) { - return std::format("", 
self); + return fmt::format("", self); }); // bind the idle state samples py::class_(m, "CpuIdleStateSamples") + .def("has_samples", &hws::cpu_idle_states_samples::has_samples, "true if any sample is available, false otherwise") .def("get_idle_states", &hws::cpu_idle_states_samples::get_idle_states, "the map of additional CPU idle states") .def("get_all_cpus_state_c0_percent", &hws::cpu_idle_states_samples::get_all_cpus_state_c0_percent, "the percent of time all CPUs were in idle state c0") .def("get_any_cpu_state_c0_percent", &hws::cpu_idle_states_samples::get_any_cpu_state_c0_percent, "the percent of time any CPU was in the idle state c0") @@ -110,13 +121,15 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("get_system_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_system_low_power_idle_state_percent, "the percent of time the CPU was in the system low power idle state") .def("get_package_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_package_low_power_idle_state_percent, "the percent of time the CPU was in the package low power idle state") .def("__repr__", [](const hws::cpu_gfx_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the CPU hardware sampler class py::class_(m, "CpuHardwareSampler") .def(py::init<>(), "construct a new CPU hardware sampler") + .def(py::init(), "construct a new CPU hardware sampler sampling only the provided sample_category samples") .def(py::init(), "construct a new CPU hardware sampler specifying the used sampling interval") + .def(py::init(), "construct a new CPU hardware sampler specifying the used sampling interval sampling only the provided sample_category samples") .def("general_samples", &hws::cpu_hardware_sampler::general_samples, "get all general samples") .def("clock_samples", &hws::cpu_hardware_sampler::clock_samples, "get all clock related samples") .def("power_samples", &hws::cpu_hardware_sampler::power_samples, "get all power related 
samples") @@ -124,7 +137,8 @@ void init_cpu_hardware_sampler(py::module_ &m) { .def("temperature_samples", &hws::cpu_hardware_sampler::temperature_samples, "get all temperature related samples") .def("gfx_samples", &hws::cpu_hardware_sampler::gfx_samples, "get all gfx (iGPU) related samples") .def("idle_state_samples", &hws::cpu_hardware_sampler::idle_state_samples, "get all idle state related samples") + .def("samples_only_as_yaml_string", &hws::cpu_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::cpu_hardware_sampler &self) { - return std::format("", self); + return fmt::format("", self); }); } diff --git a/bindings/event.cpp b/bindings/event.cpp index ba463ad..f19315a 100644 --- a/bindings/event.cpp +++ b/bindings/event.cpp @@ -5,14 +5,13 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/event.hpp" // hws::event +#include "hws/event.hpp" // hws::event +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // bind std::chrono types #include "pybind11/pybind11.h" // py::module_ #include "pybind11/stl.h" // bind STL types -#include // std::format - namespace py = pybind11; void init_event(py::module_ &m) { @@ -22,6 +21,6 @@ void init_event(py::module_ &m) { .def_readonly("time_point", &hws::event::time_point, "read the time point associated to this event") .def_readonly("name", &hws::event::name, "read the name associated to this event") .def("__repr__", [](const hws::event &self) { - return std::format("", self.time_point.time_since_epoch(), self.name); + return fmt::format("", self.time_point.time_since_epoch(), self.name); }); } diff --git a/bindings/gpu_amd_hardware_sampler.cpp b/bindings/gpu_amd_hardware_sampler.cpp index f43c741..db846da 100644 --- a/bindings/gpu_amd_hardware_sampler.cpp +++ b/bindings/gpu_amd_hardware_sampler.cpp @@ -5,116 +5,135 @@ * See the LICENSE.md file in the project root for full license 
information. */ -#include "hardware_sampling/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler -#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler +#include "hws/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds #include "pybind11/pybind11.h" // py::module_ #include "pybind11/stl.h" // bind STL types #include // std::chrono::milliseconds #include // std::size_t -#include // std::format namespace py = pybind11; void init_gpu_amd_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "RocmSmiGeneralSamples") + .def("has_samples", &hws::rocm_smi_general_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_architecture", &hws::rocm_smi_general_samples::get_name, "the architecture name of the device") + .def("get_byte_order", &hws::rocm_smi_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") + .def("get_vendor_id", &hws::rocm_smi_general_samples::get_vendor_id, "the vendor ID") .def("get_name", &hws::rocm_smi_general_samples::get_name, "the name of the device") + .def("get_compute_utilization", &hws::rocm_smi_general_samples::get_compute_utilization, "the GPU compute utilization in percent") + .def("get_memory_utilization", &hws::rocm_smi_general_samples::get_memory_utilization, "the GPU memory utilization in percent") 
.def("get_performance_level", &hws::rocm_smi_general_samples::get_performance_level, "the performance level: one of rsmi_dev_perf_level_t") - .def("get_utilization_gpu", &hws::rocm_smi_general_samples::get_utilization_gpu, "the GPU compute utilization in percent") - .def("get_utilization_mem", &hws::rocm_smi_general_samples::get_utilization_mem, "the GPU memory utilization in percent") .def("__repr__", [](const hws::rocm_smi_general_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the clock samples py::class_(m, "RocmSmiClockSamples") - .def("get_clock_system_min", &hws::rocm_smi_clock_samples::get_clock_system_min, "the minimum possible system clock frequency in Hz") - .def("get_clock_system_max", &hws::rocm_smi_clock_samples::get_clock_system_max, "the maximum possible system clock frequency in Hz") - .def("get_clock_socket_min", &hws::rocm_smi_clock_samples::get_clock_socket_min, "the minimum possible socket clock frequency in Hz") - .def("get_clock_socket_max", &hws::rocm_smi_clock_samples::get_clock_socket_max, "the maximum possible socket clock frequency in Hz") - .def("get_clock_memory_min", &hws::rocm_smi_clock_samples::get_clock_memory_min, "the minimum possible memory clock frequency in Hz") - .def("get_clock_memory_max", &hws::rocm_smi_clock_samples::get_clock_memory_max, "the maximum possible memory clock frequency in Hz") - .def("get_clock_system", &hws::rocm_smi_clock_samples::get_clock_system, "the current system clock frequency in Hz") - .def("get_clock_socket", &hws::rocm_smi_clock_samples::get_clock_socket, "the current socket clock frequency in Hz") - .def("get_clock_memory", &hws::rocm_smi_clock_samples::get_clock_memory, "the current memory clock frequency in Hz") + .def("has_samples", &hws::rocm_smi_clock_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_clock_frequency_min", &hws::rocm_smi_clock_samples::get_clock_frequency_min, "the minimum possible system clock 
frequency in MHz") + .def("get_clock_frequency_max", &hws::rocm_smi_clock_samples::get_clock_frequency_max, "the maximum possible system clock frequency in MHz") + .def("get_memory_clock_frequency_min", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") + .def("get_memory_clock_frequency_max", &hws::rocm_smi_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz") + .def("get_socket_clock_frequency_min", &hws::rocm_smi_clock_samples::get_socket_clock_frequency_min, "the minimum possible socket clock frequency in MHz") + .def("get_socket_clock_frequency_max", &hws::rocm_smi_clock_samples::get_socket_clock_frequency_max, "the maximum possible socket clock frequency in MHz") + .def("get_available_clock_frequencies", &hws::rocm_smi_clock_samples::get_available_clock_frequencies, "the available clock frequencies in MHz (slowest to fastest)") + .def("get_available_memory_clock_frequencies", &hws::rocm_smi_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)") + .def("get_clock_frequency", &hws::rocm_smi_clock_samples::get_clock_frequency, "the current system clock frequency in MHz") + .def("get_memory_clock_frequency", &hws::rocm_smi_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz") + .def("get_socket_clock_frequency", &hws::rocm_smi_clock_samples::get_socket_clock_frequency, "the current socket clock frequency in MHz") .def("get_overdrive_level", &hws::rocm_smi_clock_samples::get_overdrive_level, "the GPU overdrive percentage") .def("get_memory_overdrive_level", &hws::rocm_smi_clock_samples::get_memory_overdrive_level, "the GPU's memory overdrive percentage") .def("__repr__", [](const hws::rocm_smi_clock_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the power samples py::class_(m, "RocmSmiPowerSamples") - 
.def("get_power_default_cap", &hws::rocm_smi_power_samples::get_power_default_cap, "the default power cap, may be different from power cap") - .def("get_power_cap", &hws::rocm_smi_power_samples::get_power_cap, "if the GPU draws more power (μW) than the power cap, the GPU may throttle") - .def("get_power_type", &hws::rocm_smi_power_samples::get_power_type, "the type of the power management: either current power draw or average power draw") + .def("has_samples", &hws::rocm_smi_power_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_power_management_limit", &hws::rocm_smi_power_samples::get_power_management_limit, "the default power cap (W), may be different from power cap") + .def("get_power_enforced_limit", &hws::rocm_smi_power_samples::get_power_enforced_limit, "if the GPU draws more power (W) than the power cap, the GPU may throttle") + .def("get_power_measurement_type", &hws::rocm_smi_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw") .def("get_available_power_profiles", &hws::rocm_smi_power_samples::get_available_power_profiles, "a list of the available power profiles") - .def("get_power_usage", &hws::rocm_smi_power_samples::get_power_usage, "the current GPU socket power draw in μW") - .def("get_power_total_energy_consumption", &hws::rocm_smi_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in μJ") + .def("get_power_usage", &hws::rocm_smi_power_samples::get_power_usage, "the current GPU socket power draw in W") + .def("get_power_total_energy_consumption", &hws::rocm_smi_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in J") .def("get_power_profile", &hws::rocm_smi_power_samples::get_power_profile, "the current active power profile; one of 'available_power_profiles'") .def("__repr__", [](const hws::rocm_smi_power_samples &self) { - 
return std::format("", self); + return fmt::format("", self); }); // bind the memory samples py::class_(m, "RocmSmiMemorySamples") + .def("has_samples", &hws::rocm_smi_memory_samples::has_samples, "true if any sample is available, false otherwise") .def("get_memory_total", &hws::rocm_smi_memory_samples::get_memory_total, "the total available memory in Byte") .def("get_visible_memory_total", &hws::rocm_smi_memory_samples::get_visible_memory_total, "the total visible available memory in Byte, may be smaller than the total memory") - .def("get_min_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_min_num_pcie_lanes, "the minimum number of used PCIe lanes") - .def("get_max_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_max_num_pcie_lanes, "the maximum number of used PCIe lanes") + .def("get_num_pcie_lanes_min", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_min, "the minimum number of used PCIe lanes") + .def("get_num_pcie_lanes_max", &hws::rocm_smi_memory_samples::get_num_pcie_lanes_max, "the maximum number of used PCIe lanes") + .def("get_pcie_link_transfer_rate_min", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate_min, "the minimum PCIe link transfer rate in MT/s") + .def("get_pcie_link_transfer_rate_max", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate_max, "the maximum PCIe link transfer rate in MT/s") .def("get_memory_used", &hws::rocm_smi_memory_samples::get_memory_used, "the currently used memory in Byte") - .def("get_pcie_transfer_rate", &hws::rocm_smi_memory_samples::get_pcie_transfer_rate, "the current PCIe transfer rate in T/s") + .def("get_memory_free", &hws::rocm_smi_memory_samples::get_memory_free, "the currently free memory in Byte") .def("get_num_pcie_lanes", &hws::rocm_smi_memory_samples::get_num_pcie_lanes, "the number of currently used PCIe lanes") + .def("get_pcie_link_transfer_rate", &hws::rocm_smi_memory_samples::get_pcie_link_transfer_rate, "the current PCIe transfer rate in T/s") .def("__repr__", [](const 
hws::rocm_smi_memory_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the temperature samples py::class_(m, "RocmSmiTemperatureSamples") + .def("has_samples", &hws::rocm_smi_temperature_samples::has_samples, "true if any sample is available, false otherwise") .def("get_num_fans", &hws::rocm_smi_temperature_samples::get_num_fans, "the number of fans (if any)") - .def("get_max_fan_speed", &hws::rocm_smi_temperature_samples::get_max_fan_speed, "the maximum fan speed") - .def("get_temperature_edge_min", &hws::rocm_smi_temperature_samples::get_temperature_edge_min, "the minimum temperature on the GPU's edge temperature sensor in m°C") - .def("get_temperature_edge_max", &hws::rocm_smi_temperature_samples::get_temperature_edge_max, "the maximum temperature on the GPU's edge temperature sensor in m°C") - .def("get_temperature_hotspot_min", &hws::rocm_smi_temperature_samples::get_temperature_hotspot_min, "the minimum temperature on the GPU's hotspot temperature sensor in m°C") - .def("get_temperature_hotspot_max", &hws::rocm_smi_temperature_samples::get_temperature_hotspot_max, "the maximum temperature on the GPU's hotspot temperature sensor in m°C") - .def("get_temperature_memory_min", &hws::rocm_smi_temperature_samples::get_temperature_memory_min, "the minimum temperature on the GPU's memory temperature sensor in m°C") - .def("get_temperature_memory_max", &hws::rocm_smi_temperature_samples::get_temperature_memory_max, "the maximum temperature on the GPU's memory temperature sensor in m°C") - .def("get_temperature_hbm_0_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0_min, "the minimum temperature on the GPU's HBM0 temperature sensor in m°C") - .def("get_temperature_hbm_0_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0_max, "the maximum temperature on the GPU's HBM0 temperature sensor in m°C") - .def("get_temperature_hbm_1_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1_min, "the 
minimum temperature on the GPU's HBM1 temperature sensor in m°C") - .def("get_temperature_hbm_1_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1_max, "the maximum temperature on the GPU's HBM1 temperature sensor in m°C") - .def("get_temperature_hbm_2_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2_min, "the minimum temperature on the GPU's HBM2 temperature sensor in m°C") - .def("get_temperature_hbm_2_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2_max, "the maximum temperature on the GPU's HBM2 temperature sensor in m°C") - .def("get_temperature_hbm_3_min", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3_min, "the minimum temperature on the GPU's HBM3 temperature sensor in m°C") - .def("get_temperature_hbm_3_max", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3_max, "the maximum temperature on the GPU's HBM3 temperature sensor in m°C") - .def("get_fan_speed", &hws::rocm_smi_temperature_samples::get_fan_speed, "the current fan speed in %") - .def("get_temperature_edge", &hws::rocm_smi_temperature_samples::get_temperature_edge, "the current temperature on the GPU's edge temperature sensor in m°C") - .def("get_temperature_hotspot", &hws::rocm_smi_temperature_samples::get_temperature_hotspot, "the current temperature on the GPU's hotspot temperature sensor in m°C") - .def("get_temperature_memory", &hws::rocm_smi_temperature_samples::get_temperature_memory, "the current temperature on the GPU's memory temperature sensor in m°C") - .def("get_temperature_hbm_0", &hws::rocm_smi_temperature_samples::get_temperature_hbm_0, "the current temperature on the GPU's HBM0 temperature sensor in m°C") - .def("get_temperature_hbm_1", &hws::rocm_smi_temperature_samples::get_temperature_hbm_1, "the current temperature on the GPU's HBM1 temperature sensor in m°C") - .def("get_temperature_hbm_2", &hws::rocm_smi_temperature_samples::get_temperature_hbm_2, "the current temperature on the GPU's HBM2 temperature sensor in 
m°C") - .def("get_temperature_hbm_3", &hws::rocm_smi_temperature_samples::get_temperature_hbm_3, "the current temperature on the GPU's HBM3 temperature sensor in m°C") + .def("get_fan_speed_max", &hws::rocm_smi_temperature_samples::get_fan_speed_max, "the maximum fan speed in RPM") + .def("get_temperature_min", &hws::rocm_smi_temperature_samples::get_temperature_min, "the minimum temperature on the GPU's edge temperature sensor in °C") + .def("get_temperature_max", &hws::rocm_smi_temperature_samples::get_temperature_max, "the maximum temperature on the GPU's edge temperature sensor in °C") + .def("get_memory_temperature_min", &hws::rocm_smi_temperature_samples::get_memory_temperature_min, "the minimum temperature on the GPU's memory temperature sensor in °C") + .def("get_memory_temperature_max", &hws::rocm_smi_temperature_samples::get_memory_temperature_max, "the maximum temperature on the GPU's memory temperature sensor in °C") + .def("get_hotspot_temperature_min", &hws::rocm_smi_temperature_samples::get_hotspot_temperature_min, "the minimum temperature on the GPU's hotspot temperature sensor in °C") + .def("get_hotspot_temperature_max", &hws::rocm_smi_temperature_samples::get_hotspot_temperature_max, "the maximum temperature on the GPU's hotspot temperature sensor in °C") + .def("get_hbm_0_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature_min, "the minimum temperature on the GPU's HBM0 temperature sensor in °C") + .def("get_hbm_0_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature_max, "the maximum temperature on the GPU's HBM0 temperature sensor in °C") + .def("get_hbm_1_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature_min, "the minimum temperature on the GPU's HBM1 temperature sensor in °C") + .def("get_hbm_1_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature_max, "the maximum temperature on the GPU's HBM1 temperature sensor in °C") + 
.def("get_hbm_2_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature_min, "the minimum temperature on the GPU's HBM2 temperature sensor in °C") + .def("get_hbm_2_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature_max, "the maximum temperature on the GPU's HBM2 temperature sensor in °C") + .def("get_hbm_3_temperature_min", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature_min, "the minimum temperature on the GPU's HBM3 temperature sensor in °C") + .def("get_hbm_3_temperature_max", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature_max, "the maximum temperature on the GPU's HBM3 temperature sensor in °C") + .def("get_fan_speed_percentage", &hws::rocm_smi_temperature_samples::get_fan_speed_percentage, "the current fan speed in %") + .def("get_temperature", &hws::rocm_smi_temperature_samples::get_temperature, "the current temperature on the GPU's edge temperature sensor in °C") + .def("get_hotspot_temperature", &hws::rocm_smi_temperature_samples::get_hotspot_temperature, "the current temperature on the GPU's hotspot temperature sensor in °C") + .def("get_memory_temperature", &hws::rocm_smi_temperature_samples::get_memory_temperature, "the current temperature on the GPU's memory temperature sensor in °C") + .def("get_hbm_0_temperature", &hws::rocm_smi_temperature_samples::get_hbm_0_temperature, "the current temperature on the GPU's HBM0 temperature sensor in °C") + .def("get_hbm_1_temperature", &hws::rocm_smi_temperature_samples::get_hbm_1_temperature, "the current temperature on the GPU's HBM1 temperature sensor in °C") + .def("get_hbm_2_temperature", &hws::rocm_smi_temperature_samples::get_hbm_2_temperature, "the current temperature on the GPU's HBM2 temperature sensor in °C") + .def("get_hbm_3_temperature", &hws::rocm_smi_temperature_samples::get_hbm_3_temperature, "the current temperature on the GPU's HBM3 temperature sensor in °C") .def("__repr__", [](const hws::rocm_smi_temperature_samples &self) { - 
return std::format("", self); + return fmt::format("", self); }); // bind the GPU AMD hardware sampler class py::class_(m, "GpuAmdHardwareSampler") .def(py::init<>(), "construct a new AMD GPU hardware sampler for the default device with the default sampling interval") + .def(py::init(), "construct a new AMD GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new AMD GPU hardware sampler for the specified device with the default sampling interval") + .def(py::init(), "construct a new AMD GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new AMD GPU hardware sampler for the default device with the specified sampling interval") + .def(py::init(), "construct a new AMD GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new AMD GPU hardware sampler for the specified device and sampling interval") + .def(py::init(), "construct a new AMD GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples") .def("general_samples", &hws::gpu_amd_hardware_sampler::general_samples, "get all general samples") .def("clock_samples", &hws::gpu_amd_hardware_sampler::clock_samples, "get all clock related samples") .def("power_samples", &hws::gpu_amd_hardware_sampler::power_samples, "get all power related samples") .def("memory_samples", &hws::gpu_amd_hardware_sampler::memory_samples, "get all memory related samples") .def("temperature_samples", &hws::gpu_amd_hardware_sampler::temperature_samples, "get all temperature related samples") + .def("samples_only_as_yaml_string", &hws::gpu_amd_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const 
hws::gpu_amd_hardware_sampler &self) { - return std::format("", self); + return fmt::format("", self); }); } diff --git a/bindings/gpu_intel_hardware_sampler.cpp b/bindings/gpu_intel_hardware_sampler.cpp index 39b346a..aaae9ed 100644 --- a/bindings/gpu_intel_hardware_sampler.cpp +++ b/bindings/gpu_intel_hardware_sampler.cpp @@ -5,97 +5,123 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler -#include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler +#include "hws/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds #include "pybind11/pybind11.h" // py::module_ #include "pybind11/stl.h" // bind STL types #include // std::chrono::milliseconds #include // std::size_t -#include // std::format namespace py = pybind11; void init_gpu_intel_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "LevelZeroGeneralSamples") + .def("has_samples", &hws::level_zero_general_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_byte_order", &hws::level_zero_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") + .def("get_vendor_id", &hws::level_zero_general_samples::get_vendor_id, "the vendor ID") 
.def("get_name", &hws::level_zero_general_samples::get_name, "the model name of the device") + .def("get_flags", &hws::level_zero_general_samples::get_flags, "potential GPU flags (e.g. integrated device)") .def("get_standby_mode", &hws::level_zero_general_samples::get_standby_mode, "the enabled standby mode (power saving or never)") .def("get_num_threads_per_eu", &hws::level_zero_general_samples::get_num_threads_per_eu, "the number of threads per EU unit") .def("get_eu_simd_width", &hws::level_zero_general_samples::get_eu_simd_width, "the physical EU unit SIMD width") .def("__repr__", [](const hws::level_zero_general_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the clock samples py::class_(m, "LevelZeroClockSamples") - .def("get_clock_gpu_min", &hws::level_zero_clock_samples::get_clock_gpu_min, "the minimum possible GPU clock frequency in MHz") - .def("get_clock_gpu_max", &hws::level_zero_clock_samples::get_clock_gpu_max, "the maximum possible GPU clock frequency in MHz") - .def("get_available_clocks_gpu", &hws::level_zero_clock_samples::get_available_clocks_gpu, "the available GPU clock frequencies in MHz (slowest to fastest)") - .def("get_clock_mem_min", &hws::level_zero_clock_samples::get_clock_mem_min, "the minimum possible memory clock frequency in MHz") - .def("get_clock_mem_max", &hws::level_zero_clock_samples::get_clock_mem_max, "the maximum possible memory clock frequency in MHz") - .def("get_available_clocks_mem", &hws::level_zero_clock_samples::get_available_clocks_mem, "the available memory clock frequencies in MHz (slowest to fastest)") - .def("get_tdp_frequency_limit_gpu", &hws::level_zero_clock_samples::get_tdp_frequency_limit_gpu, "the current maximum allowed GPU frequency based on the TDP limit in MHz") - .def("get_clock_gpu", &hws::level_zero_clock_samples::get_clock_gpu, "the current GPU frequency in MHz") - .def("get_throttle_reason_gpu", &hws::level_zero_clock_samples::get_throttle_reason_gpu, 
"the current GPU frequency throttle reason") - .def("get_tdp_frequency_limit_mem", &hws::level_zero_clock_samples::get_tdp_frequency_limit_mem, "the current maximum allowed memory frequency based on the TDP limit in MHz") - .def("get_clock_mem", &hws::level_zero_clock_samples::get_clock_mem, "the current memory frequency in MHz") - .def("get_throttle_reason_mem", &hws::level_zero_clock_samples::get_throttle_reason_mem, "the current memory frequency throttle reason") + .def("has_samples", &hws::level_zero_clock_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_clock_frequency_min", &hws::level_zero_clock_samples::get_clock_frequency_min, "the minimum possible GPU clock frequency in MHz") + .def("get_clock_frequency_max", &hws::level_zero_clock_samples::get_clock_frequency_max, "the maximum possible GPU clock frequency in MHz") + .def("get_memory_clock_frequency_min", &hws::level_zero_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") + .def("get_memory_clock_frequency_max", &hws::level_zero_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz") + .def("get_available_clock_frequencies", &hws::level_zero_clock_samples::get_available_clock_frequencies, "the available GPU clock frequencies in MHz (slowest to fastest)") + .def("get_available_memory_clock_frequencies", &hws::level_zero_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)") + .def("get_clock_frequency", &hws::level_zero_clock_samples::get_clock_frequency, "the current GPU frequency in MHz") + .def("get_memory_clock_frequency", &hws::level_zero_clock_samples::get_memory_clock_frequency, "the current memory frequency in MHz") + .def("get_throttle_reason", &hws::level_zero_clock_samples::get_throttle_reason, "the current GPU frequency throttle reason (as bitmask)") + .def("get_throttle_reason_string", 
&hws::level_zero_clock_samples::get_throttle_reason_string, "the current GPU frequency throttle reason (as string)") + .def("get_memory_throttle_reason", &hws::level_zero_clock_samples::get_memory_throttle_reason, "the current memory frequency throttle reason (as bitmask)") + .def("get_memory_throttle_reason_string", &hws::level_zero_clock_samples::get_memory_throttle_reason_string, "the current memory frequency throttle reason (as string)") + .def("get_frequency_limit_tdp", &hws::level_zero_clock_samples::get_frequency_limit_tdp, "the current maximum allowed GPU frequency based on the TDP limit in MHz") + .def("get_memory_frequency_limit_tdp", &hws::level_zero_clock_samples::get_memory_frequency_limit_tdp, "the current maximum allowed memory frequency based on the TDP limit in MHz") .def("__repr__", [](const hws::level_zero_clock_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the power samples py::class_(m, "LevelZeroPowerSamples") - .def("get_energy_threshold_enabled", &hws::level_zero_power_samples::get_energy_threshold_enabled, "true if the energy threshold is enabled") - .def("get_energy_threshold", &hws::level_zero_power_samples::get_energy_threshold, "the energy threshold in J") - .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ") + .def("has_samples", &hws::level_zero_power_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_power_enforced_limit", &hws::level_zero_power_samples::get_power_enforced_limit, "the actually enforced power limit (W), may be different from power management limit if external limiters are set") + .def("get_power_measurement_type", &hws::level_zero_power_samples::get_power_measurement_type, "the type of the power readings") + .def("get_power_management_mode", &hws::level_zero_power_samples::get_power_management_mode, "true if power 
management limits are enabled") + .def("get_power_usage", &hws::level_zero_power_samples::get_power_usage, "the current power draw of the GPU in W (calculated from power_total_energy_consumption)") + .def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in J") .def("__repr__", [](const hws::level_zero_power_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the memory samples py::class_(m, "LevelZeroMemorySamples") + .def("has_samples", &hws::level_zero_memory_samples::has_samples, "true if any sample is available, false otherwise") .def("get_memory_total", &hws::level_zero_memory_samples::get_memory_total, "the total memory size of the different memory modules in Bytes") - .def("get_allocatable_memory_total", &hws::level_zero_memory_samples::get_allocatable_memory_total, "the total allocatable memory size of the different memory modules in Bytes") - .def("get_pcie_link_max_speed", &hws::level_zero_memory_samples::get_pcie_link_max_speed, "the maximum PCIe bandwidth in bytes/sec") - .def("get_pcie_max_width", &hws::level_zero_memory_samples::get_pcie_max_width, "the PCIe lane width") - .def("get_max_pcie_link_generation", &hws::level_zero_memory_samples::get_max_pcie_link_generation, "the PCIe generation") - .def("get_bus_width", &hws::level_zero_memory_samples::get_bus_width, "the bus width of the different memory modules") - .def("get_num_channels", &hws::level_zero_memory_samples::get_num_channels, "the number of memory channels of the different memory modules") - .def("get_location", &hws::level_zero_memory_samples::get_location, "the location of the different memory modules (system or device)") + .def("get_visible_memory_total", &hws::level_zero_memory_samples::get_visible_memory_total, "the total allocatable memory size of the different memory modules in Bytes") + .def("get_memory_location", 
&hws::level_zero_memory_samples::get_memory_location, "the location of the different memory modules (system or device)") + .def("get_num_pcie_lanes_max", &hws::level_zero_memory_samples::get_num_pcie_lanes_max, "the PCIe lane width") + .def("get_pcie_link_generation_max", &hws::level_zero_memory_samples::get_pcie_link_generation_max, "the PCIe generation") + .def("get_pcie_link_speed_max", &hws::level_zero_memory_samples::get_pcie_link_speed_max, "the maximum PCIe bandwidth in bytes/sec") + .def("get_memory_bus_width", &hws::level_zero_memory_samples::get_memory_bus_width, "the bus width of the different memory modules") + .def("get_memory_num_channels", &hws::level_zero_memory_samples::get_memory_num_channels, "the number of memory channels of the different memory modules") .def("get_memory_free", &hws::level_zero_memory_samples::get_memory_free, "the currently free memory of the different memory modules in Bytes") - .def("get_pcie_link_speed", &hws::level_zero_memory_samples::get_pcie_link_speed, "the current PCIe bandwidth in bytes/sec") - .def("get_pcie_link_width", &hws::level_zero_memory_samples::get_pcie_link_width, "the current PCIe lane width") + .def("get_memory_used", &hws::level_zero_memory_samples::get_memory_used, "the currently used memory of the different memory modules in Bytes") + .def("get_num_pcie_lanes", &hws::level_zero_memory_samples::get_num_pcie_lanes, "the current PCIe lane width") .def("get_pcie_link_generation", &hws::level_zero_memory_samples::get_pcie_link_generation, "the current PCIe generation") + .def("get_pcie_link_speed", &hws::level_zero_memory_samples::get_pcie_link_speed, "the current PCIe bandwidth in bytes/sec") .def("__repr__", [](const hws::level_zero_memory_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the temperature samples py::class_(m, "LevelZeroTemperatureSamples") - .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum 
temperature for the sensor in °C") - .def("get_temperature_psu", &hws::level_zero_temperature_samples::get_temperature_psu, "the temperature of the PSU in °C") - .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current temperature for the sensor in °C") + .def("has_samples", &hws::level_zero_temperature_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_num_fans", &hws::level_zero_temperature_samples::get_num_fans, "the number of fans") + .def("get_fan_speed_max", &hws::level_zero_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in RPM") + .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum GPU temperature in °C") + .def("get_memory_temperature_max", &hws::level_zero_temperature_samples::get_memory_temperature_max, "the maximum memory temperature in °C") + .def("get_global_temperature_max", &hws::level_zero_temperature_samples::get_global_temperature_max, "the maximum global temperature in °C") + .def("get_fan_speed_percentage", &hws::level_zero_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %") + .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current GPU temperature in °C") + .def("get_memory_temperature", &hws::level_zero_temperature_samples::get_memory_temperature, "the current memory temperature in °C") + .def("get_global_temperature", &hws::level_zero_temperature_samples::get_global_temperature, "the current global temperature in °C") + .def("get_psu_temperature", &hws::level_zero_temperature_samples::get_psu_temperature, "the current PSU temperature in °C") .def("__repr__", [](const hws::level_zero_temperature_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the GPU Intel hardware sampler class py::class_(m, "GpuIntelHardwareSampler") .def(py::init<>(), "construct a new Intel GPU hardware sampler 
for the default device with the default sampling interval") + .def(py::init(), "construct a new Intel GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval") + .def(py::init(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new Intel GPU hardware sampler for the default device with the specified sampling interval") + .def(py::init(), "construct a new Intel GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval") + .def(py::init(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples") .def("general_samples", &hws::gpu_intel_hardware_sampler::general_samples, "get all general samples") .def("clock_samples", &hws::gpu_intel_hardware_sampler::clock_samples, "get all clock related samples") .def("power_samples", &hws::gpu_intel_hardware_sampler::power_samples, "get all power related samples") .def("memory_samples", &hws::gpu_intel_hardware_sampler::memory_samples, "get all memory related samples") .def("temperature_samples", &hws::gpu_intel_hardware_sampler::temperature_samples, "get all temperature related samples") + .def("samples_only_as_yaml_string", &hws::gpu_intel_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::gpu_intel_hardware_sampler &self) { - return std::format("", self); + return fmt::format("", self); }); } diff --git a/bindings/gpu_nvidia_hardware_sampler.cpp 
b/bindings/gpu_nvidia_hardware_sampler.cpp index 21130ae..a32283a 100644 --- a/bindings/gpu_nvidia_hardware_sampler.cpp +++ b/bindings/gpu_nvidia_hardware_sampler.cpp @@ -5,102 +5,122 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler -#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler +#include "hws/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds #include "pybind11/pybind11.h" // py::module_ #include "pybind11/stl.h" // bind STL types #include // std::chrono::milliseconds #include // std::size_t -#include // std::format namespace py = pybind11; void init_gpu_nvidia_hardware_sampler(py::module_ &m) { // bind the general samples py::class_(m, "NvmlGeneralSamples") + .def("has_samples", &hws::nvml_general_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_architecture", &hws::nvml_general_samples::get_architecture, "the architecture name of the device") + .def("get_byte_order", &hws::nvml_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") + .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores") + .def("get_vendor_id", &hws::nvml_general_samples::get_vendor_id, "the vendor ID") .def("get_name", 
&hws::nvml_general_samples::get_name, "the name of the device") .def("get_persistence_mode", &hws::nvml_general_samples::get_persistence_mode, "the persistence mode: if true, the driver is always loaded reducing the latency for the first API call") - .def("get_num_cores", &hws::nvml_general_samples::get_num_cores, "the number of CUDA cores") - .def("get_performance_state", &hws::nvml_general_samples::get_performance_state, "the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance") - .def("get_utilization_gpu", &hws::nvml_general_samples::get_utilization_gpu, "the GPU compute utilization in percent") - .def("get_utilization_mem", &hws::nvml_general_samples::get_utilization_mem, "the GPU memory utilization in percent") + .def("get_compute_utilization", &hws::nvml_general_samples::get_compute_utilization, "the GPU compute utilization in percent") + .def("get_memory_utilization", &hws::nvml_general_samples::get_memory_utilization, "the GPU memory utilization in percent") + .def("get_performance_level", &hws::nvml_general_samples::get_performance_level, "the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance") .def("__repr__", [](const hws::nvml_general_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the clock samples py::class_(m, "NvmlClockSamples") - .def("get_adaptive_clock_status", &hws::nvml_clock_samples::get_adaptive_clock_status, "true if clock boosting is currently enabled") - .def("get_clock_graph_min", &hws::nvml_clock_samples::get_clock_graph_min, "the minimum possible graphics clock frequency in MHz") - .def("get_clock_graph_max", &hws::nvml_clock_samples::get_clock_graph_max, "the maximum possible graphics clock frequency in MHz") - .def("get_clock_sm_max", &hws::nvml_clock_samples::get_clock_sm_max, "the maximum possible SM clock frequency in MHz") - .def("get_clock_mem_min", &hws::nvml_clock_samples::get_clock_mem_min, "the 
minimum possible memory clock frequency in MHz") - .def("get_clock_mem_max", &hws::nvml_clock_samples::get_clock_mem_max, "the maximum possible memory clock frequency in MHz") - .def("get_clock_graph", &hws::nvml_clock_samples::get_clock_graph, "the current graphics clock frequency in MHz") - .def("get_clock_sm", &hws::nvml_clock_samples::get_clock_sm, "the current SM clock frequency in Mhz") - .def("get_clock_mem", &hws::nvml_clock_samples::get_clock_mem, "the current memory clock frequency in MHz") - .def("get_clock_throttle_reason", &hws::nvml_clock_samples::get_clock_throttle_reason, "the reason the GPU clock throttled (bitmask)") - .def("get_auto_boosted_clocks", &hws::nvml_clock_samples::get_auto_boosted_clocks, "true if the clocks are currently auto boosted") + .def("has_samples", &hws::nvml_clock_samples::has_samples, "true if any sample is available, false otherwise") + .def("get_auto_boosted_clock_enabled", &hws::nvml_clock_samples::get_auto_boosted_clock_enabled, "true if clock boosting is currently enabled") + .def("get_clock_frequency_min", &hws::nvml_clock_samples::get_clock_frequency_min, "the minimum possible graphics clock frequency in MHz") + .def("get_clock_frequency_max", &hws::nvml_clock_samples::get_clock_frequency_max, "the maximum possible graphics clock frequency in MHz") + .def("get_memory_clock_frequency_min", &hws::nvml_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") + .def("get_memory_clock_frequency_max", &hws::nvml_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz") + .def("get_sm_clock_frequency_max", &hws::nvml_clock_samples::get_sm_clock_frequency_max, "the maximum possible SM clock frequency in MHz") + .def("get_clock_frequency", &hws::nvml_clock_samples::get_clock_frequency, "the current graphics clock frequency in MHz") + .def("get_available_clock_frequencies", &hws::nvml_clock_samples::get_available_clock_frequencies, "the 
available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest)") + .def("get_available_memory_clock_frequencies", &hws::nvml_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)") + .def("get_memory_clock_frequency", &hws::nvml_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz") + .def("get_sm_clock_frequency", &hws::nvml_clock_samples::get_sm_clock_frequency, "the current SM clock frequency in Mhz") + .def("get_throttle_reason", &hws::nvml_clock_samples::get_throttle_reason, "the reason the GPU clock throttled (as bitmask)") + .def("get_throttle_reason_string", &hws::nvml_clock_samples::get_throttle_reason_string, "the reason the GPU clock throttled (as string)") + .def("get_auto_boosted_clock", &hws::nvml_clock_samples::get_auto_boosted_clock, "true if the clocks are currently auto boosted") .def("__repr__", [](const hws::nvml_clock_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the power samples py::class_(m, "NvmlPowerSamples") - .def("get_power_management_mode", &hws::nvml_power_samples::get_power_management_mode, "true if power management algorithms are supported and active") + .def("has_samples", &hws::nvml_power_samples::has_samples, "true if any sample is available, false otherwise") .def("get_power_management_limit", &hws::nvml_power_samples::get_power_management_limit, "if the GPU draws more power (mW) than the power management limit, the GPU may throttle") .def("get_power_enforced_limit", &hws::nvml_power_samples::get_power_enforced_limit, "the actually enforced power limit, may be different from power management limit if external limiters are set") - .def("get_power_state", &hws::nvml_power_samples::get_power_state, "the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power") + .def("get_power_measurement_type", 
&hws::nvml_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw") + .def("get_power_management_mode", &hws::nvml_power_samples::get_power_management_mode, "true if power management algorithms are supported and active") + .def("get_available_power_profiles", &hws::nvml_power_samples::get_available_power_profiles, "a list of the available power profiles") .def("get_power_usage", &hws::nvml_power_samples::get_power_usage, "the current power draw of the GPU and its related circuity (e.g., memory) in mW") .def("get_power_total_energy_consumption", &hws::nvml_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ") + .def("get_power_profile", &hws::nvml_power_samples::get_power_profile, "the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power") .def("__repr__", [](const hws::nvml_power_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the memory samples py::class_(m, "NvmlMemorySamples") + .def("has_samples", &hws::nvml_memory_samples::has_samples, "true if any sample is available, false otherwise") .def("get_memory_total", &hws::nvml_memory_samples::get_memory_total, "the total available memory in Byte") - .def("get_pcie_link_max_speed", &hws::nvml_memory_samples::get_pcie_link_max_speed, "the maximum PCIe link speed in MBPS") + .def("get_num_pcie_lanes_max", &hws::nvml_memory_samples::get_num_pcie_lanes_max, "the maximum number of PCIe lanes") + .def("get_pcie_link_generation_max", &hws::nvml_memory_samples::get_pcie_link_generation_max, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)") + .def("get_pcie_link_speed_max", &hws::nvml_memory_samples::get_pcie_link_speed_max, "the maximum PCIe link speed in MBPS") .def("get_memory_bus_width", &hws::nvml_memory_samples::get_memory_bus_width, "the memory bus with in Bit") - 
.def("get_max_pcie_link_generation", &hws::nvml_memory_samples::get_max_pcie_link_generation, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)") - .def("get_memory_free", &hws::nvml_memory_samples::get_memory_free, "the currently free memory in Byte") .def("get_memory_used", &hws::nvml_memory_samples::get_memory_used, "the currently used memory in Byte") - .def("get_pcie_link_speed", &hws::nvml_memory_samples::get_pcie_link_speed, "the current PCIe link speed in MBPS") - .def("get_pcie_link_width", &hws::nvml_memory_samples::get_pcie_link_width, "the current PCIe link width (e.g., x16, x8, x4, etc)") + .def("get_memory_free", &hws::nvml_memory_samples::get_memory_free, "the currently free memory in Byte") + .def("get_num_pcie_lanes", &hws::nvml_memory_samples::get_num_pcie_lanes, "the current PCIe link width (e.g., x16, x8, x4, etc)") .def("get_pcie_link_generation", &hws::nvml_memory_samples::get_pcie_link_generation, "the current PCIe link generation (may change during runtime to save energy)") + .def("get_pcie_link_speed", &hws::nvml_memory_samples::get_pcie_link_speed, "the current PCIe link speed in MBPS") .def("__repr__", [](const hws::nvml_memory_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the temperature samples py::class_(m, "NvmlTemperatureSamples") + .def("has_samples", &hws::nvml_temperature_samples::has_samples, "true if any sample is available, false otherwise") .def("get_num_fans", &hws::nvml_temperature_samples::get_num_fans, "the number of fans (if any)") - .def("get_min_fan_speed", &hws::nvml_temperature_samples::get_min_fan_speed, "the minimum fan speed the user can set in %") - .def("get_max_fan_speed", &hws::nvml_temperature_samples::get_max_fan_speed, "the maximum fan speed the user can set in %") - .def("get_temperature_threshold_gpu_max", &hws::nvml_temperature_samples::get_temperature_threshold_gpu_max, "the maximum graphics temperature threshold in °C") - 
.def("get_temperature_threshold_mem_max", &hws::nvml_temperature_samples::get_temperature_threshold_mem_max, "the maximum memory temperature threshold in °C") - .def("get_fan_speed", &hws::nvml_temperature_samples::get_fan_speed, "the current intended fan speed in %") - .def("get_temperature_gpu", &hws::nvml_temperature_samples::get_temperature_gpu, "the current GPU temperature in °C") + .def("get_fan_speed_min", &hws::nvml_temperature_samples::get_fan_speed_min, "the minimum fan speed the user can set in %") + .def("get_fan_speed_max", &hws::nvml_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in %") + .def("get_temperature_max", &hws::nvml_temperature_samples::get_temperature_max, "the maximum graphics temperature threshold in °C") + .def("get_memory_temperature_max", &hws::nvml_temperature_samples::get_memory_temperature_max, "the maximum memory temperature threshold in °C") + .def("get_fan_speed_percentage", &hws::nvml_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %") + .def("get_temperature", &hws::nvml_temperature_samples::get_temperature, "the current GPU temperature in °C") .def("__repr__", [](const hws::nvml_temperature_samples &self) { - return std::format("", self); + return fmt::format("", self); }); // bind the GPU NVIDIA hardware sampler class py::class_(m, "GpuNvidiaHardwareSampler") .def(py::init<>(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval") + .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval") + .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples") 
.def(py::init(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval") + .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples") .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval") + .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples") .def("general_samples", &hws::gpu_nvidia_hardware_sampler::general_samples, "get all general samples") .def("clock_samples", &hws::gpu_nvidia_hardware_sampler::clock_samples, "get all clock related samples") .def("power_samples", &hws::gpu_nvidia_hardware_sampler::power_samples, "get all power related samples") .def("memory_samples", &hws::gpu_nvidia_hardware_sampler::memory_samples, "get all memory related samples") .def("temperature_samples", &hws::gpu_nvidia_hardware_sampler::temperature_samples, "get all temperature related samples") + .def("samples_only_as_yaml_string", &hws::gpu_nvidia_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::gpu_nvidia_hardware_sampler &self) { - return std::format("", self); + return fmt::format("", self); }); } diff --git a/bindings/hardware_sampler.cpp b/bindings/hardware_sampler.cpp index 5d45f74..5a12141 100644 --- a/bindings/hardware_sampler.cpp +++ b/bindings/hardware_sampler.cpp @@ -5,28 +5,31 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/event.hpp" // hws::event +#include "hws/event.hpp" // hws::event +#include "hws/utility.hpp" // hws::detail::durations_from_reference_time #if defined(HWS_FOR_CPUS_ENABLED) - #include "hardware_sampling/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler + #include "hws/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler +#endif +#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) + #include "hws/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler #endif #if defined(HWS_FOR_AMD_GPUS_ENABLED) - #include "hardware_sampling/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler + #include "hws/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler #endif #if defined(HWS_FOR_INTEL_GPUS_ENABLED) - #include "hardware_sampling/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler -#endif -#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) - #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler + #include "hws/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler #endif +#include "fmt/format.h" // fmt::format #include "pybind11/chrono.h" // bind std::chrono types #include "pybind11/pybind11.h" // py::module_, py::class_ #include "pybind11/stl.h" // bind STL types -#include // std::format +#include "relative_event.hpp" // hws::detail::relative_event +#include // std::string namespace py = pybind11; @@ -47,31 +50,40 @@ void init_hardware_sampler(py::module_ &m) { .def("add_event", py::overload_cast(&hws::hardware_sampler::add_event), "add a new event using a name, the current time is used as time point") .def("num_events", &hws::hardware_sampler::num_events, "get the number of events") .def("get_events", &hws::hardware_sampler::get_events, "get all events") + .def("get_relative_events", [](const hws::hardware_sampler 
&self) { + std::vector relative_events{}; + for (const hws::event &e : self.get_events()) { + relative_events.emplace_back(hws::detail::duration_from_reference_time(e.time_point, self.get_event(0).time_point), e.name); + } + return relative_events; }, "get all relative events") .def("get_event", &hws::hardware_sampler::get_event, "get a specific event") + .def("get_relative_event", [](const hws::hardware_sampler &self, const std::size_t idx) { return hws::detail::relative_event{ hws::detail::duration_from_reference_time(self.get_event(idx).time_point, self.get_event(0).time_point), self.get_event(idx).name }; }, "get a specific relative event") .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples") + .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)") - .def("dump_yaml", py::overload_cast(&hws::hardware_sampler::dump_yaml), "dump all hardware samples to the given YAML file") + .def("dump_yaml", py::overload_cast(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file") + .def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples including additional information like events as YAML string") + .def("samples_only_as_yaml_string", &hws::hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") .def("__repr__", [](const hws::hardware_sampler &self) { #if defined(HWS_FOR_CPUS_ENABLED) if (dynamic_cast(&self)) { - return std::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif #if 
defined(HWS_FOR_NVIDIA_GPUS_ENABLED) if (dynamic_cast(&self)) { - return std::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif #if defined(HWS_FOR_AMD_GPUS_ENABLED) if (dynamic_cast(&self)) { - return std::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif #if defined(HWS_FOR_INTEL_GPUS_ENABLED) if (dynamic_cast(&self)) { - return std::format("", dynamic_cast(self)); + return fmt::format("", dynamic_cast(self)); } #endif - return std::string{ "unknown" }; - }); + return std::string{ "unknown" }; }); } diff --git a/bindings/main.cpp b/bindings/main.cpp index 5a4c01c..f3dca3f 100644 --- a/bindings/main.cpp +++ b/bindings/main.cpp @@ -7,46 +7,57 @@ #include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_ +#include // std::string_view + +#define HWS_IS_DEFINED_HELPER(x) #x +#define HWS_IS_DEFINED(x) (std::string_view{ #x } != std::string_view{ HWS_IS_DEFINED_HELPER(x) }) + namespace py = pybind11; // forward declare binding functions void init_event(py::module_ &); +void init_sample_category(py::module_ &); +void init_relative_event(py::module_ &); void init_hardware_sampler(py::module_ &); +void init_system_hardware_sampler(py::module_ &); void init_cpu_hardware_sampler(py::module_ &); void init_gpu_nvidia_hardware_sampler(py::module_ &); void init_gpu_amd_hardware_sampler(py::module_ &); void init_gpu_intel_hardware_sampler(py::module_ &); +void init_version(py::module_ &); PYBIND11_MODULE(HardwareSampling, m) { m.doc() = "Hardware Sampling for CPUs and GPUs"; init_event(m); + init_sample_category(m); + init_relative_event(m); init_hardware_sampler(m); + init_system_hardware_sampler(m); + // CPU sampling #if defined(HWS_FOR_CPUS_ENABLED) init_cpu_hardware_sampler(m); - m.def("has_cpu_hardware_sampler", []{return true;} ); -#else - m.def("has_cpu_hardware_sampler", []{return false;} ); #endif + m.def("has_cpu_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_CPUS_ENABLED); }); + + // 
NVIDIA GPU sampling #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) init_gpu_nvidia_hardware_sampler(m); - m.def("has_gpu_nvidia_hardware_sampler", []{return true;} ); -#else - m.def("has_gpu_nvidia_hardware_sampler", []{return false;} ); #endif + m.def("has_gpu_nvidia_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_NVIDIA_GPUS_ENABLED); }); + + // AMD GPU sampling #if defined(HWS_FOR_AMD_GPUS_ENABLED) init_gpu_amd_hardware_sampler(m); - m.def("has_gpu_amd_hardware_sampler", []{return true;} ); -#else - m.def("has_gpu_amd_hardware_sampler", []{return false;} ); #endif + m.def("has_gpu_amd_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_AMD_GPUS_ENABLED); }); + // Intel GPU sampling #if defined(HWS_FOR_INTEL_GPUS_ENABLED) init_gpu_intel_hardware_sampler(m); - m.def("has_gpu_intel_hardware_sampler", []{return true;} ); -#else - m.def("has_gpu_intel_hardware_sampler", []{return false;} ); #endif + m.def("has_gpu_intel_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_INTEL_GPUS_ENABLED); }); + init_version(m); } diff --git a/bindings/relative_event.cpp b/bindings/relative_event.cpp new file mode 100644 index 0000000..c0cb611 --- /dev/null +++ b/bindings/relative_event.cpp @@ -0,0 +1,26 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "relative_event.hpp" // hws::detail::relative_event + +#include "fmt/format.h" // fmt::format +#include "pybind11/chrono.h" // bind std::chrono types +#include "pybind11/pybind11.h" // py::module_ +#include "pybind11/stl.h" // bind STL types + +namespace py = pybind11; + +void init_relative_event(py::module_ &m) { + // a special python only struct encapsulating a relative event, i.e., an event where its "relative_time_point" member is the time passed since the first event + py::class_(m, "RelativeEvent") + .def(py::init(), "construct a new event using a time point and a name") + .def_readonly("relative_time_point", &hws::detail::relative_event::relative_time_point, "read the relative time point associated to this event") + .def_readonly("name", &hws::detail::relative_event::name, "read the name associated to this event") + .def("__repr__", [](const hws::detail::relative_event &self) { + return fmt::format("", self.relative_time_point, self.name); + }); +} diff --git a/bindings/relative_event.hpp b/bindings/relative_event.hpp new file mode 100644 index 0000000..fcdd02e --- /dev/null +++ b/bindings/relative_event.hpp @@ -0,0 +1,40 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a struct encapsulating a single event with a relative time point. + */ + +#ifndef HWS_BINDINGS_RELATIVE_EVENT_HPP_ +#define HWS_BINDINGS_RELATIVE_EVENT_HPP_ + +#include // std::string +#include // std::move + +namespace hws::detail { + +/** + * @brief A struct encapsulating a single event with a relative time point. + */ +struct relative_event { + /** + * @brief Construct a new event given a time point and name. 
+ * @param[in] time_point_p the time when the event occurred relative to the first event + * @param[in] name_p the name of the event + */ + relative_event(const double relative_time_point_p, std::string name_p) : + relative_time_point{ relative_time_point_p }, + name{ std::move(name_p) } { } + + /// The relative time point this event occurred at. + double relative_time_point; + /// The name of this event. + std::string name; +}; + +} // namespace hws::detail + +#endif // HWS_BINDINGS_RELATIVE_EVENT_HPP_ diff --git a/bindings/sample_category.cpp b/bindings/sample_category.cpp new file mode 100644 index 0000000..455914c --- /dev/null +++ b/bindings/sample_category.cpp @@ -0,0 +1,30 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "hws/sample_category.hpp" // hws::sample_category + +#include "pybind11/operators.h" // operator overloading +#include "pybind11/pybind11.h" // py::module_, py::overload_cast + +namespace py = pybind11; + +void init_sample_category(py::module_ &m) { + // sample_category enum and bitwise operations on the sample_category enum + py::enum_(m, "SampleCategory") + .value("GENERAL", hws::sample_category::general, "General hardware samples like architecture, names, or utilization.") + .value("CLOCK", hws::sample_category::clock, "Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons.") + .value("POWER", hws::sample_category::power, "Power-related hardware samples like current power draw or total energy consumption.") + .value("MEMORY", hws::sample_category::memory, "Memory-related hardware samples like memory usage or PCIe information.") + .value("TEMPERATURE", hws::sample_category::temperature, "Temperature-related hardware samples like maximum and current temperatures.") + .value("GFX", hws::sample_category::gfx, 
"Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler.") + .value("IDLE_STATE", hws::sample_category::idle_state, "Idle-state-related hardware samples. Only used in the cpu_hardware_sampler.") + .value("ALL", hws::sample_category::all, "Shortcut to enable all available hardware samples (default).") + .def("__invert__", py::overload_cast(&hws::operator~)) + .def("__and__", py::overload_cast(&hws::operator&)) + .def("__or__", py::overload_cast(&hws::operator|)) + .def("__xor__", py::overload_cast(&hws::operator^)); +} diff --git a/bindings/system_hardware_sampler.cpp b/bindings/system_hardware_sampler.cpp new file mode 100644 index 0000000..d9af622 --- /dev/null +++ b/bindings/system_hardware_sampler.cpp @@ -0,0 +1,71 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler + +#include "hws/event.hpp" // hws::event +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::detail::durations_from_reference_time + +#include "fmt/format.h" // fmt::format +#include "pybind11/chrono.h" // bind std::chrono types +#include "pybind11/pybind11.h" // py::module_, py::class_ +#include "pybind11/stl.h" // bind STL types + +#include "relative_event.hpp" // hws::detail::relative_event +#include // std::string + +namespace py = pybind11; + +void init_system_hardware_sampler(py::module_ &m) { + // bind the pure virtual hardware sampler base class + py::class_(m, "SystemHardwareSampler") + .def(py::init<>(), "construct a new system hardware sampler with the default sampling interval") + .def(py::init(), "construct a new system hardware sampler with the default sampling interval sampling only the provided sample_category samples") + .def(py::init(), "construct a new system hardware sampler 
for with the specified sampling interval") + .def(py::init(), "construct a new system hardware sampler for with the specified sampling interval sampling only the provided sample_category samples") + .def("start", &hws::system_hardware_sampler::start_sampling, "start hardware sampling for all available hardware samplers") + .def("stop", &hws::system_hardware_sampler::stop_sampling, "stop hardware sampling for all available hardware samplers") + .def("pause", &hws::system_hardware_sampler::pause_sampling, "pause hardware sampling for all available hardware samplers") + .def("resume", &hws::system_hardware_sampler::resume_sampling, "resume hardware sampling for all available hardware samplers") + .def("has_started", &hws::system_hardware_sampler::has_sampling_started, "check whether hardware sampling has already been started for all hardware samplers") + .def("is_sampling", &hws::system_hardware_sampler::is_sampling, "check whether the hardware sampling is currently active for all hardware samplers") + .def("has_stopped", &hws::system_hardware_sampler::has_sampling_stopped, "check whether hardware sampling has already been stopped for all hardware samplers") + .def("add_event", py::overload_cast(&hws::system_hardware_sampler::add_event), "add a new event to all hardware samplers") + .def("add_event", py::overload_cast(&hws::system_hardware_sampler::add_event), "add a new event using a time point and a name to all hardware samplers") + .def("add_event", py::overload_cast(&hws::system_hardware_sampler::add_event), "add a new event using a name, the current time is used as time point to all hardware samplers") + .def("num_events", &hws::system_hardware_sampler::num_events, "get the number of events separately for each hardware sampler") + .def("get_events", &hws::system_hardware_sampler::get_events, "get all events separately for each hardware sampler") + .def("get_relative_events", [](const hws::system_hardware_sampler &self) { + std::vector> relative_events{}; + for 
(const std::vector &events : self.get_events()) { + relative_events.emplace_back(); + for (const hws::event &e : events) { + relative_events.back().emplace_back(hws::detail::duration_from_reference_time(e.time_point, events[0].time_point), e.name); + } + } + return relative_events; }, "get all relative events separately for each hardware sampler") + .def("time_points", &hws::system_hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples separately for each hardware sampler") + .def("relative_time_points", [](const hws::system_hardware_sampler &self) { + std::vector> relative_time_points{}; + for (std::size_t s = 0; s < self.num_samplers(); ++s) { + relative_time_points.emplace_back(hws::detail::durations_from_reference_time(self.sampling_time_points()[s], self.get_events()[s][0].time_point)); + } + return relative_time_points; }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") + .def("sampling_interval", &hws::system_hardware_sampler::sampling_interval, "get the sampling interval separately for each hardware sampler (in ms)") + .def("num_samplers", &hws::system_hardware_sampler::num_samplers, "get the number of hardware samplers available for the whole system") + .def("samplers", [](hws::system_hardware_sampler &self) { + std::vector out{}; + for (auto &ptr : self.samplers()) { + out.push_back(ptr.get()); + } + return out; }, "get the hardware samplers available for the whole system") + .def("sampler", [](hws::system_hardware_sampler &self, const std::size_t idx) { return self.sampler(idx).get(); }, "get the i-th hardware sampler available for the whole system") + .def("dump_yaml", py::overload_cast(&hws::system_hardware_sampler::dump_yaml, py::const_), "dump all hardware samples for all hardware samplers to the given YAML file") + .def("as_yaml_string", &hws::system_hardware_sampler::as_yaml_string, "return all hardware samples for all hardware samplers as YAML string") + 
.def("__repr__", [](const hws::system_hardware_sampler &self) { return fmt::format("", self.num_samplers()); }); +} diff --git a/bindings/version.cpp b/bindings/version.cpp new file mode 100644 index 0000000..e5481d1 --- /dev/null +++ b/bindings/version.cpp @@ -0,0 +1,26 @@ +/** +* @author Marcel Breyer +* @copyright 2024-today All Rights Reserved +* @license This file is released under the MIT license. +* See the LICENSE.md file in the project root for full license information. +*/ + +#include "hws/version.hpp" // hws::version + +#include "pybind11/pybind11.h" // py::module_ + +namespace py = pybind11; + +// dummy class +class version { }; + +void init_version(py::module_ &m) { + // bind global version information + // complexity necessary to enforce read-only + py::class_(m, "version") + .def_property_readonly_static("name", [](const py::object & /* self */) { return hws::version::name; }, "the name of the hws library") + .def_property_readonly_static("version", [](const py::object & /* self */) { return hws::version::version; }, "the used version of the hws library") + .def_property_readonly_static("major", [](const py::object & /* self */) { return hws::version::major; }, "the used major version of the hws library") + .def_property_readonly_static("minor", [](const py::object & /* self */) { return hws::version::minor; }, "the used minor version of the hws library") + .def_property_readonly_static("patch", [](const py::object & /* self */) { return hws::version::patch; }, "the used patch version of the hws library"); +} diff --git a/cmake/hardware_samplingConfig.cmake.in b/cmake/hwsConfig.cmake.in similarity index 55% rename from cmake/hardware_samplingConfig.cmake.in rename to cmake/hwsConfig.cmake.in index 56ba42a..852e638 100644 --- a/cmake/hardware_samplingConfig.cmake.in +++ b/cmake/hwsConfig.cmake.in @@ -8,6 +8,12 @@ include(CMakeFindDependencyMacro) +# always try finding {fmt} +# -> CMAKE_PREFIX_PATH necessary if build via FetchContent +# -> doesn't hurt 
to be set everytime +list(APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/fmt") +find_dependency(fmt REQUIRED) + # sanity checks -include("${CMAKE_CURRENT_LIST_DIR}/hardware_samplingTargets.cmake") -check_required_components("hardware_sampling") \ No newline at end of file +include("${CMAKE_CURRENT_LIST_DIR}/hwsTargets.cmake") +check_required_components("hws") \ No newline at end of file diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt new file mode 100644 index 0000000..1623953 --- /dev/null +++ b/docs/CMakeLists.txt @@ -0,0 +1,56 @@ +## Authors: Marcel Breyer +## Copyright (C): 2024-today All Rights Reserved +## License: This file is released under the MIT license. +## See the LICENSE.md file in the project root for full license information. +######################################################################################################################## + +######################################################################################################################## +## setup documentation generation with doxygen ## +######################################################################################################################## +## use installed doxygen +find_package(Doxygen REQUIRED OPTIONAL_COMPONENTS dot) + +## configure doxygen +set(DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/docs") +set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "${PROJECT_SOURCE_DIR}/README.md") +set(DOXYGEN_FILE_PATTERNS "*.hpp;") +set(DOXYGEN_STRIP_FROM_PATH "${PROJECT_SOURCE_DIR}") +set(DOXYGEN_ABBREVIATE_BRIEF "") +set(DOXYGEN_QUIET "YES") +set(DOXYGEN_HTML_TIMESTAMP "YES") +set(DOXYGEN_NUM_PROC_THREADS 0) +set(DOXYGEN_WARN_NO_PARAMDOC "YES") +set(DOXYGEN_SORT_MEMBER_DOCS "NO") +set(DOXYGEN_INLINE_INHERITED_MEMB "YES") +set(DOXYGEN_USE_MATHJAX "YES") +set(DOXYGEN_EXCLUDE_SYMBOLS "*_HPP_") + +set(DOXYGEN_DOT_IMAGE_FORMAT "svg") +set(DOXYGEN_INTERACTIVE_SVG "YES") +set(DOXYGEN_INCLUDE_GRAPH "NO") +set(DOXYGEN_EXTRACT_PRIVATE "YES") + +## 
enable processing of specific attributes and macros +set(DOXYGEN_ENABLE_PREPROCESSING "YES") +set(DOXYGEN_MACRO_EXPANSION "YES") +set(DOXYGEN_EXPAND_ONLY_PREDEF "YES") +set(DOXYGEN_EXPAND_AS_DEFINED "YES") + +set(DOXYGEN_VERBATIM_VARS DOXYGEN_ALIASES) +set(DOXYGEN_ALIASES + [[license="\par License^^\parblock^^" ]] +) + +## add doxygen as target +doxygen_add_docs( + doc + "${PROJECT_SOURCE_DIR}/include;${PROJECT_SOURCE_DIR}/README.md;" + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + COMMENT "Generating API documentation with Doxygen." +) + +## install targets for the documentation +include(GNUInstallDirs) +install(DIRECTORY "${PROJECT_SOURCE_DIR}/docs/html" + DESTINATION "${CMAKE_INSTALL_DOCDIR}" +) diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index 6086f5b..1ffbc0b 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -8,9 +8,9 @@ cmake_minimum_required(VERSION 3.22) project(LibraryUsageExample LANGUAGES CXX) -find_package(hardware_sampling REQUIRED) +find_package(hws REQUIRED) add_executable(prog main.cpp) -target_compile_features(prog PUBLIC cxx_std_20) -target_link_libraries(prog PUBLIC hws::hardware_sampling) \ No newline at end of file +target_compile_features(prog PUBLIC cxx_std_17) +target_link_libraries(prog PUBLIC hws::hws) \ No newline at end of file diff --git a/examples/cpp/main.cpp b/examples/cpp/main.cpp index 166a967..63e4160 100644 --- a/examples/cpp/main.cpp +++ b/examples/cpp/main.cpp @@ -5,14 +5,14 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/core.hpp" +#include "hws/core.hpp" #include // std::size_t #include // std::iota #include // std::vector int main() { - hws::cpu_hardware_sampler sampler{}; + hws::system_hardware_sampler sampler{}; // could also be, e.g., // hws::gpu_nvidia_hardware_sampler sampler{}; sampler.start_sampling(); diff --git a/examples/python/main.py b/examples/python/main.py index 7f384ca..da0809f 100644 --- a/examples/python/main.py +++ b/examples/python/main.py @@ -8,12 +8,12 @@ # See the LICENSE.md file in the project root for full license information. # ######################################################################################################################## -import HardwareSampling +import HardwareSampling as hws import numpy as np -sampler = HardwareSampling.CpuHardwareSampler() +sampler = hws.SystemHardwareSampler() # could also be, e.g., -# sampler = HardwareSampling.GpuNvidiaHardwareSampler() +# sampler = hws.GpuNvidiaHardwareSampler() sampler.start() sampler.add_event("init") diff --git a/include/hardware_sampling/core.hpp b/include/hardware_sampling/core.hpp deleted file mode 100644 index 3c986a5..0000000 --- a/include/hardware_sampling/core.hpp +++ /dev/null @@ -1,38 +0,0 @@ -/** - * @file - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Core header containing all other necessary other headers. 
- */ - -#ifndef HARDWARE_SAMPLING_CORE_HPP_ -#define HARDWARE_SAMPLING_CORE_HPP_ -#pragma once - -#include "hardware_sampling/event.hpp" -#include "hardware_sampling/hardware_sampler.hpp" - -#if defined(HWS_FOR_CPUS_ENABLED) - #include "hardware_sampling/cpu/cpu_samples.hpp" - #include "hardware_sampling/cpu/hardware_sampler.hpp" -#endif - -#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) - #include "hardware_sampling/gpu_nvidia//nvml_samples.hpp" - #include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" -#endif - -#if defined(HWS_FOR_AMD_GPUS_ENABLED) - #include "hardware_sampling/gpu_amd/hardware_sampler.hpp" - #include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" -#endif - -#if defined(HWS_FOR_INTEL_GPUS_ENABLED) - #include "hardware_sampling/gpu_intel/hardware_sampler.hpp" - #include "hardware_sampling/gpu_intel/level_zero_samples.hpp" -#endif - -#endif // HARDWARE_SAMPLING_CORE_HPP_ diff --git a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp b/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp deleted file mode 100644 index 8f5d120..0000000 --- a/include/hardware_sampling/gpu_amd/rocm_smi_samples.hpp +++ /dev/null @@ -1,251 +0,0 @@ -/** - * @file - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Defines the samples used with ROCm SMI. 
- */ - -#ifndef HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ -#define HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ -#pragma once - -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter - -#include // std::uint64_t, std::int64_t, std::uint32_t -#include // std::formatter -#include // std::ostream forward declaration -#include // std::optional -#include // std::string -#include // std::vector - -namespace hws { - -//*************************************************************************************************************************************// -// general samples // -//*************************************************************************************************************************************// - -/** - * @brief Wrapper class for all general ROCm SMI hardware samples. - */ -class rocm_smi_general_samples { - // befriend hardware sampler class - friend class gpu_amd_hardware_sampler; - - public: - /** - * @brief Assemble the YAML string containing all available general hardware samples. - * @details Hardware samples that are not supported by the current device are omitted in the YAML output. - * @return the YAML string (`[[nodiscard]]`) - */ - [[nodiscard]] std::string generate_yaml_string() const; - - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level) // the performance level: one of rsmi_dev_perf_level_t - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_gpu) // the GPU compute utilization in percent - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, utilization_mem) // the GPU memory utilization in percent -}; - -/** - * @brief Output the general @p samples to the given output-stream @p out. 
- * @details In contrast to `rocm_smi_general_samples::generate_yaml_string()`, outputs **all** general hardware samples, even if not supported by the current device (default initialized value). - * @param[in,out] out the output-stream to write the general hardware samples to - * @param[in] samples the ROCm SMI general samples - * @return the output-stream - */ -std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples); - -//*************************************************************************************************************************************// -// clock samples // -//*************************************************************************************************************************************// - -/** - * @brief Wrapper class for all clock related ROCm SMI hardware samples. - */ -class rocm_smi_clock_samples { - // befriend hardware sampler class - friend class gpu_amd_hardware_sampler; - - public: - /** - * @brief Assemble the YAML string containing all available general hardware samples. - * @details Hardware samples that are not supported by the current device are omitted in the YAML output. 
- * @return the YAML string (`[[nodiscard]]`) - */ - [[nodiscard]] std::string generate_yaml_string() const; - - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_system_min) // the minimum possible system clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_system_max) // the maximum possible system clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_socket_min) // the minimum possible socket clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_socket_max) // the maximum possible socket clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_memory_min) // the minimum possible memory clock frequency in Hz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, clock_memory_max) // the maximum possible memory clock frequency in Hz - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_system) // the current system clock frequency in Hz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_socket) // the current socket clock frequency in Hz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, clock_memory) // the current memory clock frequency in Hz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, overdrive_level) // the GPU overdrive percentage - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_overdrive_level) // the GPU memory overdrive percentage -}; - -/** - * @brief Output the clock related @p samples to the given output-stream @p out. - * @details In contrast to `rocm_smi_clock_samples::generate_yaml_string()`, outputs **all** clock related hardware samples, even if not supported by the current device (default initialized value). 
- * @param[in,out] out the output-stream to write the clock related hardware samples to - * @param[in] samples the ROCm SMI clock related samples - * @return the output-stream - */ -std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples); - -//*************************************************************************************************************************************// -// power samples // -//*************************************************************************************************************************************// - -/** - * @brief Wrapper class for all power related ROCm SMI hardware samples. - */ -class rocm_smi_power_samples { - // befriend hardware sampler class - friend class gpu_amd_hardware_sampler; - - public: - /** - * @brief Assemble the YAML string containing all available general hardware samples. - * @details Hardware samples that are not supported by the current device are omitted in the YAML output. - * @return the YAML string (`[[nodiscard]]`) - */ - [[nodiscard]] std::string generate_yaml_string() const; - - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, power_default_cap) // the default power cap, may be different from power cap - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, power_cap) // if the GPU draws more power (μW) than the power cap, the GPU may throttle - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_type) // the type of the power management: either current power draw or average power draw - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_power_profiles) // a list of the available power profiles - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_usage) // the current GPU socket power draw in μW - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_total_energy_consumption) // the total power consumption since the last driver reload in μJ - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, power_profile) // the current active power profile; one of 'available_power_profiles' 
-}; - -/** - * @brief Output the power related @p samples to the given output-stream @p out. - * @details In contrast to `rocm_smi_power_samples::generate_yaml_string()`, outputs **all** power related hardware samples, even if not supported by the current device (default initialized value). - * @param[in,out] out the output-stream to write the power related hardware samples to - * @param[in] samples the ROCm SMI power related samples - * @return the output-stream - */ -std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples); - -//*************************************************************************************************************************************// -// memory samples // -//*************************************************************************************************************************************// - -/** - * @brief Wrapper class for all memory related ROCm SMI hardware samples. - */ -class rocm_smi_memory_samples { - // befriend hardware sampler class - friend class gpu_amd_hardware_sampler; - - public: - /** - * @brief Assemble the YAML string containing all available general hardware samples. - * @details Hardware samples that are not supported by the current device are omitted in the YAML output. 
- * @return the YAML string (`[[nodiscard]]`) - */ - [[nodiscard]] std::string generate_yaml_string() const; - - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, memory_total) // the total available memory in Byte - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, visible_memory_total) // the total visible available memory in Byte, may be smaller than the total memory - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, min_num_pcie_lanes) // the minimum number of used PCIe lanes - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, max_num_pcie_lanes) // the maximum number of used PCIe lanes - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_used) // the currently used memory in Byte - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, pcie_transfer_rate) // the current PCIe transfer rate in T/s - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, num_pcie_lanes) // the number of currently used PCIe lanes -}; - -/** - * @brief Output the memory related @p samples to the given output-stream @p out. - * @details In contrast to `rocm_smi_memory_samples::generate_yaml_string()`, outputs **all** memory related hardware samples, even if not supported by the current device (default initialized value). - * @param[in,out] out the output-stream to write the memory related hardware samples to - * @param[in] samples the ROCm SMI memory related samples - * @return the output-stream - */ -std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples); - -//*************************************************************************************************************************************// -// temperature samples // -//*************************************************************************************************************************************// - -/** - * @brief Wrapper class for all temperature related ROCm SMI hardware samples. 
- */ -class rocm_smi_temperature_samples { - // befriend hardware sampler class - friend class gpu_amd_hardware_sampler; - - public: - /** - * @brief Assemble the YAML string containing all available general hardware samples. - * @details Hardware samples that are not supported by the current device are omitted in the YAML output. - * @return the YAML string (`[[nodiscard]]`) - */ - [[nodiscard]] std::string generate_yaml_string() const; - - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans) // the number of fans (if any) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, max_fan_speed) // the maximum fan speed - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_edge_min) // the minimum temperature on the GPU's edge temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_edge_max) // the maximum temperature on the GPU's edge temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hotspot_min) // the minimum temperature on the GPU's hotspot temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hotspot_max) // the maximum temperature on the GPU's hotspot temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_memory_min) // the minimum temperature on the GPU's memory temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_memory_max) // the maximum temperature on the GPU's memory temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_0_min) // the minimum temperature on the GPU's HBM0 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_0_max) // the maximum temperature on the GPU's HBM0 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_1_min) // the minimum temperature on the GPU's HBM1 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_1_max) // the maximum 
temperature on the GPU's HBM1 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_2_min) // the minimum temperature on the GPU's HBM2 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_2_max) // the maximum temperature on the GPU's HBM2 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_3_min) // the minimum temperature on the GPU's HBM3 temperature sensor in m°C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, temperature_hbm_3_max) // the maximum temperature on the GPU's HBM3 temperature sensor in m°C - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, fan_speed) // the current fan speed in % - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_edge) // the current temperature on the GPU's edge temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hotspot) // the current temperature on the GPU's hotspot temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_memory) // the current temperature on the GPU's memory temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_0) // the current temperature on the GPU's HBM0 temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_1) // the current temperature on the GPU's HBM1 temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_2) // the current temperature on the GPU's HBM2 temperature sensor in m°C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, temperature_hbm_3) // the current temperature on the GPU's HBM3 temperature sensor in m°C -}; - -/** - * @brief Output the temperature related @p samples to the given output-stream @p out. 
- * @details In contrast to `rocm_smi_temperature_samples::generate_yaml_string()`, outputs **all** temperature related hardware samples, even if not supported by the current device (default initialized value). - * @param[in,out] out the output-stream to write the temperature related hardware samples to - * @param[in] samples the ROCm SMI temperature related samples - * @return the output-stream - */ -std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples); - -} // namespace hws - -template <> -struct std::formatter : hws::detail::ostream_formatter { }; - -template <> -struct std::formatter : hws::detail::ostream_formatter { }; - -template <> -struct std::formatter : hws::detail::ostream_formatter { }; - -template <> -struct std::formatter : hws::detail::ostream_formatter { }; - -template <> -struct std::formatter : hws::detail::ostream_formatter { }; - -#endif // HARDWARE_SAMPLING_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/utility.hpp b/include/hardware_sampling/gpu_nvidia/utility.hpp deleted file mode 100644 index f4f8577..0000000 --- a/include/hardware_sampling/gpu_nvidia/utility.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/** - * @file - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - * - * @brief Implements utility functionality for the NVIDIA GPU sampler. - */ - -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ -#pragma once - -#include "nvml.h" // NVML runtime functions - -#include // std::format -#include // std::runtime_error - -namespace hws::detail { - -/** - * @def HWS_NVML_ERROR_CHECK - * @brief Defines the `HWS_NVML_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. - * @details Throws an exception if an NVML call returns with an error. 
Additionally outputs a more concrete error string. - */ -#if defined(HWS_ERROR_CHECKS_ENABLED) - #define HWS_NVML_ERROR_CHECK(nvml_func) \ - { \ - const nvmlReturn_t errc = nvml_func; \ - if (errc != NVML_SUCCESS) { \ - throw std::runtime_error{ std::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast(errc)) }; \ - } \ - } -#else - #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func; -#endif - -} // namespace hws::detail - -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_UTILITY_HPP_ diff --git a/include/hws/core.hpp b/include/hws/core.hpp new file mode 100644 index 0000000..8c7a474 --- /dev/null +++ b/include/hws/core.hpp @@ -0,0 +1,41 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Core header including all other necessary headers. + */ + +#ifndef HWS_CORE_HPP_ +#define HWS_CORE_HPP_ +#pragma once + +#include "hws/event.hpp" +#include "hws/hardware_sampler.hpp" +#include "hws/sample_category.hpp" +#include "hws/system_hardware_sampler.hpp" +#include "hws/version.hpp" + +#if defined(HWS_FOR_CPUS_ENABLED) + #include "hws/cpu/cpu_samples.hpp" + #include "hws/cpu/hardware_sampler.hpp" +#endif + +#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) + #include "hws/gpu_nvidia//nvml_samples.hpp" + #include "hws/gpu_nvidia/hardware_sampler.hpp" +#endif + +#if defined(HWS_FOR_AMD_GPUS_ENABLED) + #include "hws/gpu_amd/hardware_sampler.hpp" + #include "hws/gpu_amd/rocm_smi_samples.hpp" +#endif + +#if defined(HWS_FOR_INTEL_GPUS_ENABLED) + #include "hws/gpu_intel/hardware_sampler.hpp" + #include "hws/gpu_intel/level_zero_samples.hpp" +#endif + +#endif // HWS_CORE_HPP_ diff --git a/include/hardware_sampling/cpu/cpu_samples.hpp b/include/hws/cpu/cpu_samples.hpp similarity index 74% rename from include/hardware_sampling/cpu/cpu_samples.hpp rename to
include/hws/cpu/cpu_samples.hpp index da08f84..bcea2d4 100644 --- a/include/hardware_sampling/cpu/cpu_samples.hpp +++ b/include/hws/cpu/cpu_samples.hpp @@ -8,13 +8,14 @@ * @brief Defines the samples used with turbostat, lscpu, and free. */ -#ifndef HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_ -#define HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_ +#ifndef HWS_CPU_CPU_SAMPLES_HPP_ +#define HWS_CPU_CPU_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter +#include "hws/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter -#include // std::formatter #include // std::ostream forward declaration #include // std::optional #include // std::string @@ -35,30 +36,37 @@ class cpu_general_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any general hardware sample is present. + * @return `true` if any general hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the CPU architecture (e.g., x86_64) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the total number of cores of the CPU(s) HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_threads) // the number of threads of the CPU(s) including potential hyper-threads HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, threads_per_core) // the number of hyper-threads per core HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, cores_per_socket) // the number of physical cores per socket HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_sockets) // the number of sockets HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, numa_nodes) // the number of NUMA nodes - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID (e.g. GenuineIntel) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID (e.g., GenuineIntel) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the CPU HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, flags) // potential CPU flags (e.g., sse4_1, avx, avx, etc) - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, busy_percent) // the percent the CPU was busy doing work - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ipc) // the instructions-per-cycle count - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, irq) // the number of interrupts - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, smi) // the number of system management interrupts - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, poll) // the number of times the CPU was in the polling state - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, poll_percent) // the percent of the CPU was in the polling state + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, compute_utilization) // the percent the CPU was busy doing work + 
HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ipc) // the instructions-per-cycle count + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, irq) // the number of interrupts + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, smi) // the number of system management interrupts + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, poll) // the number of times the CPU was in the polling state + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, poll_percent) // the percent of time the CPU was in the polling state }; /** @@ -82,20 +90,26 @@ class cpu_clock_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any clock related hardware sample is present. + * @return `true` if any clock related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`.
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, frequency_boost) // true if frequency boosting is enabled - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, min_frequency) // the minimum possible CPU frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, max_frequency) // the maximum possible CPU frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, auto_boosted_clock_enabled) // true if frequency boosting is enabled + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min) // the minimum possible CPU frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max) // the maximum possible CPU frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_frequency) // the average CPU frequency in MHz including idle cores - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_frequency) // the average CPU frequency in MHz excluding idle cores - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter) // the time stamp counter + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_frequency) // the average CPU frequency in MHz including idle cores + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, average_non_idle_clock_frequency) // the average CPU frequency in MHz excluding idle cores + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, time_stamp_counter) // the time stamp counter }; /** @@ -119,18 +133,27 @@ class cpu_power_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any power related hardware sample is present. + * @return `true` if any power related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. 
+ * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_watt) // the currently consumed power of the package of the CPU in W - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_watt) // the currently consumed power of the core part of the CPU in W - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ram_watt) // the currently consumed power of the RAM part of the CPU in W - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_rapl_throttle_percent) // the percent of time the package throttled due to RAPL limiters - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, dram_rapl_throttle_percent) // the percent of time the DRAM throttled due to RAPL limiters + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type) // the type of the power readings: always "instant/current" + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage) // the currently consumed power of the package of the CPU in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption) // the total power consumption in J + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_watt) // the currently consumed power of the core part of the CPU in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, ram_watt) // the currently consumed power of the RAM part of the CPU in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_rapl_throttle_percent) // the percent of time the package throttled due to RAPL limiters + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, dram_rapl_throttle_percent) // the percent of time the DRAM throttled due to RAPL limiters }; /** @@ -154,17 +177,23 @@ class cpu_memory_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any memory related hardware sample is present. + * @return `true` if any memory related hardware sample is, otherwise `false`. 
+ */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l1d_cache) // the size of the L1 data cache - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l1i_cache) // the size of the L1 instruction cache - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l2_cache) // the size of the L2 cache - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, l3_cache) // the size of the L2 cache + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L1d) // the size of the L1 data cache + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L1i) // the size of the L1 instruction cache + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L2) // the size of the L2 cache + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, cache_size_L3) // the size of the L3 cache HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long long, memory_total) // the total available memory in Byte HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long long, swap_memory_total) // the total available swap memory in Byte @@ -195,16 +224,22 @@ class cpu_temperature_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any temperature related hardware sample is present. + * @return `true` if any temperature related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`.
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature) // the current temperature of the whole package in °C HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, core_temperature) // the current temperature of the core part of the CPU in °C HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, core_throttle_percent) // the percent of time the CPU has throttled - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, package_temperature) // the current temperature of the whole package in °C }; /** @@ -228,9 +263,15 @@ class cpu_gfx_samples { friend class cpu_hardware_sampler; public: + /** + * @brief Checks whether any gfx related hardware sample is present. + * @return `true` if any gfx related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -266,9 +307,15 @@ class cpu_idle_states_samples { using map_type = std::unordered_map>; public: + /** + * @brief Checks whether any idle state related hardware sample is present. + * @return `true` if any idle state related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; @@ -292,25 +339,29 @@ std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &sampl } // namespace hws +/// @cond Doxygen_suppress + template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond -#endif // HARDWARE_SAMPLING_CPU_CPU_SAMPLES_HPP_ +#endif // HWS_CPU_CPU_SAMPLES_HPP_ diff --git a/include/hardware_sampling/cpu/hardware_sampler.hpp b/include/hws/cpu/hardware_sampler.hpp similarity index 81% rename from include/hardware_sampling/cpu/hardware_sampler.hpp rename to include/hws/cpu/hardware_sampler.hpp index b86771e..d1b4102 100644 --- a/include/hardware_sampling/cpu/hardware_sampler.hpp +++ b/include/hws/cpu/hardware_sampler.hpp @@ -8,16 +8,17 @@ * @brief Defines a hardware sampler for CPUs using the turbostat, lscpu, and free utilities (requires root). 
*/ -#ifndef HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_CPU_HARDWARE_SAMPLER_HPP_ +#define HWS_CPU_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::ostream_formatter +#include "hws/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter #include // std::chrono::milliseconds, std::chrono_literals namespace -#include // std::formatter #include // std::ostream forward declaration namespace hws { @@ -32,13 +33,15 @@ class cpu_hardware_sampler : public hardware_sampler { public: /** * @brief Construct a new CPU hardware sampler with the default sampling interval. + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - cpu_hardware_sampler(); + explicit cpu_hardware_sampler(sample_category category = sample_category::all); /** * @brief Construct a new CPU hardware sampler with the @p sampling_interval. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit cpu_hardware_sampler(std::chrono::milliseconds sampling_interval); + explicit cpu_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). 
@@ -104,21 +107,21 @@ class cpu_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const cpu_idle_states_samples &idle_state_samples() const noexcept { return idle_state_samples_; } - private: /** - * @copydoc hws::hardware_sampler::sampling_loop + * @copydoc hws::hardware_sampler::device_identification */ - void sampling_loop() final; + [[nodiscard]] std::string device_identification() const final; /** - * @copydoc hws::hardware_sampler::device_identification + * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const */ - std::string device_identification() const final; + [[nodiscard]] std::string samples_only_as_yaml_string() const final; + private: /** - * @copydoc hws::hardware_sampler::generate_yaml_string + * @copydoc hws::hardware_sampler::sampling_loop */ - std::string generate_yaml_string() const final; + void sampling_loop() final; /// The general CPU samples. cpu_general_samples general_samples_{}; @@ -147,7 +150,11 @@ std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler) } // namespace hws +/// @cond Doxygen_suppress + template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond -#endif // HARDWARE_SAMPLING_CPU_HARDWARE_SAMPLER_HPP_ +#endif // HWS_CPU_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/cpu/utility.hpp b/include/hws/cpu/utility.hpp similarity index 87% rename from include/hardware_sampling/cpu/utility.hpp rename to include/hws/cpu/utility.hpp index d203e0b..9efd008 100644 --- a/include/hardware_sampling/cpu/utility.hpp +++ b/include/hws/cpu/utility.hpp @@ -8,11 +8,12 @@ * @brief Implements utility functionality for the CPU sampler. 
*/ -#ifndef HARDWARE_SAMPLING_CPU_UTILITY_HPP_ -#define HARDWARE_SAMPLING_CPU_UTILITY_HPP_ +#ifndef HWS_CPU_UTILITY_HPP_ +#define HWS_CPU_UTILITY_HPP_ #pragma once -#include // std::format +#include "fmt/format.h" // fmt::format + #include // std::runtime_error #include // std::string #include // std::string_view @@ -29,7 +30,7 @@ namespace hws::detail { { \ const int errc = subprocess_func; \ if (errc != 0) { \ - throw std::runtime_error{ std::format("Error calling subprocess function \"{}\"", #subprocess_func) }; \ + throw std::runtime_error{ fmt::format("Error calling subprocess function \"{}\"", #subprocess_func) }; \ } \ } #else @@ -43,6 +44,6 @@ namespace hws::detail { */ [[nodiscard]] std::string run_subprocess(std::string_view cmd_line); -} // namespace hws +} // namespace hws::detail -#endif // HARDWARE_SAMPLING_CPU_UTILITY_HPP_ +#endif // HWS_CPU_UTILITY_HPP_ diff --git a/include/hardware_sampling/event.hpp b/include/hws/event.hpp similarity index 51% rename from include/hardware_sampling/event.hpp rename to include/hws/event.hpp index 4375813..7252a75 100644 --- a/include/hardware_sampling/event.hpp +++ b/include/hws/event.hpp @@ -8,16 +8,16 @@ * @brief Defines an event type. */ -#ifndef HARDWARE_SAMPLING_EVENT_HPP_ -#define HARDWARE_SAMPLING_EVENT_HPP_ +#ifndef HWS_EVENT_HPP_ +#define HWS_EVENT_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // hws::detail::ostream_formatter +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter -#include // std::chrono::steady_clock::time_point -#include // std::formatter -#include // std::ostream forward declaration -#include // std::string +#include // std::chrono::steady_clock::time_point +#include // std::ostream forward declaration +#include // std::string +#include // std::move namespace hws { @@ -25,9 +25,17 @@ namespace hws { * @brief A struct encapsulating a single event. */ struct event { + /** + * @brief Construct a new event given a time point and name. 
+ * @param[in] time_point_p the time when the event occurred + * @param[in] name_p the name of the event + */ + event(const std::chrono::steady_clock::time_point time_point_p, std::string name_p) : + time_point{ time_point_p }, + name{ std::move(name_p) } { } + /// The time point this event occurred at. std::chrono::steady_clock::time_point time_point; - /// The name of this event. std::string name; }; @@ -42,7 +50,11 @@ std::ostream &operator<<(std::ostream &out, const event &e); } // namespace hws +/// @cond Doxygen_suppress + template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond -#endif // HARDWARE_SAMPLING_EVENT_HPP_ +#endif // HWS_EVENT_HPP_ diff --git a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp b/include/hws/gpu_amd/hardware_sampler.hpp similarity index 78% rename from include/hardware_sampling/gpu_amd/hardware_sampler.hpp rename to include/hws/gpu_amd/hardware_sampler.hpp index 55ab3a9..668cc9a 100644 --- a/include/hardware_sampling/gpu_amd/hardware_sampler.hpp +++ b/include/hws/gpu_amd/hardware_sampler.hpp @@ -8,19 +8,20 @@ * @brief Defines a hardware sampler for AMD GPUs using AMD's ROCm SMI library. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_ +#define HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::ostream_formatter +#include "hws/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter #include // std::atomic #include // std::chrono::milliseconds, std::chrono_literals namespace #include // std::size_t #include // std::uint32_t -#include // std::formatter #include // std::ostream forward declaration namespace hws { @@ -36,27 +37,31 @@ class gpu_amd_hardware_sampler : public hardware_sampler { /** * @brief Construct a new AMD GPU hardware sampler for the default device with the default sampling interval. * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_amd_hardware_sampler(); + explicit gpu_amd_hardware_sampler(sample_category category = sample_category::all); /** * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the default sampling interval. * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. 
* @param[in] device_id the ID of the device to sample + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_amd_hardware_sampler(std::size_t device_id); + explicit gpu_amd_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all); /** * @brief Construct a new AMD GPU hardware sampler for the default device with the @p sampling_interval. * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_amd_hardware_sampler(std::chrono::milliseconds sampling_interval); + explicit gpu_amd_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the @p sampling_interval. * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. * @param[in] device_id the ID of the device to sample * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_amd_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval); + gpu_amd_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). 
@@ -111,21 +116,21 @@ class gpu_amd_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const rocm_smi_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } - private: /** - * @copydoc hws::hardware_sampler::sampling_loop + * @copydoc hws::hardware_sampler::device_identification */ - void sampling_loop() final; + [[nodiscard]] std::string device_identification() const final; /** - * @copydoc hws::hardware_sampler::device_identification + * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const */ - std::string device_identification() const final; + [[nodiscard]] std::string samples_only_as_yaml_string() const final; + private: /** - * @copydoc hws::hardware_sampler::generate_yaml_string + * @copydoc hws::hardware_sampler::sampling_loop */ - std::string generate_yaml_string() const final; + void sampling_loop() final; /// The ID of the device to sample. std::uint32_t device_id_{}; @@ -158,7 +163,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &samp } // namespace hws +/// @cond Doxygen_suppress + template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond -#endif // HARDWARE_SAMPLING_GPU_AMD_HARDWARE_SAMPLER_HPP_ +#endif // HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_ diff --git a/include/hws/gpu_amd/rocm_smi_samples.hpp b/include/hws/gpu_amd/rocm_smi_samples.hpp new file mode 100644 index 0000000..8ace761 --- /dev/null +++ b/include/hws/gpu_amd/rocm_smi_samples.hpp @@ -0,0 +1,294 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines the samples used with ROCm SMI. 
+ */ + +#ifndef HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ +#define HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ +#pragma once + +#include "hws/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter + +#include // std::uint64_t, std::int64_t, std::uint32_t +#include // std::ostream forward declaration +#include // std::optional +#include // std::string +#include // std::vector + +namespace hws { + +//*************************************************************************************************************************************// +// general samples // +//*************************************************************************************************************************************// + +/** + * @brief Wrapper class for all general ROCm SMI hardware samples. + */ +class rocm_smi_general_samples { + // befriend hardware sampler class + friend class gpu_amd_hardware_sampler; + + public: + /** + * @brief Checks whether any general hardware sample is present. + * @return `true` if any general hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; + /** + * @brief Assemble the YAML string containing all available general hardware samples. + * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
+ * @return the YAML string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string generate_yaml_string() const; + + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the architecture name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, compute_utilization) // the GPU compute utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, memory_utilization) // the GPU memory utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, performance_level) // the performance level: one of rsmi_dev_perf_level_t +}; + +/** + * @brief Output the general @p samples to the given output-stream @p out. + * @details In contrast to `rocm_smi_general_samples::generate_yaml_string()`, outputs **all** general hardware samples, even if not supported by the current device (default initialized value). + * @param[in,out] out the output-stream to write the general hardware samples to + * @param[in] samples the ROCm SMI general samples + * @return the output-stream + */ +std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples); + +//*************************************************************************************************************************************// +// clock samples // +//*************************************************************************************************************************************// + +/** + * @brief Wrapper class for all clock related ROCm SMI hardware samples. + */ +class rocm_smi_clock_samples { + // befriend hardware sampler class + friend class gpu_amd_hardware_sampler; + + public: + /** + * @brief Checks whether any clock related hardware sample is present. 
+ * @return `true` if any clock related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; + /** + * @brief Assemble the YAML string containing all available clock related hardware samples. + * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. + * @return the YAML string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string generate_yaml_string() const; + + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min) // the minimum possible system clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max) // the maximum possible system clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min) // the minimum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max) // the maximum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, socket_clock_frequency_min) // the minimum possible socket clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, socket_clock_frequency_max) // the maximum possible socket clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_clock_frequencies) // the available clock frequencies in MHz (slowest to fastest) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_memory_clock_frequencies) // the available memory clock frequencies in MHz (slowest to fastest) + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current system clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, socket_clock_frequency) // the current socket clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, overdrive_level) // the GPU overdrive percentage + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t,
memory_overdrive_level) // the GPU memory overdrive percentage +}; + +/** + * @brief Output the clock related @p samples to the given output-stream @p out. + * @details In contrast to `rocm_smi_clock_samples::generate_yaml_string()`, outputs **all** clock related hardware samples, even if not supported by the current device (default initialized value). + * @param[in,out] out the output-stream to write the clock related hardware samples to + * @param[in] samples the ROCm SMI clock related samples + * @return the output-stream + */ +std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples); + +//*************************************************************************************************************************************// +// power samples // +//*************************************************************************************************************************************// + +/** + * @brief Wrapper class for all power related ROCm SMI hardware samples. + */ +class rocm_smi_power_samples { + // befriend hardware sampler class + friend class gpu_amd_hardware_sampler; + + public: + /** + * @brief Checks whether any power related hardware sample is present. + * @return `true` if any power related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; + /** + * @brief Assemble the YAML string containing all available general hardware samples. + * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
+ * @return the YAML string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string generate_yaml_string() const; + + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_management_limit) // the default power cap (W), may be different from power cap + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit) // if the GPU draws more power (W) than the power cap, the GPU may throttle + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type) // the type of the power readings: either current power draw or average power draw + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_power_profiles) // a list of the available power profiles + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage) // the current GPU socket power draw in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption) // the total power consumption since the last driver reload in J + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, power_profile) // the current active power profile; one of 'available_power_profiles' +}; + +/** + * @brief Output the power related @p samples to the given output-stream @p out. + * @details In contrast to `rocm_smi_power_samples::generate_yaml_string()`, outputs **all** power related hardware samples, even if not supported by the current device (default initialized value). + * @param[in,out] out the output-stream to write the power related hardware samples to + * @param[in] samples the ROCm SMI power related samples + * @return the output-stream + */ +std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples); + +//*************************************************************************************************************************************// +// memory samples // +//*************************************************************************************************************************************// + +/** + * @brief Wrapper class for all memory related ROCm SMI hardware samples. 
+ */ +class rocm_smi_memory_samples { + // befriend hardware sampler class + friend class gpu_amd_hardware_sampler; + + public: + /** + * @brief Checks whether any memory related hardware sample is present. + * @return `true` if any memory related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; + /** + * @brief Assemble the YAML string containing all available general hardware samples. + * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. + * @return the YAML string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string generate_yaml_string() const; + + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, memory_total) // the total available memory in Byte + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, visible_memory_total) // the total visible available memory in Byte, may be smaller than the total memory + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_pcie_lanes_min) // the minimum number of used PCIe lanes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_pcie_lanes_max) // the maximum number of used PCIe lanes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, pcie_link_transfer_rate_min) // the minimum PCIe link transfer rate in MT/s + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, pcie_link_transfer_rate_max) // the maximum PCIe link transfer rate in MT/s + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_used) // the currently used memory in Byte + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, memory_free) // the currently free memory in Byte + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint32_t, num_pcie_lanes) // the number of currently used PCIe lanes + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, pcie_link_transfer_rate) // the current PCIe transfer rate in MT/s +}; + +/** + * @brief Output the memory related @p samples to the given output-stream @p out. 
+ * @details In contrast to `rocm_smi_memory_samples::generate_yaml_string()`, outputs **all** memory related hardware samples, even if not supported by the current device (default initialized value). + * @param[in,out] out the output-stream to write the memory related hardware samples to + * @param[in] samples the ROCm SMI memory related samples + * @return the output-stream + */ +std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples); + +//*************************************************************************************************************************************// +// temperature samples // +//*************************************************************************************************************************************// + +/** + * @brief Wrapper class for all temperature related ROCm SMI hardware samples. + */ +class rocm_smi_temperature_samples { + // befriend hardware sampler class + friend class gpu_amd_hardware_sampler; + + public: + /** + * @brief Checks whether any temperature related hardware sample is present. + * @return `true` if any temperature related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; + /** + * @brief Assemble the YAML string containing all available general hardware samples. + * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
+ * @return the YAML string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string generate_yaml_string() const; + + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans) // the number of fans (if any) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint64_t, fan_speed_max) // the maximum fan speed in RPM + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_min) // the minimum temperature on the GPU's edge temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max) // the maximum temperature on the GPU's edge temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_min) // the minimum temperature on the GPU's memory temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max) // the maximum temperature on the GPU's memory temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hotspot_temperature_min) // the minimum temperature on the GPU's hotspot temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hotspot_temperature_max) // the maximum temperature on the GPU's hotspot temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_0_temperature_min) // the minimum temperature on the GPU's HBM0 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_0_temperature_max) // the maximum temperature on the GPU's HBM0 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_1_temperature_min) // the minimum temperature on the GPU's HBM1 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_1_temperature_max) // the maximum temperature on the GPU's HBM1 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_2_temperature_min) // the minimum temperature on the GPU's HBM2 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_2_temperature_max) // the maximum temperature on the GPU's HBM2 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_3_temperature_min) // the minimum temperature 
on the GPU's HBM3 temperature sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, hbm_3_temperature_max) // the maximum temperature on the GPU's HBM3 temperature sensor in °C + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage) // the current fan speed in % + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature) // the current temperature on the GPU's edge temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hotspot_temperature) // the current temperature on the GPU's hotspot temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_temperature) // the current temperature on the GPU's memory temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_0_temperature) // the current temperature on the GPU's HBM0 temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_1_temperature) // the current temperature on the GPU's HBM1 temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_2_temperature) // the current temperature on the GPU's HBM2 temperature sensor in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, hbm_3_temperature) // the current temperature on the GPU's HBM3 temperature sensor in °C +}; + +/** + * @brief Output the temperature related @p samples to the given output-stream @p out. + * @details In contrast to `rocm_smi_temperature_samples::generate_yaml_string()`, outputs **all** temperature related hardware samples, even if not supported by the current device (default initialized value). 
+ * @param[in,out] out the output-stream to write the temperature related hardware samples to + * @param[in] samples the ROCm SMI temperature related samples + * @return the output-stream + */ +std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples); + +} // namespace hws + +/// @cond Doxygen_suppress + +template <> +struct fmt::formatter : fmt::ostream_formatter { }; + +template <> +struct fmt::formatter : fmt::ostream_formatter { }; + +template <> +struct fmt::formatter : fmt::ostream_formatter { }; + +template <> +struct fmt::formatter : fmt::ostream_formatter { }; + +template <> +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond + +#endif // HWS_GPU_AMD_ROCM_SMI_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_amd/utility.hpp b/include/hws/gpu_amd/utility.hpp similarity index 56% rename from include/hardware_sampling/gpu_amd/utility.hpp rename to include/hws/gpu_amd/utility.hpp index 5d039c7..4889976 100644 --- a/include/hardware_sampling/gpu_amd/utility.hpp +++ b/include/hws/gpu_amd/utility.hpp @@ -8,22 +8,28 @@ * @brief Implements utility functionality for the AMD GPU sampler. */ -#ifndef HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_ -#define HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_ +#ifndef HWS_GPU_AMD_UTILITY_HPP_ +#define HWS_GPU_AMD_UTILITY_HPP_ #pragma once +#include "fmt/format.h" // fmt::format #include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions -#include // std::format #include // std::runtime_error +#include // std::string -namespace hws { +namespace hws::detail { /** * @def HWS_ROCM_SMI_ERROR_CHECK * @brief Defines the `HWS_ROCM_SMI_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. * @details Throws an exception if a ROCm SMI call returns with an error. Additionally outputs a more concrete error string if possible. 
*/ +/** + * @def HWS_HIP_ERROR_CHECK + * @brief Defines the `HWS_HIP_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. + * @details Throws an exception if a HIP call returns with an error. Additionally outputs a more concrete error string. + */ #if defined(HWS_ERROR_CHECKS_ENABLED) #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) \ { \ @@ -32,16 +38,36 @@ namespace hws { const char *error_string; \ const rsmi_status_t ret = rsmi_status_string(errc, &error_string); \ if (ret == RSMI_STATUS_SUCCESS) { \ - throw std::runtime_error{ std::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, error_string) }; \ + throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, error_string) }; \ } else { \ - throw std::runtime_error{ std::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, static_cast<int>(errc)) }; \ + throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, static_cast<int>(errc)) }; \ } \ } \ } + + #define HWS_HIP_ERROR_CHECK(hip_func) \ + { \ + const hipError_t errc = hip_func; \ + if (errc != hipSuccess) { \ + throw std::runtime_error{ fmt::format("Error in HIP function call \"{}\": {}", #hip_func, hipGetErrorString(errc)) }; \ + } \ + } + #else #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) rocm_smi_func; + #define HWS_HIP_ERROR_CHECK(hip_func) \ + { \ + [[maybe_unused]] hipError_t errc = hip_func; \ + } #endif -} // namespace hws +/** + * @brief Convert the performance level value (`rsmi_dev_perf_level_t`) to a string. 
+ * @param[in] perf_level the performance level to convert to a string + * @return the performance level as a string (`[[nodiscard]]`) + */ +[[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level); + +} // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_AMD_UTILITY_HPP_ +#endif // HWS_GPU_AMD_UTILITY_HPP_ diff --git a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp b/include/hws/gpu_intel/hardware_sampler.hpp similarity index 76% rename from include/hardware_sampling/gpu_intel/hardware_sampler.hpp rename to include/hws/gpu_intel/hardware_sampler.hpp index 442be04..db068fe 100644 --- a/include/hardware_sampling/gpu_intel/hardware_sampler.hpp +++ b/include/hws/gpu_intel/hardware_sampler.hpp @@ -8,18 +8,21 @@ * @brief Defines a hardware sampler for Intel GPUs using Intel's Level Zero. */ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_ +#define HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle -#include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle +#include "hws/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category + +#include "fmt/format.h" // fmt::formatter, fmt::ostream_formatter #include // std::atomic -#include // std::chrono::{steady_clock, 
milliseconds}, std::chrono_literals namespace +#include // std::chrono::milliseconds, std::chrono_literals namespace #include // std::size_t -#include // std::formatter +#include // std::ostream forward declaration #include // std::string namespace hws { @@ -35,27 +38,31 @@ class gpu_intel_hardware_sampler : public hardware_sampler { /** * @brief Construct a new Intel GPU hardware sampler for the default device with the default sampling interval. * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_intel_hardware_sampler(); + explicit gpu_intel_hardware_sampler(sample_category category = sample_category::all); /** * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the default sampling interval. * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. * @param[in] device_id the ID of the device to sample + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_intel_hardware_sampler(std::size_t device_id); + explicit gpu_intel_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all); /** * @brief Construct a new Intel GPU hardware sampler for the default device with the @p sampling_interval. * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_intel_hardware_sampler(std::chrono::milliseconds sampling_interval); + explicit gpu_intel_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the @p sampling_interval. 
* @details If this is the first Intel GPU sampler, initializes the Level Zero environment. * @param[in] device_id the ID of the device to sample * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_intel_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval); + gpu_intel_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). @@ -109,21 +116,21 @@ class gpu_intel_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const level_zero_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } - private: /** - * @copydoc hws::hardware_sampler::sampling_loop + * @copydoc hws::hardware_sampler::device_identification */ - void sampling_loop() final; + std::string device_identification() const final; /** - * @copydoc hws::hardware_sampler::device_identification + * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const */ - std::string device_identification() const final; + [[nodiscard]] std::string samples_only_as_yaml_string() const final; + private: /** - * @copydoc hws::hardware_sampler::generate_yaml_string + * @copydoc hws::hardware_sampler::sampling_loop */ - std::string generate_yaml_string() const final; + void sampling_loop() final; /// The device handle for the device to sample. 
detail::level_zero_device_handle device_; @@ -156,7 +163,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa } // namespace hws +/// @cond Doxygen_suppress + template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond -#endif // HARDWARE_SAMPLING_GPU_INTEL_HARDWARE_SAMPLER_HPP_ +#endif // HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp b/include/hws/gpu_intel/level_zero_device_handle.hpp similarity index 85% rename from include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp rename to include/hws/gpu_intel/level_zero_device_handle.hpp index f84d8a5..c05f630 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_device_handle.hpp +++ b/include/hws/gpu_intel/level_zero_device_handle.hpp @@ -8,8 +8,8 @@ * @brief Defines a pImpl class for a Level Zero device handle. */ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ +#ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ +#define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ #pragma once #include // std::size_t @@ -40,7 +40,7 @@ class level_zero_device_handle { /** * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t. - * @throws hardware_sampling_exception if `*this` has been default constructed + * @throws std::runtime_error if `*this` has been default constructed * @return the device handle (`[[nodiscard]]`) */ [[nodiscard]] level_zero_device_handle_impl &get_impl() { @@ -52,7 +52,7 @@ class level_zero_device_handle { /** * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t. 
- * @throws hardware_sampling_exception if `*this` has been default constructed + * @throws std::runtime_error if `*this` has been default constructed * @return the device handle (`[[nodiscard]]`) */ [[nodiscard]] const level_zero_device_handle_impl &get_impl() const { @@ -69,4 +69,4 @@ class level_zero_device_handle { } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ +#endif // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ diff --git a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp b/include/hws/gpu_intel/level_zero_device_handle_impl.hpp similarity index 78% rename from include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp rename to include/hws/gpu_intel/level_zero_device_handle_impl.hpp index 640cdcc..1c3b269 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp +++ b/include/hws/gpu_intel/level_zero_device_handle_impl.hpp @@ -8,18 +8,18 @@ * @brief Implements a pImpl class for a Level Zero device handle. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ +#ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ +#define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ #pragma once -#include "hardware_sampling/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle -#include "hardware_sampling/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK +#include "hws/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle +#include "hws/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK +#include "fmt/format.h" // fmt::format #include "level_zero/ze_api.h" // Level Zero runtime functions #include // std::size_t #include // std::uint32_t -#include // std::format #include // std::make_shared #include // std::runtime_error #include // std::vector @@ -38,28 +38,28 @@ struct level_zero_device_handle::level_zero_device_handle_impl { explicit level_zero_device_handle_impl(const std::size_t device_id) { // discover the number of drivers std::uint32_t driver_count{ 0 }; - HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr)); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr)) // check if only the single GPU driver has been found if (driver_count > 1) { - throw std::runtime_error{ std::format("Found too many GPU drivers ({})!", driver_count) }; + throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) }; } // get the GPU driver - HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver)); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver)) // get all GPUs for the current driver std::uint32_t device_count{ 0 }; - HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr)); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr)) // check if enough GPUs have been found (the requested ID must index into the device list, not the driver list) - if (driver_count <= device_id) { + if (device_count <= device_id) { - throw std::runtime_error{ 
std::format("Found only {} GPUs, but GPU with the ID was requested!", device_count, device_id) }; + throw std::runtime_error{ fmt::format("Found only {} GPUs, but GPU with the ID {} was requested!", device_count, device_id) }; } // get the GPUs std::vector<ze_device_handle_t> all_devices(device_count); - HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, all_devices.data())); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, all_devices.data())) // save the requested device device = all_devices[device_id]; @@ -76,4 +76,4 @@ inline level_zero_device_handle::level_zero_device_handle(const std::size_t devi } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ +#endif // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ diff --git a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp b/include/hws/gpu_intel/level_zero_samples.hpp similarity index 55% rename from include/hardware_sampling/gpu_intel/level_zero_samples.hpp rename to include/hws/gpu_intel/level_zero_samples.hpp index 7d0f713..dec6ec5 100644 --- a/include/hardware_sampling/gpu_intel/level_zero_samples.hpp +++ b/include/hws/gpu_intel/level_zero_samples.hpp @@ -8,14 +8,15 @@ * @brief Defines the samples used with Level Zero. 
*/ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ +#ifndef HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ +#define HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter +#include "hws/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER -#include // std::uint64_t, std::int32_t -#include // std::format +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter + +#include // std::uint64_t, std::int64_t, std::int32_t #include // std::ostream forward declaration #include // std::optional #include // std::string @@ -36,14 +37,23 @@ class level_zero_general_samples { friend class gpu_intel_hardware_sampler; public: + /** + * @brief Checks whether any general hardware sample is present. + * @return `true` if any general hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the model name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, flags) // potential GPU flags (e.g. 
integrated device) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, standby_mode) // the enabled standby mode (power saving or never) HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_threads_per_eu) // the number of threads per EU unit HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, eu_simd_width) // the physical EU unit SIMD width @@ -70,26 +80,34 @@ class level_zero_clock_samples { friend class gpu_intel_hardware_sampler; public: + /** + * @brief Checks whether any clock related hardware sample is present. + * @return `true` if any clock related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_gpu_min) // the minimum possible GPU clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_gpu_max) // the maximum possible GPU clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_clocks_gpu) // the available GPU clock frequencies in MHz (slowest to fastest) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_mem_min) // the minimum possible memory clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_mem_max) // the maximum possible memory clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_clocks_mem) // the available memory clock frequencies in MHz (slowest to fastest) - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, tdp_frequency_limit_gpu) // the current maximum allowed GPU frequency based on the TDP limit in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_gpu) // the current GPU frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, 
throttle_reason_gpu) // the current GPU frequency throttle reason - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, tdp_frequency_limit_mem) // the current maximum allowed memory frequency based on the TDP limit in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_mem) // the current memory frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, throttle_reason_mem) // the current memory frequency throttle reason + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min) // the minimum possible GPU clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max) // the maximum possible GPU clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min) // the minimum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max) // the maximum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_clock_frequencies) // the available GPU clock frequencies in MHz (slowest to fastest) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_memory_clock_frequencies) // the available memory clock frequencies in MHz (slowest to fastest) + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current GPU frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, throttle_reason) // the current GPU frequency throttle reason as bitmask + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason_string) // the current GPU frequency throttle reason as string + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, memory_throttle_reason) // the current memory frequency throttle reason as bitmask + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, memory_throttle_reason_string) // the current memory frequency throttle reason as string + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, frequency_limit_tdp) // the current maximum 
allowed GPU frequency based on the TDP limit in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_frequency_limit_tdp) // the current maximum allowed memory frequency based on the TDP limit in MHz }; /** @@ -113,17 +131,25 @@ class level_zero_power_samples { friend class gpu_intel_hardware_sampler; public: + /** + * @brief Checks whether any power related hardware sample is present. + * @return `true` if any power related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, energy_threshold_enabled) // true if the energy threshold is enabled - HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, energy_threshold) // the energy threshold in J + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit) // the actually enforced power limit (W), may be different from power management limit if external limiters are set + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type) // the type of the power readings + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode) // true if power management limits are enabled - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::uint64_t, power_total_energy_consumption) // the total power consumption since the last driver reload in mJ + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage) // the current power draw of the GPU in W (calculated from power_total_energy_consumption) + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption) // the total power consumption since the last driver reload in J }; /** @@ -154,26 +180,33 @@ class level_zero_memory_samples { using map_type = 
std::unordered_map; public: + /** + * @brief Checks whether any memory related hardware sample is present. + * @return `true` if any memory related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_total) // the total memory size of the different memory modules in Bytes - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, allocatable_memory_total) // the total allocatable memory size of the different memory modules in Bytes - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, pcie_link_max_speed) // the maximum PCIe bandwidth in bytes/sec - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, pcie_max_width) // the PCIe lane width - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, max_pcie_link_generation) // the PCIe generation - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, bus_width) // the bus width of the different memory modules - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, num_channels) // the number of memory channels of the different memory modules - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, location) // the location of the different memory modules (system or device) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_total) // the total memory size of the different memory modules in Bytes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, visible_memory_total) // the total allocatable memory size of the different memory modules in Bytes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_location) // the location of the different memory modules (system or device) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, num_pcie_lanes_max) // the maximum PCIe lane width + 
HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, pcie_link_generation_max) // the maximum PCIe generation + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int64_t, pcie_link_speed_max) // the maximum PCIe bandwidth in MBPS + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_bus_width) // the bus width of the different memory modules + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, memory_num_channels) // the number of memory channels of the different memory modules HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type>, memory_free) // the currently free memory of the different memory modules in Bytes - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, pcie_link_speed) // the current PCIe bandwidth in bytes/sec - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, pcie_link_width) // the current PCIe lane width + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type>, memory_used) // the currently used memory of the different memory modules in Bytes + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, num_pcie_lanes) // the current PCIe lane width HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, pcie_link_generation) // the current PCIe generation + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int64_t, pcie_link_speed) // the current PCIe bandwidth in bytes/sec }; /** @@ -196,25 +229,31 @@ class level_zero_temperature_samples { // befriend hardware sampler class friend class gpu_intel_hardware_sampler; + public: /** - * @brief The map type used if the number of potential Level Zero domains is unknown at compile time. - * @tparam T the mapped type + * @brief Checks whether any temperature related hardware sample is present. + * @return `true` if any temperature related hardware sample is, otherwise `false`. */ - template - using map_type = std::unordered_map; - - public: + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. 
+ * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, temperature_max) // the maximum temperature for the sensor in °C - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::int32_t, temperature_psu) // the temperature of the PSU in °C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type>, temperature) // the current temperature for the sensor in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::uint32_t, num_fans) // the number of fans + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::int32_t, fan_speed_max) // the maximum fan speed the user can set in RPM + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max) // the maximum GPU temperature in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max) // the maximum memory temperature in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, global_temperature_max) // the maximum global temperature in °C + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage) // the current intended fan speed in % + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature) // the temperature of the GPU in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_temperature) // the temperature of the memory in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, global_temperature) // the global temperature in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, psu_temperature) // the temperature of the PSU in °C }; /** @@ -228,19 +267,23 @@ std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples } // namespace hws +/// @cond Doxygen_suppress + template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : 
fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond -#endif // HARDWARE_SAMPLING_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ +#endif // HWS_GPU_INTEL_LEVEL_ZERO_SAMPLES_HPP_ diff --git a/include/hardware_sampling/gpu_intel/utility.hpp b/include/hws/gpu_intel/utility.hpp similarity index 67% rename from include/hardware_sampling/gpu_intel/utility.hpp rename to include/hws/gpu_intel/utility.hpp index 810901f..76e15a1 100644 --- a/include/hardware_sampling/gpu_intel/utility.hpp +++ b/include/hws/gpu_intel/utility.hpp @@ -8,17 +8,18 @@ * @brief Implements utility functionality for the Intel GPU sampler. */ -#ifndef HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_ -#define HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_ +#ifndef HWS_GPU_INTEL_UTILITY_HPP_ +#define HWS_GPU_INTEL_UTILITY_HPP_ #pragma once +#include "fmt/format.h" // fmt::format #include "level_zero/ze_api.h" // Level Zero runtime functions #include "level_zero/zes_api.h" // Level Zero runtime functions -#include // std::format #include // std::runtime_error #include // std::string #include // std::string_view +#include // std::vector namespace hws::detail { @@ -35,17 +36,31 @@ namespace hws::detail { * @details Throws an exception if a Level Zero call returns with an error. Additionally outputs a more concrete custom error string. 
*/ #if defined(HWS_ERROR_CHECKS_ENABLED) - #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) \ - { \ - const ze_result_t errc = level_zero_func; \ - if (errc != ZE_RESULT_SUCCESS) { \ - throw std::runtime_error{ std::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, to_result_string(errc)) }; \ - } \ + #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) \ + { \ + const ze_result_t errc = level_zero_func; \ + if (errc != ZE_RESULT_SUCCESS) { \ + throw std::runtime_error{ fmt::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, ::hws::detail::to_result_string(errc)) }; \ + } \ } #else #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) level_zero_func; #endif +/** + * @brief Convert the @p flags to a vector of strings. + * @param[in] flags the flags to convert to strings + * @return a vector containing all flags as strings (`[[nodiscard]]`) + */ +[[nodiscard]] std::vector property_flags_to_vector(ze_device_property_flags_t flags); + +/** + * @brief Convert the throttle reason bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|". + * @param[in] reasons the bitmask to convert to a string + * @return all throttle reasons (`[[nodiscard]]`) + */ +[[nodiscard]] std::string throttle_reason_to_string(zes_freq_throttle_reason_flags_t reasons); + /** * @brief Convert a Level Zero memory type to a string representation. * @param[in] mem_type the Level Zero memory type @@ -60,13 +75,6 @@ namespace hws::detail { */ [[nodiscard]] std::string memory_location_to_name(zes_mem_loc_t mem_loc); -/** - * @brief Convert a Level Zero temperature sensor type to a string representation. 
- * @param[in] sensor_type the Level Zero temperature sensor type - * @return the string representation (`[[nodiscard]]`) - */ -[[nodiscard]] std::string temperature_sensor_type_to_name(zes_temp_sensors_t sensor_type); - } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_INTEL_UTILITY_HPP_ +#endif // HWS_GPU_INTEL_UTILITY_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp b/include/hws/gpu_nvidia/hardware_sampler.hpp similarity index 78% rename from include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp rename to include/hws/gpu_nvidia/hardware_sampler.hpp index de22f3f..59a5e31 100644 --- a/include/hardware_sampling/gpu_nvidia/hardware_sampler.hpp +++ b/include/hws/gpu_nvidia/hardware_sampler.hpp @@ -8,19 +8,20 @@ * @brief Defines a hardware sampler for NVIDIA GPUs using NVIDIA's Management Library (NVML). */ -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ +#define HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp" // hws::nvml_device_handle -#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::ostream_formatter +#include "hws/gpu_nvidia/nvml_device_handle.hpp" // hws::nvml_device_handle +#include "hws/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category + +#include "fmt/format.h" // fmt::formatter, fmt::ostream_formatter #include // std::atomic #include // std::chrono::milliseconds, 
std::chrono_literals namespace #include // std::size_t -#include // std::formatter #include // std::ostream forward declaration #include // std::string @@ -37,27 +38,31 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler { /** * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval. * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_nvidia_hardware_sampler(); + explicit gpu_nvidia_hardware_sampler(sample_category category = sample_category::all); /** * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the default sampling interval. * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. * @param[in] device_id the ID of the device to sample + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_nvidia_hardware_sampler(std::size_t device_id); + explicit gpu_nvidia_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all); /** * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the @p sampling_interval. * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - explicit gpu_nvidia_hardware_sampler(std::chrono::milliseconds sampling_interval); + explicit gpu_nvidia_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the @p sampling_interval. * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. 
* @param[in] device_id the ID of the device to sample * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) */ - gpu_nvidia_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval); + gpu_nvidia_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); /** * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). @@ -112,21 +117,21 @@ class gpu_nvidia_hardware_sampler : public hardware_sampler { */ [[nodiscard]] const nvml_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } - private: /** - * @copydoc hws::hardware_sampler::sampling_loop + * @copydoc hws::hardware_sampler::device_identification */ - void sampling_loop() final; + [[nodiscard]] std::string device_identification() const final; /** - * @copydoc hws::hardware_sampler::device_identification + * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const */ - std::string device_identification() const final; + [[nodiscard]] std::string samples_only_as_yaml_string() const final; + private: /** - * @copydoc hws::hardware_sampler::generate_yaml_string + * @copydoc hws::hardware_sampler::sampling_loop */ - std::string generate_yaml_string() const final; + void sampling_loop() final; /// The device handle for the device to sample. 
detail::nvml_device_handle device_{}; @@ -159,7 +164,11 @@ std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &s } // namespace hws +/// @cond Doxygen_suppress + template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ +#endif // HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp b/include/hws/gpu_nvidia/nvml_device_handle.hpp similarity index 85% rename from include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp rename to include/hws/gpu_nvidia/nvml_device_handle.hpp index f52fb84..eb3da33 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle.hpp +++ b/include/hws/gpu_nvidia/nvml_device_handle.hpp @@ -8,8 +8,8 @@ * @brief Defines a pImpl class for an NVML device handle. */ -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ +#ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ +#define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ #pragma once #include // std::size_t @@ -40,7 +40,7 @@ class nvml_device_handle { /** * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t. - * @throws hardware_sampling_exception if `*this` has been default constructed + * @throws std::runtime_error if `*this` has been default constructed * @return the device handle (`[[nodiscard]]`) */ [[nodiscard]] nvml_device_handle_impl &get_impl() { @@ -52,7 +52,7 @@ class nvml_device_handle { /** * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t. 
- * @throws hardware_sampling_exception if `*this` has been default constructed + * @throws std::runtime_error if `*this` has been default constructed * @return the device handle (`[[nodiscard]]`) */ [[nodiscard]] const nvml_device_handle_impl &get_impl() const { @@ -69,4 +69,4 @@ class nvml_device_handle { } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ +#endif // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp b/include/hws/gpu_nvidia/nvml_device_handle_impl.hpp similarity index 72% rename from include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp rename to include/hws/gpu_nvidia/nvml_device_handle_impl.hpp index 9247f29..7656599 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp +++ b/include/hws/gpu_nvidia/nvml_device_handle_impl.hpp @@ -8,12 +8,12 @@ * @brief Implements a pImpl class for an NVML device handle. */ -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ +#ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ +#define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ #pragma once -#include "hardware_sampling/gpu_nvidia/nvml_device_handle.hpp" // hws::detail::nvml_device_handle -#include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK +#include "hws/gpu_nvidia/nvml_device_handle.hpp" // hws::detail::nvml_device_handle +#include "hws/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK #include "nvml.h" // nvmlDevice_t @@ -32,7 +32,7 @@ struct nvml_device_handle::nvml_device_handle_impl { * @param[in] device_id the device to get the handle for */ explicit nvml_device_handle_impl(const std::size_t device_id) { - HWS_NVML_ERROR_CHECK(nvmlDeviceGetHandleByIndex(static_cast(device_id), &device)); + HWS_NVML_ERROR_CHECK(nvmlDeviceGetHandleByIndex(static_cast(device_id), &device)) } /// The wrapped NVML device handle. 
@@ -44,4 +44,4 @@ inline nvml_device_handle::nvml_device_handle(const std::size_t device_id) : } // namespace hws::detail -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ +#endif // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ diff --git a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp b/include/hws/gpu_nvidia/nvml_samples.hpp similarity index 53% rename from include/hardware_sampling/gpu_nvidia/nvml_samples.hpp rename to include/hws/gpu_nvidia/nvml_samples.hpp index 29b1d5a..0ddd6ae 100644 --- a/include/hardware_sampling/gpu_nvidia/nvml_samples.hpp +++ b/include/hws/gpu_nvidia/nvml_samples.hpp @@ -8,14 +8,16 @@ * @brief Defines the samples used with NVML. */ -#ifndef HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_ -#define HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_ +#ifndef HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_ +#define HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_ #pragma once -#include "hardware_sampling/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER, hws::detail::ostream_formatter +#include "hws/utility.hpp" // HWS_SAMPLE_STRUCT_FIXED_MEMBER, HWS_SAMPLE_STRUCT_SAMPLING_MEMBER + +#include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter -#include // std::formatter #include // std::ostream forward declaration +#include // std::map #include // std::optional #include // std::string #include // std::vector @@ -34,20 +36,29 @@ class nvml_general_samples { friend class gpu_nvidia_hardware_sampler; public: + /** + * @brief Checks whether any general hardware sample is present. + * @return `true` if any general hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. 
* @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device - HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode) // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the number of CUDA cores + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, architecture) // the architecture name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, byte_order) // the byte order (e.g., little/big endian) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_cores) // the number of CUDA cores + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, vendor_id) // the vendor ID + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, name) // the name of the device + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, persistence_mode) // the persistence mode: if true, the driver is always loaded reducing the latency for the first API call - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_state) // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_gpu) // the GPU compute utilization in percent - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, utilization_mem) // the GPU memory utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, compute_utilization) // the GPU compute utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, memory_utilization) // the GPU memory utilization in percent + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, performance_level) // the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance }; /** @@ -70,26 +81,38 @@ class nvml_clock_samples { // befriend hardware sampler class friend class gpu_nvidia_hardware_sampler; + /// The map type used to map the available clock frequencies to a specific 
memory frequency. + using map_type = std::map>; + public: + /** + * @brief Checks whether any clock related hardware sample is present. + * @return `true` if any clock related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, adaptive_clock_status) // true if clock boosting is currently enabled - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_graph_min) // the minimum possible graphics clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_graph_max) // the maximum possible graphics clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_sm_max) // the maximum possible SM clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_mem_min) // the minimum possible memory clock frequency in MHz - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, clock_mem_max) // the maximum possible memory clock frequency in MHz - - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_graph) // the current graphics clock frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_sm) // the current SM clock frequency in Mhz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, clock_mem) // the current memory clock frequency in MHz - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, clock_throttle_reason) // the reason the GPU clock throttled (bitmask) - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clocks) // true if the clocks are currently auto boosted + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, auto_boosted_clock_enabled) // true if clock boosting is currently enabled + 
HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_min) // the minimum possible graphics clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, clock_frequency_max) // the maximum possible graphics clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_min) // the minimum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_clock_frequency_max) // the maximum possible memory clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, sm_clock_frequency_max) // the maximum possible SM clock frequency in MHz + HWS_SAMPLE_STRUCT_FIXED_MEMBER(map_type, available_clock_frequencies) // the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_memory_clock_frequencies) // the available memory clock frequencies in MHz (slowest to fastest) + + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, clock_frequency) // the current graphics clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, memory_clock_frequency) // the current memory clock frequency in MHz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, sm_clock_frequency) // the current SM clock frequency in Mhz + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, throttle_reason) // the reason the GPU clock throttled (as bitmask) + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(std::string, throttle_reason_string) // the reason the GPU clock throttled (as string) + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(bool, auto_boosted_clock) // true if the clocks are currently auto boosted }; /** @@ -113,20 +136,28 @@ class nvml_power_samples { friend class gpu_nvidia_hardware_sampler; public: + /** + * @brief Checks whether any power related hardware sample is present. + * @return `true` if any power related hardware sample is, otherwise `false`. 
+ */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode) // true if power management algorithms are supported and active - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, power_management_limit) // if the GPU draws more power (mW) than the power management limit, the GPU may throttle - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, power_enforced_limit) // the actually enforced power limit, may be different from power management limit if external limiters are set + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_management_limit) // if the GPU draws more power (W) than the power management limit, the GPU may throttle + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, power_enforced_limit) // the actually enforced power limit (W), may be different from power management limit if external limiters are set + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::string, power_measurement_type) // the type of the power readings: either current power draw or average power draw + HWS_SAMPLE_STRUCT_FIXED_MEMBER(bool, power_management_mode) // true if power management algorithms are supported and active + HWS_SAMPLE_STRUCT_FIXED_MEMBER(std::vector, available_power_profiles) // a list of the available power profiles - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, power_state) // the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, power_usage) // the current power draw of the GPU and its related circuity (e.g., memory) in mW - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, power_total_energy_consumption) // the total 
power consumption since the last driver reload in mJ + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_usage) // the current power draw of the GPU and its related circuity (e.g., memory) in W + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, power_total_energy_consumption) // the total power consumption since the last driver reload in J + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(int, power_profile) // the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power; 32 indicates unknown }; /** @@ -150,23 +181,30 @@ class nvml_memory_samples { friend class gpu_nvidia_hardware_sampler; public: + /** + * @brief Checks whether any memory related hardware sample is present. + * @return `true` if any memory related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. + * Returns an empty string if `has_samples()` returns `false`. * @return the YAML string (`[[nodiscard]]`) */ [[nodiscard]] std::string generate_yaml_string() const; HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned long, memory_total) // the total available memory in Byte - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_max_speed) // the maximum PCIe link speed in MBPS + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_pcie_lanes_max) // the maximum number of PCIe lanes + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_generation_max) // the maximum PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc.) 
+ HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, pcie_link_speed_max) // the maximum PCIe link speed in MBPS HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, memory_bus_width) // the memory bus with in Bit - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, max_pcie_link_generation) // the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc) - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_free) // the currently free memory in Byte HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_used) // the currently used memory in Byte - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_speed) // the current PCIe link speed in MBPS - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_width) // the current PCIe link width (e.g., x16, x8, x4, etc) + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned long long, memory_free) // the currently free memory in Byte + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, num_pcie_lanes) // the current PCIe link width (e.g., x16, x8, x4, etc) HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_generation) // the current PCIe link generation (may change during runtime to save energy) + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, pcie_link_speed) // the current PCIe link speed in MBPS }; /** @@ -190,6 +228,11 @@ class nvml_temperature_samples { friend class gpu_nvidia_hardware_sampler; public: + /** + * @brief Checks whether any temperature related hardware sample is present. + * @return `true` if any temperature related hardware sample is, otherwise `false`. + */ + [[nodiscard]] bool has_samples() const; /** * @brief Assemble the YAML string containing all available general hardware samples. * @details Hardware samples that are not supported by the current device are omitted in the YAML output. 
@@ -197,14 +240,14 @@ class nvml_temperature_samples { */ [[nodiscard]] std::string generate_yaml_string() const; - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_fans) // the number of fans (if any) - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, min_fan_speed) // the minimum fan speed the user can set in % - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, max_fan_speed) // the maximum fan speed the user can set in % - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, temperature_threshold_gpu_max) // the maximum graphics temperature threshold in °C - HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, temperature_threshold_mem_max) // the maximum memory temperature threshold in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, num_fans) // the number of fans (if any) + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, fan_speed_min) // the minimum fan speed the user can set in % + HWS_SAMPLE_STRUCT_FIXED_MEMBER(unsigned int, fan_speed_max) // the maximum fan speed the user can set in % + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, temperature_max) // the maximum graphics temperature threshold in °C + HWS_SAMPLE_STRUCT_FIXED_MEMBER(double, memory_temperature_max) // the maximum memory temperature threshold in °C - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, fan_speed) // the current intended fan speed in % - HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(unsigned int, temperature_gpu) // the current GPU temperature in °C + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, fan_speed_percentage) // the current intended fan speed in % + HWS_SAMPLE_STRUCT_SAMPLING_MEMBER(double, temperature) // the current GPU temperature in °C }; /** @@ -218,19 +261,23 @@ std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samp } // namespace hws +/// @cond Doxygen_suppress + template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : 
fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; template <> -struct std::formatter : hws::detail::ostream_formatter { }; +struct fmt::formatter : fmt::ostream_formatter { }; + +/// @endcond -#endif // HARDWARE_SAMPLING_GPU_NVIDIA_NVML_SAMPLES_HPP_ +#endif // HWS_GPU_NVIDIA_NVML_SAMPLES_HPP_ diff --git a/include/hws/gpu_nvidia/utility.hpp b/include/hws/gpu_nvidia/utility.hpp new file mode 100644 index 0000000..c405386 --- /dev/null +++ b/include/hws/gpu_nvidia/utility.hpp @@ -0,0 +1,64 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Implements utility functionality for the NVIDIA GPU sampler. + */ + +#ifndef HWS_GPU_NVIDIA_UTILITY_HPP_ +#define HWS_GPU_NVIDIA_UTILITY_HPP_ +#pragma once + +#include "cuda_runtime_api.h" // CUDA runtime functions +#include "fmt/format.h" // fmt::format +#include "nvml.h" // NVML runtime functions + +#include // std::runtime_error +#include // std::string + +namespace hws::detail { + +/** + * @def HWS_NVML_ERROR_CHECK + * @brief Defines the `HWS_NVML_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. + * @details Throws an exception if an NVML call returns with an error. Additionally outputs a more concrete error string. + */ +/** + * @def HWS_CUDA_ERROR_CHECK + * @brief Defines the `HWS_CUDA_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. + * @details Throws an exception if a CUDA call returns with an error. Additionally outputs a more concrete error string. 
+ */ +#if defined(HWS_ERROR_CHECKS_ENABLED) + #define HWS_NVML_ERROR_CHECK(nvml_func) \ + { \ + const nvmlReturn_t errc = nvml_func; \ + if (errc != NVML_SUCCESS) { \ + throw std::runtime_error{ fmt::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast(errc)) }; \ + } \ + } + + #define HWS_CUDA_ERROR_CHECK(cuda_func) \ + { \ + const cudaError_t errc = cuda_func; \ + if (errc != cudaSuccess) { \ + throw std::runtime_error{ fmt::format("Error in CUDA function call \"{}\": {} ({})", #cuda_func, cudaGetErrorName(errc), cudaGetErrorString(errc)) }; \ + } \ + } +#else + #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func; + #define HWS_CUDA_ERROR_CHECK(cuda_func) cuda_func; +#endif + +/** + * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|". + * @param[in] clocks_event_reasons the bitmask to convert to a string + * @return all event throttle reasons (`[[nodiscard]]`) + */ +[[nodiscard]] std::string throttle_event_reason_to_string(unsigned long long clocks_event_reasons); + +} // namespace hws::detail + +#endif // HWS_GPU_NVIDIA_UTILITY_HPP_ diff --git a/include/hardware_sampling/hardware_sampler.hpp b/include/hws/hardware_sampler.hpp similarity index 76% rename from include/hardware_sampling/hardware_sampler.hpp rename to include/hws/hardware_sampler.hpp index ce7c6fb..326eb7e 100644 --- a/include/hardware_sampling/hardware_sampler.hpp +++ b/include/hws/hardware_sampler.hpp @@ -8,11 +8,12 @@ * @brief Defines the base class for all hardware samplers. 
*/ -#ifndef HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_ -#define HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_ +#ifndef HWS_HARDWARE_SAMPLER_HPP_ +#define HWS_HARDWARE_SAMPLER_HPP_ #pragma once -#include "hardware_sampling/event.hpp" // hws::event +#include "hws/event.hpp" // hws::event +#include "hws/sample_category.hpp" // hws::sample_category #include // std::atomic #include // std::chrono::{system_clock::time_point, steady_clock::time_point, milliseconds} @@ -32,8 +33,10 @@ class hardware_sampler { /** * @brief Construct a new hardware sampler with the provided @p sampling_interval. * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling + * @throws std::invalid_argument if the @p sampling_interval is zero */ - explicit hardware_sampler(std::chrono::milliseconds sampling_interval); + hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category); /** * @brief Delete the copy-constructor (already implicitly deleted due to the std::atomic member). @@ -86,7 +89,7 @@ class hardware_sampler { */ [[nodiscard]] bool has_sampling_started() const noexcept; /** - * @brief Check whether this hardware sampler has currently sampling. + * @brief Check whether this hardware sampler is currently sampling. * @return `true` if the hardware sampler is currently sampling, `false` otherwise (`[[nodiscard]]`) */ [[nodiscard]] bool is_sampling() const noexcept; @@ -120,15 +123,16 @@ class hardware_sampler { [[nodiscard]] std::size_t num_events() const noexcept { return events_.size(); } /** - * @brief Return the number of recorded events. - * @return the number of events (`[[nodiscard]]`) + * @brief Return a vector of all recorded events. + * @return the events (`[[nodiscard]]`) */ [[nodiscard]] const std::vector &get_events() const noexcept { return events_; } /** - * @brief Return the number of recorded events. + * @brief Return the event at index @p idx. 
+ * @param[in] idx the event to return * @throws std::out_of_range the the @p idx is out of bounce - * @return the number of events (`[[nodiscard]]`) + * @return the event at index @p idx (`[[nodiscard]]`) */ [[nodiscard]] event get_event(std::size_t idx) const; @@ -148,21 +152,15 @@ class hardware_sampler { * @brief Dump the hardware samples to the YAML file with @p filename. * @param[in] filename the YAML file to append the hardware samples to */ - void dump_yaml(const char *filename); + void dump_yaml(const char *filename) const; /** - * @copydoc hws::hardware_sampler::dump_yaml(const char *) + * @copydoc hws::hardware_sampler::dump_yaml(const char *) const */ - void dump_yaml(const std::string &filename); + void dump_yaml(const std::string &filename) const; /** - * @copydoc hws::hardware_sampler::dump_yaml(const char *) + * @copydoc hws::hardware_sampler::dump_yaml(const char *) const */ - void dump_yaml(const std::filesystem::path &filename); - - protected: - /** - * @brief Getter the hardware samples. Called in another std::thread. - */ - virtual void sampling_loop() = 0; + void dump_yaml(const std::filesystem::path &filename) const; /** * @brief Return the unique device identification. Can be used as unique key in the YAML string. @@ -171,18 +169,35 @@ class hardware_sampler { [[nodiscard]] virtual std::string device_identification() const = 0; /** - * @brief Assemble the YAML string containing all hardware samples. - * @param[in] start_time_point the reference time point the hardware samples occurred relative to + * @brief Return the hardware samples as well as events and time points as YAML string. + * @return the YAML content as string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string as_yaml_string() const; + /** + * @brief Return only the hardware samples as YAML string. 
* @throws std::runtime_error if sampling is still running - * @return the YAML string (`[[nodiscard]]`) + * @return the YAML content as string (`[[nodiscard]]`) */ - [[nodiscard]] virtual std::string generate_yaml_string() const = 0; + [[nodiscard]] virtual std::string samples_only_as_yaml_string() const = 0; + + protected: + /** + * @brief Getter the hardware samples. Called in another std::thread. + */ + virtual void sampling_loop() = 0; /** * @brief Add a new time point to this hardware sampler. Called during the sampling loop. * @param time_point the new time point to add */ - void add_time_point(const std::chrono::steady_clock::time_point time_point) { time_points_.push_back(time_point); } + void add_time_point(std::chrono::steady_clock::time_point time_point); + + /** + * @brief Check whether the @p category is currently enabled for hardware sampling or not. + * @param[in] category the sample_category to check + * @return Returns `true` if @p category is enabled for sampling, otherwise `false` (`[[nodiscard]]`) + */ + [[nodiscard]] bool sample_category_enabled(sample_category category) const noexcept; private: /// A boolean flag indicating whether the sampling has already started. @@ -206,8 +221,11 @@ class hardware_sampler { /// The sampling interval of this hardware sampler. const std::chrono::milliseconds sampling_interval_{}; + + /// The bitmask of sample categories to use. + const sample_category sample_category_{}; }; } // namespace hws -#endif // HARDWARE_SAMPLING_HARDWARE_SAMPLER_HPP_ +#endif // HWS_HARDWARE_SAMPLER_HPP_ diff --git a/include/hws/sample_category.hpp b/include/hws/sample_category.hpp new file mode 100644 index 0000000..0ec500b --- /dev/null +++ b/include/hws/sample_category.hpp @@ -0,0 +1,117 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ * + * @brief Defines an enum class with all sample categories to be able to only selectively enable some samples. + */ + +#ifndef HWS_SAMPLE_CATEGORY_HPP_ +#define HWS_SAMPLE_CATEGORY_HPP_ +#pragma once + +namespace hws { + +/** + * @brief Enum class as bitfield containing the possible sample categories. + * @details The sample_category "gfx" and "idle_state" are only used in the cpu_hardware_sampler. + * Additionally, the "all" sample_category is available to easily enable all hardware samples (default). + */ +enum class sample_category : int { + // clang-format off + /// General hardware samples like architecture, names, or utilization. + general = 0b00000001, + /// Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons. + clock = 0b00000010, + /// Power-related hardware samples like current power draw or total energy consumption. + power = 0b00000100, + /// Memory-related hardware samples like memory usage or PCIe information. + memory = 0b00001000, + /// Temperature-related hardware samples like maximum and current temperatures. + temperature = 0b00010000, + /// Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler. + gfx = 0b00100000, + /// Idle-state-related hardware samples. Only used in the cpu_hardware_sampler. + idle_state = 0b01000000, + /// Shortcut to enable all available hardware samples (default). + all = 0b01111111 + // clang-format on +}; + +/** + * @brief Compute the bitwise not of @p sc. + * @param[in] sc the sample_category to apply the bitwise not to + * @return the bitwise not result (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr sample_category operator~(const sample_category sc) noexcept { + return static_cast(~static_cast(sc)); +} + +/** + * @brief Compute the bitwise and between @p lhs and @p rhs and return a new sample_category. 
+ * @param[in] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return the bitwise and result (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr sample_category operator&(const sample_category lhs, const sample_category rhs) noexcept { + return static_cast(static_cast(lhs) & static_cast(rhs)); +} + +/** + * @brief Compute the bitwise or between @p lhs and @p rhs and return a new sample_category. + * @param[in] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return the bitwise or result (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr sample_category operator|(const sample_category lhs, const sample_category rhs) noexcept { + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +/** + * @brief Compute the bitwise xor between @p lhs and @p rhs and return a new sample_category. + * @param[in] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return the bitwise xor result (`[[nodiscard]]`) + */ +[[nodiscard]] constexpr sample_category operator^(const sample_category lhs, const sample_category rhs) noexcept { + return static_cast(static_cast(lhs) ^ static_cast(rhs)); +} + +/** + * @brief Compute the bitwise compound and between @p lhs and @p rhs and return the result in @p lhs. + * @param[in,out] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return a reference to @p lhs containing the bitwise and result + */ +constexpr sample_category &operator&=(sample_category &lhs, const sample_category rhs) noexcept { + lhs = lhs & rhs; + return lhs; +} + +/** + * @brief Compute the bitwise compound or between @p lhs and @p rhs and return the result in @p lhs. 
+ * @param[in,out] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return a reference to @p lhs containing the bitwise or result + */ +constexpr sample_category &operator|=(sample_category &lhs, const sample_category rhs) noexcept { + lhs = lhs | rhs; + return lhs; +} + +/** + * @brief Compute the bitwise compound xor between @p lhs and @p rhs and return the result in @p lhs. + * @param[in,out] lhs the first sample_category + * @param[in] rhs the second sample_category + * @return a reference to @p lhs containing the bitwise xor result + */ +constexpr sample_category &operator^=(sample_category &lhs, const sample_category rhs) noexcept { + lhs = lhs ^ rhs; + return lhs; +} + +} // namespace hws + +#endif // HWS_SAMPLE_CATEGORY_HPP_ diff --git a/include/hws/system_hardware_sampler.hpp b/include/hws/system_hardware_sampler.hpp new file mode 100644 index 0000000..42924ac --- /dev/null +++ b/include/hws/system_hardware_sampler.hpp @@ -0,0 +1,197 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Defines a hardware sampler for the whole system, i.e., automatically creates CPU and GPU hardware samples if the respective sampler and hardware are available. + */ + +#ifndef HWS_SYSTEM_HARDWARE_SAMPLER_HPP_ +#define HWS_SYSTEM_HARDWARE_SAMPLER_HPP_ + +#include "hws/event.hpp" // hws::event +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category + +#include // std::chrono::{milliseconds, steady_clock::time_point} +#include // std::size_t +#include // std::filesystem::path +#include // std::unique_ptr +#include // std::string +#include // std::vector + +namespace hws { + +/** + * @brief A hardware sampler for the whole system. 
+ * @details Enables hardware samplers for which hardware is available and the CMake configuration found the respective dependencies. + */ +class system_hardware_sampler { + public: + /** + * @brief Construct hardware samplers with the default sampling interval. + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) + */ + explicit system_hardware_sampler(sample_category category = sample_category::all); + /** + * @brief Construct hardware samplers with the provided @p sampling_interval. + * @param[in] sampling_interval the used sampling interval + * @param[in] category the sample categories that are enabled for hardware sampling (default: all) + */ + explicit system_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); + + /** + * @brief Delete the copy-constructor. + */ + system_hardware_sampler(const system_hardware_sampler &) = delete; + /** + * @brief Delete the move-constructor. + */ + system_hardware_sampler(system_hardware_sampler &&) noexcept = delete; + /** + * @brief Delete the copy-assignment operator. + */ + system_hardware_sampler &operator=(const system_hardware_sampler &) = delete; + /** + * @brief Delete the move-assignment operator. + */ + system_hardware_sampler &operator=(system_hardware_sampler &&) noexcept = delete; + + /** + * @brief Explicitly use the default destructor. + */ + ~system_hardware_sampler() = default; + + /** + * @brief Start hardware sampling for all wrapped hardware samplers. + */ + void start_sampling(); + /** + * @brief Stop hardware sampling for all wrapped hardware samplers. + */ + void stop_sampling(); + /** + * @brief Pause hardware sampling for all wrapped hardware samplers. + */ + void pause_sampling(); + /** + * @brief Resume hardware sampling for all wrapped hardware samplers. + */ + void resume_sampling(); + + /** + * @brief Check whether the hardware samplers have already started sampling. 
+ * @return `true` if **all** hardware samplers have already started sampling, `false` otherwise (`[[nodiscard]]`) + */ + [[nodiscard]] bool has_sampling_started() const noexcept; + /** + * @brief Check whether the hardware samplers are currently sampling. + * @return `true` if **all** hardware samplers are currently sampling, `false` otherwise (`[[nodiscard]]`) + */ + [[nodiscard]] bool is_sampling() const noexcept; + /** + * @brief Check whether the hardware samplers have already stopped sampling. + * @return `true` if **all** hardware samplers have already stopped sampling, `false` otherwise (`[[nodiscard]]`) + */ + [[nodiscard]] bool has_sampling_stopped() const noexcept; + + /** + * @brief Add a new event to all hardware samplers. + * @param e the event + */ + void add_event(event e); + /** + * @brief Add a new event to all hardware samplers. + * @param[in] time_point the time point when the event occurred + * @param[in] name the name of the event + */ + void add_event(decltype(event::time_point) time_point, decltype(event::name) name); + /** + * @brief Add a new event to all hardware samplers. The time_point will be the current time. + * @param[in] name the name of the event + */ + void add_event(decltype(event::name) name); + + /** + * @brief Return the number of recorded events separately for each hardware sampler. + * @return the number of events per hardware sampler (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector num_events() const; + /** + * @brief Return the number of recorded events separately for each hardware sampler. + * @return the events per hardware sampler (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector> get_events() const; + /** + * @brief Return the time points the samples separately for each hardware sampler. + * @return the time points per hardware sampler (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector> sampling_time_points() const; + /** + * @brief Return the sampling interval separately for each hardware sampler. 
+ * @return the samping interval in milliseconds per hardware sampler (`[[nodiscard]]`) + */ + [[nodiscard]] std::vector sampling_interval() const; + + /** + * @brief The number of hardware samplers available for the whole system. + * @return the number of hardware samplers (`[[nodiscard]]`) + */ + [[nodiscard]] std::size_t num_samplers() const noexcept; + /** + * @brief The hardware samplers available for the whole system. + * @return all available hardware samplers (`[[nodiscard]]`) + */ + [[nodiscard]] const std::vector> &samplers() const noexcept; + /** + * @copydoc hws::system_hardware_sampler::samplers() const + */ + [[nodiscard]] std::vector> &samplers() noexcept; + /** + * @brief Return the hardware sampler at index @p idx. + * @param[in] idx the index of the hardware sampler + * @throws std::out_of_range if @p idx is out-of-range + * @return the hardware sampler at index @p idx (`[[nodiscard]]`) + */ + [[nodiscard]] const std::unique_ptr &sampler(std::size_t idx) const; + /** + * @copydoc hws::system_hardware_sampler::sampler(std::size_t idx) const + */ + [[nodiscard]] std::unique_ptr &sampler(std::size_t idx); + + /** + * @brief Dump the hardware samples of all hardware samplers to the YAML file with @p filename. + * @param[in] filename the YAML file to append the hardware samples to + */ + void dump_yaml(const char *filename) const; + /** + * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) const + */ + void dump_yaml(const std::string &filename) const; + /** + * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) const + */ + void dump_yaml(const std::filesystem::path &filename) const; + + /** + * @brief Return the hardware samples as YAML string. + * @return the YAML content as string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string as_yaml_string() const; + /** + * @brief Return only the hardware samples as YAML string. 
+ * @throws std::runtime_error if sampling is still running + * @return the YAML content as string (`[[nodiscard]]`) + */ + [[nodiscard]] std::string samples_only_as_yaml_string() const; + + private: + /// The different hardware sampler for the current system. + std::vector> samplers_; +}; + +} // namespace hws + +#endif // HWS_SYSTEM_HARDWARE_SAMPLER_HPP_ diff --git a/include/hardware_sampling/utility.hpp b/include/hws/utility.hpp similarity index 56% rename from include/hardware_sampling/utility.hpp rename to include/hws/utility.hpp index e66d6c8..db37390 100644 --- a/include/hardware_sampling/utility.hpp +++ b/include/hws/utility.hpp @@ -8,22 +8,23 @@ * @brief Utility functions for the hardware sampling. */ -#ifndef HARDWARE_SAMPLING_UTILITY_HPP_ -#define HARDWARE_SAMPLING_UTILITY_HPP_ +#ifndef HWS_UTILITY_HPP_ +#define HWS_UTILITY_HPP_ #pragma once +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join + #include // std::from_chars -#include // std::chrono::{milliseconds, duration_cast} +#include // std::chrono::duration +#include // std::trunc #include // std::size_t -#include // std::format, std::formatter, std::basic_format_context, std::format_to -#include // std::back_inserter, std::next, std::prev #include // std::optional -#include // std::basic_stringstream #include // std::runtime_error #include // std::string, std::stof, std::stod, std::stold -#include // std::string_view, std::basic_string_view +#include // std::string_view #include // std::errc -#include // std::is_same_v, std::remove_cvref_t +#include // std::is_same_v, std::is_floating_point_v, std::remove_cv_t, std::remove_reference_t, std::true_type, std::false_type #include // std::vector namespace hws::detail { @@ -53,55 +54,48 @@ namespace hws::detail { private: \ std::optional> sample_name##_{}; +/*****************************************************************************************************/ +/** type_traits **/ 
+/*****************************************************************************************************/ + /** - * @brief Convert all time points to their duration passed since the @p reference time point. - * @tparam Duration the duration type to return - * @tparam TimePoint the type if the time points - * @param[in] time_points the time points - * @param[in] reference the reference time point - * @return the duration passed since the @p reference time point (`[[nodiscard]]`) + * @brief Remove the topmost cv-qualifiers from type @p T. */ -template -[[nodiscard]] inline std::vector durations_from_reference_time(const std::vector &time_points, const TimePoint &reference) { - std::vector durations(time_points.size()); +template +using remove_cvref_t = std::remove_cv_t>; - for (std::size_t i = 0; i < durations.size(); ++i) { - durations[i] = std::chrono::duration_cast(time_points[i] - reference); - } +/** + * @brief The case if the type @p T isn't a std::vector. + * @tparam T the type to check + */ +template +struct is_vector : std::false_type { }; - return durations; -} +/** + * @brief The case if the type @p T is a std::vector. + * @tparam T the type to check + */ +template +struct is_vector> : std::true_type { }; /** - * @brief Convert all time points to their duration since the epoch start. - * @tparam TimePoint the type of the time points - * @param[in] time_points the time points - * @return the duration passed since the respective @p TimePoint epoch start (`[[nodiscard]]`) + * @brief Evaluates to `true` if @p T is a std::vector, otherwise `false`. 
+ * @tparam T the type to check */ -template -[[nodiscard]] inline std::vector time_points_to_epoch(const std::vector &time_points) { - std::vector times(time_points.size()); +template +constexpr bool is_vector_v = is_vector::value; - for (std::size_t i = 0; i < times.size(); ++i) { - times[i] = time_points[i].time_since_epoch(); - } - return times; -} +/*****************************************************************************************************/ +/** string manipulation **/ +/*****************************************************************************************************/ /** - * @brief Return the value encapsulated by the std::optional @p opt if it contains a value, otherwise a default constructed @p T is returned. - * @tparam T the type of the value stored in the std::optional - * @param[in] opt the std::optional to check - * @return the value of the std::optional or a default constructed @p T (`[[nodiscard]]`) + * @brief Checks whether the string @p sv starts with the substring @p start + * @param[in] sv the full string + * @param[in] start the substring + * @return `true` if @p sv starts with @p start, otherwise `false` */ -template -[[nodiscard]] inline T value_or_default(const std::optional &opt) { - if (opt.has_value()) { - return opt.value(); - } else { - return T{}; - } -} +[[nodiscard]] bool starts_with(std::string_view sv, std::string_view start) noexcept; /** * @brief Trim the @p str, i.e., remove all leading and trailing whitespace characters. @@ -117,6 +111,14 @@ template */ [[nodiscard]] std::string to_lower_case(std::string_view str); +/** + * @brief Split the @p str at the delimiters @p delim. + * @param[in] str the string to split + * @param[in] delim the used delimiter + * @return a vector containing all split tokens (`[[nodiscard]]`) + */ +[[nodiscard]] std::vector split(std::string_view str, char delim = ' '); + /** * @brief Convert the @p str to a value of type @p T. 
* @tparam T the type to convert the string to @@ -125,10 +127,10 @@ template */ template [[nodiscard]] inline T convert_to(const std::string_view str) { - if constexpr (std::is_same_v, std::string>) { + if constexpr (std::is_same_v, std::string>) { // convert string_view to string return std::string{ trim(str) }; - } else if constexpr (std::is_same_v, bool>) { + } else if constexpr (std::is_same_v, bool>) { const std::string lower_case_str = to_lower_case(trim(str)); // the string true if (lower_case_str == "true") { @@ -140,17 +142,17 @@ template } // convert a number to its "long long" value and convert it to a bool: 0 -> false, otherwise true return static_cast(convert_to(str)); - } else if constexpr (std::is_same_v, char>) { + } else if constexpr (std::is_same_v, char>) { const std::string_view trimmed = trim(str); // since we expect a character, after trimming the string must only contain exactly one character if (trimmed.size() != 1) { - throw std::runtime_error{ std::format("Can't convert '{}' to a value of type char!", str) }; + throw std::runtime_error{ fmt::format("Can't convert '{}' to a value of type char!", str) }; } return trimmed.front(); - } else if constexpr (std::is_floating_point_v>) { - if constexpr (std::is_same_v, float>) { + } else if constexpr (std::is_floating_point_v>) { + if constexpr (std::is_same_v, float>) { return std::stof(std::string{ str }); - } else if constexpr (std::is_same_v, double>) { + } else if constexpr (std::is_same_v, double>) { return std::stod(std::string{ str }); } else { return std::stold(std::string{ str }); @@ -163,7 +165,7 @@ template T val; auto res = std::from_chars(trimmed_str.data(), trimmed_str.data() + trimmed_str.size(), val); if (res.ec != std::errc{}) { - throw std::runtime_error{ std::format("Can't convert '{}' to a value of type T!", str) }; + throw std::runtime_error{ fmt::format("Can't convert '{}' to a value of type T!", str) }; } return val; } @@ -196,53 +198,113 @@ template } /** - * @brief Split 
the @p str at the delimiters @p delim. - * @param[in] str the string to split - * @param[in] delim the used delimiter - * @return a vector containing all split tokens (`[[nodiscard]]`) + * @brief Convert all entries in the map to a single dict-like string. + * @details The resulting string is of form "{KEY, VALUE}" or "{KEY, [VALUES]}". + * @tparam MapType the type of the map + * @param[in] map the map to convert to a string + * @return the result string (`[[nodiscard]]`( */ -[[nodiscard]] std::vector split(std::string_view str, char delim = ' '); +template +[[nodiscard]] inline std::string map_entry_to_string(const std::optional &map) { + if (map.has_value()) { + std::vector entries{}; + for (const auto &[key, value] : map.value()) { + if constexpr (is_vector_v>) { + entries.push_back(fmt::format("{{{}, [{}]}}", key, fmt::join(value, ", "))); + } else { + entries.push_back(fmt::format("{{{}, {}}}", key, value)); + } + } + return fmt::format("{}", fmt::join(entries, ", ")); + } + return ""; +} /** - * @brief A std::formatter child class allowing to format custom types using an `operator<<` overload. - * @tparam CharT the character type + * @brief Quote all @p values and return a vector of strings. + * @details Example: calling this function with `{ 1, 2, 3, 4 }` would return a vector of strings containing `{ "1", "2", "3", "4" }`. 
+ * @tparam T the type of the values to quote + * @param[in] values the values to quote + * @return the quoted values (`[[nodiscard]]`) */ -template -struct basic_ostream_formatter : std::formatter, CharT> { - template - OutputIt format(const T &value, std::basic_format_context &ctx) const { - std::basic_stringstream ss; - ss << value; - return std::formatter, CharT>::format(ss.view(), ctx); +template +[[nodiscard]] inline std::vector quote(const std::vector &values) { + std::vector quoted{}; + quoted.reserve(values.size()); + + // quote all values + for (const T &val : values) { + quoted.push_back(fmt::format("\"{}\"", val)); } -}; -/// Type alias for a basic_ostream_formatter using a normal char. -using ostream_formatter = basic_ostream_formatter; + return quoted; +} + +/*****************************************************************************************************/ +/** other free functions **/ +/*****************************************************************************************************/ /** - * @brief Join all values in @p c to a single string using @p delim as delimiter. - * @tparam Container the type of the container - * @param[in] c the container for what the values should be joined - * @param[in] delim the delimiter used in joining the values - * @return the joined string (`[[nodiscard]]`) + * @brief Convert the time point to its duration in seconds (using double) truncated to three decimal places passed since the @p reference time point. 
+ * @tparam TimePoint the type of the time point + * @param[in] time_point the time point + * @param[in] reference the reference time point + * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`) */ -template -[[nodiscard]] inline std::string join(const Container &c, const std::string_view delim) { - if (c.empty()) { - return ""; - } else if (c.size() == 1) { - return std::format("{}", *c.cbegin()); +template +[[nodiscard]] inline double duration_from_reference_time(const TimePoint &time_point, const TimePoint &reference) { + return std::trunc(std::chrono::duration(time_point - reference).count() * 1000.0) / 1000.0; +} + +/** + * @brief Convert all time points to their duration in seconds (using double) truncated to three decimal places passed since the @p reference time point. + * @tparam TimePoint the type of the time points + * @param[in] time_points the time points + * @param[in] reference the reference time point + * @return the duration passed in seconds since the @p reference time point (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline std::vector durations_from_reference_time(const std::vector &time_points, const TimePoint &reference) { + std::vector durations(time_points.size()); + + for (std::size_t i = 0; i < durations.size(); ++i) { + durations[i] = duration_from_reference_time(time_points[i], reference); + } + + return durations; +} + +/** + * @brief Convert all time points to their duration since the epoch start. 
+ * @tparam TimePoint the type of the time points + * @param[in] time_points the time points + * @return the duration passed since the respective @p TimePoint epoch start (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline std::vector time_points_to_epoch(const std::vector &time_points) { + std::vector times(time_points.size()); + + for (std::size_t i = 0; i < times.size(); ++i) { + times[i] = time_points[i].time_since_epoch(); + } + return times; +} + +/** + * @brief Return the value encapsulated by the std::optional @p opt if it contains a value, otherwise a default constructed @p T is returned. + * @tparam T the type of the value stored in the std::optional + * @param[in] opt the std::optional to check + * @return the value of the std::optional or a default constructed @p T (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline T value_or_default(const std::optional &opt) { + if (opt.has_value()) { + return opt.value(); } else { - std::string out{}; - for (auto it = c.cbegin(); it != std::prev(c.cend()); it = std::next(it)) { - std::format_to(std::back_inserter(out), "{}{}", *it, delim); - } - std::format_to(std::back_inserter(out), "{}", *std::prev(c.end())); - return out; + return T{}; } } } // namespace hws::detail -#endif // HARDWARE_SAMPLING_UTILITY_HPP_ +#endif // HWS_UTILITY_HPP_ diff --git a/include/hws/version.hpp.in b/include/hws/version.hpp.in new file mode 100644 index 0000000..225072f --- /dev/null +++ b/include/hws/version.hpp.in @@ -0,0 +1,51 @@ +/** + * @file + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + * + * @brief Version information for the hardware sampling. + */ + +#ifndef HWS_VERSION_HPP_ +#define HWS_VERSION_HPP_ +#pragma once + +#include // std::string_view + +namespace hws::version { + +/** + * @brief The name of the library. 
+ * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. + */ +constexpr std::string_view name = "@PROJECT_NAME@"; + +/** + * @brief The current version of the library in the form: "major.minor.patch". + * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. + */ +constexpr std::string_view version = "@PROJECT_VERSION@"; + +/** + * @brief The current major version of the library. + * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. + */ +constexpr int major = @PROJECT_VERSION_MAJOR@; + +/** + * @brief The current minor version of the library. + * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. + */ +constexpr int minor = @PROJECT_VERSION_MINOR@; + +/** + * @brief The current patch version of the library. + * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. + */ +constexpr int patch = @PROJECT_VERSION_PATCH@; + +} // namespace hws::version + +#endif // HWS_VERSION_HPP_ diff --git a/src/hardware_sampling/cpu/hardware_sampler.cpp b/src/hardware_sampling/cpu/hardware_sampler.cpp deleted file mode 100644 index 1eb5a14..0000000 --- a/src/hardware_sampling/cpu/hardware_sampler.cpp +++ /dev/null @@ -1,458 +0,0 @@ -/** - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "hardware_sampling/cpu/hardware_sampler.hpp" - -#include "hardware_sampling/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} -#include "hardware_sampling/cpu/utility.hpp" // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess -#include "hardware_sampling/hardware_sampler.hpp" // hws::tracking::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::{split, split_as, trim, convert_to, ostream_formatter, join} - -#include // assert -#include // std::chrono::{steady_clock, milliseconds} -#include // std::size_t -#include // std::exception, std::terminate -#include // std::format -#include // std::ios_base -#include // std::cerr, std::endl -#include // std::make_optional -#include // std::ostream -#include // std::regex, std::regex::extended, std::regex_match, std::regex_replace -#include // std::runtime_error -#include // std::string -#include // std::string_view -#include // std::this_thread -#include // std::unordered_map -#include // std::vector - -namespace hws { - -cpu_hardware_sampler::cpu_hardware_sampler() : - cpu_hardware_sampler{ HWS_SAMPLING_INTERVAL } { } - -cpu_hardware_sampler::cpu_hardware_sampler(const std::chrono::milliseconds sampling_interval) : - hardware_sampler{ sampling_interval } { } - -cpu_hardware_sampler::~cpu_hardware_sampler() { - try { - // if this hardware sampler is still sampling, stop it - if (this->has_sampling_started() && !this->has_sampling_stopped()) { - this->stop_sampling(); - } - } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; - std::terminate(); - } -} - -void cpu_hardware_sampler::sampling_loop() { - // - // add samples where we only have to retrieve the value once - // - - this->add_time_point(std::chrono::steady_clock::now()); - -#if defined(HWS_VIA_LSCPU_ENABLED) - { - const std::string lscpu_output = detail::run_subprocess("lscpu"); - const std::vector 
lscpu_lines = detail::split(detail::trim(lscpu_output), '\n'); - - for (std::string_view line : lscpu_lines) { - line = detail::trim(line); - // extract the value - std::string_view value{ line }; - value.remove_prefix(value.find_first_of(":") + 1); - value = detail::trim(value); - - // check the lines if the start with an entry that we want to sample - if (line.starts_with("Architecture")) { - general_samples_.architecture_ = detail::convert_to(value); - } else if (line.starts_with("Byte Order")) { - general_samples_.byte_order_ = detail::convert_to(value); - } else if (line.starts_with("CPU(s)")) { - general_samples_.num_threads_ = detail::convert_to(value); - } else if (line.starts_with("Thread(s) per core")) { - general_samples_.threads_per_core_ = detail::convert_to(value); - } else if (line.starts_with("Core(s) per socket")) { - general_samples_.cores_per_socket_ = detail::convert_to(value); - } else if (line.starts_with("Socket(s)")) { - general_samples_.num_sockets_ = detail::convert_to(value); - } else if (line.starts_with("NUMA node(s)")) { - general_samples_.numa_nodes_ = detail::convert_to(value); - } else if (line.starts_with("Vendor ID")) { - general_samples_.vendor_id_ = detail::convert_to(value); - } else if (line.starts_with("Model name")) { - general_samples_.name_ = detail::convert_to(value); - } else if (line.starts_with("Flags")) { - general_samples_.flags_ = detail::split_as(value, ' '); - } else if (line.starts_with("Frequency boost")) { - clock_samples_.frequency_boost_ = value == "enabled"; - } else if (line.starts_with("CPU max MHz")) { - clock_samples_.max_frequency_ = detail::convert_to(value); - } else if (line.starts_with("CPU min MHz")) { - clock_samples_.min_frequency_ = detail::convert_to(value); - } else if (line.starts_with("L1d cache")) { - memory_samples_.l1d_cache_ = detail::convert_to(value); - } else if (line.starts_with("L1i cache")) { - memory_samples_.l1i_cache_ = detail::convert_to(value); - } else if 
(line.starts_with("L2 cache")) { - memory_samples_.l2_cache_ = detail::convert_to(value); - } else if (line.starts_with("L3 cache")) { - memory_samples_.l3_cache_ = detail::convert_to(value); - } - } - } -#endif - -#if defined(HWS_VIA_FREE_ENABLED) - const std::regex whitespace_replace_reg{ "[ ]+", std::regex::extended }; - { - std::string free_output = detail::run_subprocess("free -b"); - free_output = std::regex_replace(free_output, whitespace_replace_reg, " "); - const std::vector free_lines = detail::split(detail::trim(free_output), '\n'); - assert((free_lines.size() >= 3) && "Must read more than three lines, but fewer were read!"); - - // read memory information - const std::vector memory_data = detail::split(free_lines[1], ' '); - memory_samples_.memory_total_ = detail::convert_to(memory_data[1]); - memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ detail::convert_to(memory_data[2]) }; - memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{ detail::convert_to(memory_data[3]) }; - - // read swap information - const std::vector swap_data = detail::split(free_lines[2], ' '); - memory_samples_.swap_memory_total_ = detail::convert_to(swap_data[1]); - memory_samples_.swap_memory_used_ = decltype(memory_samples_.swap_memory_used_)::value_type{ detail::convert_to(swap_data[2]) }; - memory_samples_.swap_memory_free_ = decltype(memory_samples_.swap_memory_free_)::value_type{ detail::convert_to(swap_data[3]) }; - } -#endif - -#if defined(HWS_VIA_TURBOSTAT_ENABLED) - - // -n, --num_iterations number of the measurement iterations - // -i, --interval sampling interval in seconds (decimal number) - // -S, --Summary limits output to 1-line per interval - // -q, --quiet skip decoding system configuration header - - // get header information - #if defined(HWS_VIA_TURBOSTAT_ROOT) - // run with sudo - const std::string_view turbostat_command_line = "sudo turbostat -n 1 -i 0.001 -S -q"; - #else - // run without sudo - 
const std::string_view turbostat_command_line = "turbostat -n 1 -i 0.001 -S -q"; - #endif - - { - // run turbostat - const std::string turbostat_output = detail::run_subprocess(turbostat_command_line); - - // retrieve the turbostat data - const std::vector data = detail::split(detail::trim(turbostat_output), '\n'); - assert((data.size() >= 2) && "Must read at least two lines!"); - const std::vector header = detail::split(data[0], '\t'); - const std::vector values = detail::split(data[1], '\t'); - - for (std::size_t i = 0; i < header.size(); ++i) { - if (header[i] == "Avg_MHz") { - using vector_type = decltype(clock_samples_.average_frequency_)::value_type; - clock_samples_.average_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Busy%") { - using vector_type = decltype(general_samples_.busy_percent_)::value_type; - general_samples_.busy_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Bzy_MHz") { - using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type; - clock_samples_.average_non_idle_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "TSC_MHz") { - using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; - clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "IPC") { - using vector_type = decltype(general_samples_.ipc_)::value_type; - general_samples_.ipc_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "IRQ") { - using vector_type = decltype(general_samples_.irq_)::value_type; - general_samples_.irq_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "SMI") { - using vector_type = decltype(general_samples_.smi_)::value_type; - general_samples_.smi_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "POLL") { - using vector_type = decltype(general_samples_.poll_)::value_type; - 
general_samples_.poll_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "POLL%") { - using vector_type = decltype(general_samples_.poll_percent_)::value_type; - general_samples_.poll_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CoreTmp") { - using vector_type = decltype(temperature_samples_.core_temperature_)::value_type; - temperature_samples_.core_temperature_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CoreThr") { - using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; - temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "PkgTmp") { - using vector_type = decltype(temperature_samples_.package_temperature_)::value_type; - temperature_samples_.package_temperature_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFX%rc6") { - using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; - gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFXMHz") { - using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; - gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFXAMHz") { - using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; - gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Totl%C0") { - using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; - idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Any%C0") { - using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; - idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] 
== "GFX%C0") { - using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; - gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CPUGFX%") { - using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; - gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CPU%LPI") { - using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; - idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "SYS%LPI") { - using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; - idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "Pkg%LPI") { - using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; - idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "PkgWatt") { - using vector_type = decltype(power_samples_.package_watt_)::value_type; - power_samples_.package_watt_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "CorWatt") { - using vector_type = decltype(power_samples_.core_watt_)::value_type; - power_samples_.core_watt_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "GFXWatt") { - using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; - gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "RAMWatt") { - using vector_type = decltype(power_samples_.ram_watt_)::value_type; - power_samples_.ram_watt_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "PKG_%") { - using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; - 
power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else if (header[i] == "RAM_%") { - using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; - power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; - } else { - // test against regex - const std::string header_str{ header[i] }; - const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended }; - if (std::regex_match(header_str, reg)) { - // first time this branch is reached -> create optional value - if (!idle_state_samples_.idle_states_.has_value()) { - idle_state_samples_.idle_states_ = std::make_optional(); - } - - using vector_type = cpu_idle_states_samples::map_type::mapped_type; - idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to(values[i]) }; - } - } - } - } -#endif - - // - // loop until stop_sampling() is called - // - - while (!this->has_sampling_stopped()) { - // only sample values if the sampler currently isn't paused - if (this->is_sampling()) { - // add current time point - this->add_time_point(std::chrono::steady_clock::now()); - -#if defined(HWS_VIA_FREE_ENABLED) - { - // run free - std::string free_output = detail::run_subprocess("free -b"); - free_output = std::regex_replace(free_output, whitespace_replace_reg, " "); - const std::vector free_lines = detail::split(detail::trim(free_output), '\n'); - assert((free_lines.size() >= 3) && "Must read more than three lines, but fewer were read!"); - - // read memory information - const std::vector memory_data = detail::split(free_lines[1], ' '); - memory_samples_.memory_used_->push_back(detail::convert_to(memory_data[2])); - memory_samples_.memory_free_->push_back(detail::convert_to(memory_data[3])); - - // read swap information - const std::vector swap_data = detail::split(free_lines[2], ' '); - 
memory_samples_.swap_memory_used_->push_back(detail::convert_to(swap_data[2])); - memory_samples_.swap_memory_free_->push_back(detail::convert_to(swap_data[3])); - } -#endif - -#if defined(HWS_VIA_TURBOSTAT_ENABLED) - { - // run turbostat - const std::string turbostat_output = detail::run_subprocess(turbostat_command_line); - - // retrieve the turbostat data - const std::vector data = detail::split(detail::trim(turbostat_output), '\n'); - assert((data.size() >= 2) && "Must read at least two lines!"); - const std::vector header = detail::split(data[0], '\t'); - const std::vector values = detail::split(data[1], '\t'); - - // add values to the respective sample entries - for (std::size_t i = 0; i < header.size(); ++i) { - if (header[i] == "Avg_MHz") { - using vector_type = decltype(clock_samples_.average_frequency_)::value_type; - clock_samples_.average_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Busy%") { - using vector_type = decltype(general_samples_.busy_percent_)::value_type; - general_samples_.busy_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Bzy_MHz") { - using vector_type = decltype(clock_samples_.average_non_idle_frequency_)::value_type; - clock_samples_.average_non_idle_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "TSC_MHz") { - using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; - clock_samples_.time_stamp_counter_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "IPC") { - using vector_type = decltype(general_samples_.ipc_)::value_type; - general_samples_.ipc_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "IRQ") { - using vector_type = decltype(general_samples_.irq_)::value_type; - general_samples_.irq_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "SMI") { - using vector_type = decltype(general_samples_.smi_)::value_type; - 
general_samples_.smi_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "POLL") { - using vector_type = decltype(general_samples_.poll_)::value_type; - general_samples_.poll_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "POLL%") { - using vector_type = decltype(general_samples_.poll_percent_)::value_type; - general_samples_.poll_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CoreTmp") { - using vector_type = decltype(temperature_samples_.core_temperature_)::value_type; - temperature_samples_.core_temperature_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CoreThr") { - using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; - temperature_samples_.core_throttle_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "PkgTmp") { - using vector_type = decltype(temperature_samples_.package_temperature_)::value_type; - temperature_samples_.package_temperature_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFX%rc6") { - using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; - gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFXMHz") { - using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; - gfx_samples_.gfx_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFXAMHz") { - using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; - gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Totl%C0") { - using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; - idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Any%C0") { - using vector_type = 
decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; - idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFX%C0") { - using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; - gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CPUGFX%") { - using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; - gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CPU%LPI") { - using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; - idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "SYS%LPI") { - using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; - idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "Pkg%LPI") { - using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; - idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "PkgWatt") { - using vector_type = decltype(power_samples_.package_watt_)::value_type; - power_samples_.package_watt_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "CorWatt") { - using vector_type = decltype(power_samples_.core_watt_)::value_type; - power_samples_.core_watt_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "GFXWatt") { - using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; - gfx_samples_.gfx_watt_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "RAMWatt") { - using vector_type = decltype(power_samples_.ram_watt_)::value_type; - 
power_samples_.ram_watt_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "PKG_%") { - using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; - power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); - } else if (header[i] == "RAM_%") { - using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; - power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); - } else { - const std::string header_str{ header[i] }; - if (idle_state_samples_.idle_states_.value().contains(header_str)) { - using vector_type = cpu_idle_states_samples::map_type::mapped_type; - idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to(values[i])); - } - } - } - } -#endif - } - - // wait for the sampling interval to pass to retrieve the next sample - std::this_thread::sleep_for(this->sampling_interval()); - } -} - -std::string cpu_hardware_sampler::device_identification() const { - return "cpu_device"; -} - -std::string cpu_hardware_sampler::generate_yaml_string() const { - // check whether it's safe to generate the YAML entry - if (this->is_sampling()) { - throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" 
}; - } - - return std::format("{}\n" - "{}\n" - "{}\n" - "{}\n" - "{}\n" - "{}\n" - "{}", - general_samples_.generate_yaml_string(), - clock_samples_.generate_yaml_string(), - power_samples_.generate_yaml_string(), - memory_samples_.generate_yaml_string(), - temperature_samples_.generate_yaml_string(), - gfx_samples_.generate_yaml_string(), - idle_state_samples_.generate_yaml_string()); -} - -std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler) { - if (sampler.is_sampling()) { - out.setstate(std::ios_base::failbit); - return out; - } else { - return out << std::format("sampling interval: {}\n" - "time points: [{}]\n\n" - "general samples:\n{}\n\n" - "clock samples:\n{}\n\n" - "power samples:\n{}\n\n" - "memory samples:\n{}\n\n" - "temperature samples:\n{}\n\n" - "gfx samples:\n{}\n\n" - "idle state samples:\n{}", - sampler.sampling_interval(), - detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), - sampler.general_samples(), - sampler.clock_samples(), - sampler.power_samples(), - sampler.memory_samples(), - sampler.temperature_samples(), - sampler.gfx_samples(), - sampler.idle_state_samples()); - } -} - -} // namespace hws diff --git a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp b/src/hardware_sampling/gpu_amd/hardware_sampler.cpp deleted file mode 100644 index 3f926af..0000000 --- a/src/hardware_sampling/gpu_amd/hardware_sampler.cpp +++ /dev/null @@ -1,671 +0,0 @@ -/** - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "hardware_sampling/gpu_amd/hardware_sampler.hpp" - -#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} -#include "hardware_sampling/gpu_amd/utility.hpp" // HWS_ROCM_SMI_ERROR_CHECK -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::{time_points_to_epoch, join} - -#include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions - -#include // std::chrono::{steady_clock, duration_cast, milliseconds} -#include // std::size_t -#include // std::uint32_t, std::uint64_t -#include // std::exception, std::terminate -#include // std::format -#include // std::ios_base -#include // std::cerr, std::endl -#include // std::optional -#include // std::ostream -#include // std::runtime_error -#include // std::string -#include // std::this_thread -#include // std::move -#include // std::vector - -namespace hws { - -gpu_amd_hardware_sampler::gpu_amd_hardware_sampler() : - gpu_amd_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { } - -gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id) : - gpu_amd_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { } - -gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::chrono::milliseconds sampling_interval) : - gpu_amd_hardware_sampler{ 0, sampling_interval } { } - -gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) : - hardware_sampler{ sampling_interval }, - device_id_{ static_cast(device_id) } { - // make sure that rsmi_init is only called once for all instances - if (instances_++ == 0) { - HWS_ROCM_SMI_ERROR_CHECK(rsmi_init(std::uint64_t{ 0 })); - // notify that initialization has been finished - init_finished_ = true; - } else { - // wait until init has been finished! 
- while (!init_finished_) { } - } -} - -gpu_amd_hardware_sampler::~gpu_amd_hardware_sampler() { - try { - // if this hardware sampler is still sampling, stop it - if (this->has_sampling_started() && !this->has_sampling_stopped()) { - this->stop_sampling(); - } - - // the last instance must shut down the ROCm SMI runtime - // make sure that rsmi_shut_down is only called once - if (--instances_ == 0) { - HWS_ROCM_SMI_ERROR_CHECK(rsmi_shut_down()); - // reset init_finished flag - init_finished_ = false; - } - } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; - std::terminate(); - } -} - -void gpu_amd_hardware_sampler::sampling_loop() { - // - // add samples where we only have to retrieve the value once - // - - this->add_time_point(std::chrono::steady_clock::now()); - - // retrieve initial general information - { - // fixed information -> only retrieved once - std::string name(static_cast(1024), '\0'); - if (rsmi_dev_name_get(device_id_, name.data(), name.size()) == RSMI_STATUS_SUCCESS) { - general_samples_.name_ = name.substr(0, name.find_first_of('\0')); - } - - // queried samples -> retrieved every iteration if available - rsmi_dev_perf_level_t pstate{}; - if (rsmi_dev_perf_level_get(device_id_, &pstate) == RSMI_STATUS_SUCCESS) { - general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ static_cast(pstate) }; - } - - decltype(general_samples_.utilization_gpu_)::value_type::value_type utilization_gpu{}; - if (rsmi_dev_busy_percent_get(device_id_, &utilization_gpu) == RSMI_STATUS_SUCCESS) { - general_samples_.utilization_gpu_ = decltype(general_samples_.utilization_gpu_)::value_type{ utilization_gpu }; - } - - decltype(general_samples_.utilization_mem_)::value_type::value_type utilization_mem{}; - if (rsmi_dev_memory_busy_percent_get(device_id_, &utilization_mem) == RSMI_STATUS_SUCCESS) { - general_samples_.utilization_mem_ = decltype(general_samples_.utilization_mem_)::value_type{ utilization_mem }; - 
} - } - - // retrieve initial clock related information - { - rsmi_frequencies_t frequency_info{}; - if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info) == RSMI_STATUS_SUCCESS) { - clock_samples_.clock_system_min_ = frequency_info.frequency[0]; - clock_samples_.clock_system_max_ = frequency_info.frequency[frequency_info.num_supported - 1]; - // queried samples -> retrieved every iteration if available - clock_samples_.clock_system_ = decltype(clock_samples_.clock_system_)::value_type{}; - if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_system_->push_back(frequency_info.frequency[frequency_info.current]); - } else { - clock_samples_.clock_system_->push_back(0); - } - } - - if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info) == RSMI_STATUS_SUCCESS) { - clock_samples_.clock_socket_min_ = frequency_info.frequency[0]; - clock_samples_.clock_socket_max_ = frequency_info.frequency[frequency_info.num_supported - 1]; - // queried samples -> retrieved every iteration if available - clock_samples_.clock_socket_ = decltype(clock_samples_.clock_socket_)::value_type{}; - if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_socket_->push_back(frequency_info.frequency[frequency_info.current]); - } else { - clock_samples_.clock_socket_->push_back(0); - } - } - - if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info) == RSMI_STATUS_SUCCESS) { - clock_samples_.clock_memory_min_ = frequency_info.frequency[0]; - clock_samples_.clock_memory_max_ = frequency_info.frequency[frequency_info.num_supported - 1]; - // queried samples -> retrieved every iteration if available - clock_samples_.clock_memory_ = decltype(clock_samples_.clock_memory_)::value_type{}; - if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_memory_->push_back(frequency_info.frequency[frequency_info.current]); - } else { - 
clock_samples_.clock_memory_->push_back(0); - } - } - - // queried samples -> retrieved every iteration if available - decltype(clock_samples_.overdrive_level_)::value_type::value_type overdrive_level{}; - if (rsmi_dev_overdrive_level_get(device_id_, &overdrive_level) == RSMI_STATUS_SUCCESS) { - clock_samples_.overdrive_level_ = decltype(clock_samples_.overdrive_level_)::value_type{ overdrive_level }; - } - - decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type memory_overdrive_level{}; - if (rsmi_dev_mem_overdrive_level_get(device_id_, &memory_overdrive_level) == RSMI_STATUS_SUCCESS) { - clock_samples_.memory_overdrive_level_ = decltype(clock_samples_.memory_overdrive_level_)::value_type{ memory_overdrive_level }; - } - } - - // retrieve initial power related information - { - decltype(power_samples_.power_default_cap_)::value_type power_default_cap{}; - if (rsmi_dev_power_cap_default_get(device_id_, &power_default_cap) == RSMI_STATUS_SUCCESS) { - power_samples_.power_default_cap_ = power_default_cap; - } - - decltype(power_samples_.power_cap_)::value_type power_cap{}; - if (rsmi_dev_power_cap_get(device_id_, std::uint32_t{ 0 }, &power_cap) == RSMI_STATUS_SUCCESS) { - power_samples_.power_cap_ = power_cap; - } - - { - decltype(power_samples_.power_usage_)::value_type::value_type power_usage{}; - RSMI_POWER_TYPE power_type{}; - if (rsmi_dev_power_get(device_id_, &power_usage, &power_type) == RSMI_STATUS_SUCCESS) { - switch (power_type) { - case RSMI_POWER_TYPE::RSMI_AVERAGE_POWER: - power_samples_.power_type_ = "average"; - break; - case RSMI_POWER_TYPE::RSMI_CURRENT_POWER: - power_samples_.power_type_ = "current/instant"; - break; - case RSMI_POWER_TYPE::RSMI_INVALID_POWER: - power_samples_.power_type_ = "invalid/undetected"; - break; - } - // queried samples -> retrieved every iteration if available - power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ power_usage }; - } - } - - rsmi_power_profile_status_t 
power_profile{}; - if (rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile) == RSMI_STATUS_SUCCESS) { - decltype(power_samples_.available_power_profiles_)::value_type available_power_profiles{}; - // go through all possible power profiles - if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_CUSTOM_MASK) != std::uint64_t{ 0 }) { - available_power_profiles.emplace_back("CUSTOM"); - } - if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_VIDEO_MASK) != std::uint64_t{ 0 }) { - available_power_profiles.emplace_back("VIDEO"); - } - if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_POWER_SAVING_MASK) != std::uint64_t{ 0 }) { - available_power_profiles.emplace_back("POWER_SAVING"); - } - if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_COMPUTE_MASK) != std::uint64_t{ 0 }) { - available_power_profiles.emplace_back("COMPUTE"); - } - if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_VR_MASK) != std::uint64_t{ 0 }) { - available_power_profiles.emplace_back("VR"); - } - if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK) != std::uint64_t{ 0 }) { - available_power_profiles.emplace_back("3D_FULL_SCREEN"); - } - if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT) != std::uint64_t{ 0 }) { - available_power_profiles.emplace_back("BOOTUP_DEFAULT"); - } - power_samples_.available_power_profiles_ = std::move(available_power_profiles); - - // queried samples -> retrieved every iteration if available - switch (power_profile.current) { - case RSMI_PWR_PROF_PRST_CUSTOM_MASK: - power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "CUSTOM" }; - break; - case RSMI_PWR_PROF_PRST_VIDEO_MASK: - power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "VIDEO" }; - break; - case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK: - power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "POWER_SAVING" }; 
- break; - case RSMI_PWR_PROF_PRST_COMPUTE_MASK: - power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "COMPUTE" }; - break; - case RSMI_PWR_PROF_PRST_VR_MASK: - power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "VR" }; - break; - case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK: - power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "3D_FULL_SCREEN" }; - break; - case RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT: - power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "BOOTUP_DEFAULT" }; - break; - case RSMI_PWR_PROF_PRST_INVALID: - power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "INVALID" }; - break; - } - } - - // queried samples -> retrieved every iteration if available - [[maybe_unused]] std::uint64_t timestamp{}; - float resolution{}; - decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type power_total_energy_consumption{}; - if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, ×tamp) == RSMI_STATUS_SUCCESS) { // TODO: returns the same value for all invocations - const double scaled_value = static_cast(power_total_energy_consumption) * static_cast(resolution); - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ static_cast(scaled_value) }; - } - } - - // retrieve initial memory related information - { - decltype(memory_samples_.memory_total_)::value_type memory_total{}; - if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_total) == RSMI_STATUS_SUCCESS) { - memory_samples_.memory_total_ = memory_total; - } - - decltype(memory_samples_.visible_memory_total_)::value_type visible_memory_total{}; - if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VIS_VRAM, &visible_memory_total) == RSMI_STATUS_SUCCESS) { - memory_samples_.visible_memory_total_ = 
visible_memory_total; - } - - rsmi_pcie_bandwidth_t bandwidth_info{}; - if (rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info) == RSMI_STATUS_SUCCESS) { - memory_samples_.min_num_pcie_lanes_ = bandwidth_info.lanes[0]; - memory_samples_.max_num_pcie_lanes_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1]; - // queried samples -> retrieved every iteration if available - memory_samples_.pcie_transfer_rate_ = decltype(memory_samples_.pcie_transfer_rate_)::value_type{}; - memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{}; - if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) { - memory_samples_.pcie_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current]); - memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]); - } else { - // the current index is (somehow) wrong - memory_samples_.pcie_transfer_rate_->push_back(0); - memory_samples_.num_pcie_lanes_->push_back(0); - } - } - - // queried samples -> retrieved every iteration if available - decltype(memory_samples_.memory_used_)::value_type::value_type memory_used{}; - if (rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_used) == RSMI_STATUS_SUCCESS) { - memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_used }; - } - } - - // retrieve fixed temperature related information - { - std::uint32_t fan_id{ 0 }; - decltype(temperature_samples_.fan_speed_)::value_type::value_type fan_speed{}; - while (rsmi_dev_fan_speed_get(device_id_, fan_id, &fan_speed) == RSMI_STATUS_SUCCESS) { - if (fan_id == 0) { - // queried samples -> retrieved every iteration if available - temperature_samples_.fan_speed_ = decltype(temperature_samples_.fan_speed_)::value_type{ fan_speed }; - } - ++fan_id; - } - temperature_samples_.num_fans_ = fan_id; - - decltype(temperature_samples_.max_fan_speed_)::value_type 
max_fan_speed{}; - if (rsmi_dev_fan_speed_max_get(device_id_, std::uint32_t{ 0 }, &max_fan_speed) == RSMI_STATUS_SUCCESS) { - temperature_samples_.max_fan_speed_ = max_fan_speed; - } - - decltype(temperature_samples_.temperature_edge_min_)::value_type temperature_edge_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MIN, &temperature_edge_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_edge_min_ = temperature_edge_min; - } - - decltype(temperature_samples_.temperature_edge_max_)::value_type temperature_edge_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temperature_edge_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_edge_max_ = temperature_edge_min; - } - - decltype(temperature_samples_.temperature_hotspot_min_)::value_type temperature_hotspot_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MIN, &temperature_hotspot_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hotspot_min_ = temperature_hotspot_min; - } - - decltype(temperature_samples_.temperature_hotspot_max_)::value_type temperature_hotspot_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MAX, &temperature_hotspot_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hotspot_max_ = temperature_hotspot_max; - } - - decltype(temperature_samples_.temperature_memory_min_)::value_type temperature_memory_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MIN, &temperature_memory_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_memory_min_ = temperature_memory_min; - } - - decltype(temperature_samples_.temperature_memory_max_)::value_type temperature_memory_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MAX, &temperature_memory_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_memory_max_ = 
temperature_memory_max; - } - - decltype(temperature_samples_.temperature_hbm_0_min_)::value_type temperature_hbm_0_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MIN, &temperature_hbm_0_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_0_min_ = temperature_hbm_0_min; - } - - decltype(temperature_samples_.temperature_hbm_0_max_)::value_type temperature_hbm_0_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MAX, &temperature_hbm_0_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_0_max_ = temperature_hbm_0_max; - } - - decltype(temperature_samples_.temperature_hbm_1_min_)::value_type temperature_hbm_1_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MIN, &temperature_hbm_1_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_1_min_ = temperature_hbm_1_min; - } - - decltype(temperature_samples_.temperature_hbm_1_max_)::value_type temperature_hbm_1_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MAX, &temperature_hbm_1_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_1_max_ = temperature_hbm_1_max; - } - - decltype(temperature_samples_.temperature_hbm_2_min_)::value_type temperature_hbm_2_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MIN, &temperature_hbm_2_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_2_min_ = temperature_hbm_2_min; - } - - decltype(temperature_samples_.temperature_hbm_2_max_)::value_type temperature_hbm_2_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MAX, &temperature_hbm_2_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_2_max_ = temperature_hbm_2_max; - } - - decltype(temperature_samples_.temperature_hbm_3_min_)::value_type temperature_hbm_3_min{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, 
RSMI_TEMP_MIN, &temperature_hbm_3_min) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_3_min_ = temperature_hbm_3_min; - } - - decltype(temperature_samples_.temperature_hbm_3_max_)::value_type temperature_hbm_3_max{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MAX, &temperature_hbm_3_max) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_3_max_ = temperature_hbm_3_max; - } - - // queried samples -> retrieved every iteration if available - decltype(temperature_samples_.temperature_edge_)::value_type::value_type temperature_edge{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temperature_edge) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_edge_ = decltype(temperature_samples_.temperature_edge_)::value_type{ temperature_edge }; - } - - decltype(temperature_samples_.temperature_hotspot_)::value_type::value_type temperature_hotspot{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &temperature_hotspot) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hotspot_ = decltype(temperature_samples_.temperature_hotspot_)::value_type{ temperature_hotspot }; - } - - decltype(temperature_samples_.temperature_memory_)::value_type::value_type temperature_memory{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &temperature_memory) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_memory_ = decltype(temperature_samples_.temperature_memory_)::value_type{ temperature_memory }; - } - - decltype(temperature_samples_.temperature_hbm_0_)::value_type::value_type temperature_hbm_0{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &temperature_hbm_0) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_0_ = decltype(temperature_samples_.temperature_hbm_0_)::value_type{ temperature_hbm_0 }; - } - - 
decltype(temperature_samples_.temperature_hbm_1_)::value_type::value_type temperature_hbm_1{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &temperature_hbm_1) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_1_ = decltype(temperature_samples_.temperature_hbm_1_)::value_type{ temperature_hbm_1 }; - } - - decltype(temperature_samples_.temperature_hbm_2_)::value_type::value_type temperature_hbm_2{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &temperature_hbm_2) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_2_ = decltype(temperature_samples_.temperature_hbm_2_)::value_type{ temperature_hbm_2 }; - } - - decltype(temperature_samples_.temperature_hbm_3_)::value_type::value_type temperature_hbm_3{}; - if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &temperature_hbm_3) == RSMI_STATUS_SUCCESS) { - temperature_samples_.temperature_hbm_3_ = decltype(temperature_samples_.temperature_hbm_3_)::value_type{ temperature_hbm_3 }; - } - } - - // - // loop until stop_sampling() is called - // - - while (!this->has_sampling_stopped()) { - // only sample values if the sampler currently isn't paused - if (this->is_sampling()) { - // add current time point - this->add_time_point(std::chrono::steady_clock::now()); - - // retrieve general samples - { - if (general_samples_.performance_level_.has_value()) { - rsmi_dev_perf_level_t pstate{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate)); - general_samples_.performance_level_->push_back(static_cast(pstate)); - } - - if (general_samples_.utilization_gpu_.has_value()) { - decltype(general_samples_.utilization_gpu_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value)); - general_samples_.utilization_gpu_->push_back(value); - } - - if (general_samples_.utilization_mem_.has_value()) { - 
decltype(general_samples_.utilization_mem_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value)); - general_samples_.utilization_mem_->push_back(value); - } - } - - // retrieve clock related samples - { - if (clock_samples_.clock_system_.has_value()) { - rsmi_frequencies_t frequency_info{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info)); - if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_system_->push_back(frequency_info.frequency[frequency_info.current]); - } else { - // the current index is (somehow) wrong - clock_samples_.clock_system_->push_back(0); - } - } - - if (clock_samples_.clock_socket_.has_value()) { - rsmi_frequencies_t frequency_info{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info)); - if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_socket_->push_back(frequency_info.frequency[frequency_info.current]); - } else { - // the current index is (somehow) wrong - clock_samples_.clock_socket_->push_back(0); - } - } - - if (clock_samples_.clock_memory_.has_value()) { - rsmi_frequencies_t frequency_info{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info)); - if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { - clock_samples_.clock_memory_->push_back(frequency_info.frequency[frequency_info.current]); - } else { - // the current index is (somehow) wrong - clock_samples_.clock_memory_->push_back(0); - } - } - - if (clock_samples_.overdrive_level_.has_value()) { - decltype(clock_samples_.overdrive_level_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_overdrive_level_get(device_id_, &value)); - clock_samples_.overdrive_level_->push_back(value); - } - - if (clock_samples_.memory_overdrive_level_.has_value()) { - 
decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_mem_overdrive_level_get(device_id_, &value)); - clock_samples_.memory_overdrive_level_->push_back(value); - } - } - - // retrieve power related samples - { - if (power_samples_.power_usage_.has_value()) { - [[maybe_unused]] RSMI_POWER_TYPE power_type{}; - decltype(power_samples_.power_usage_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type)); - power_samples_.power_usage_->push_back(value); - } - - if (power_samples_.power_total_energy_consumption_.has_value()) { - [[maybe_unused]] std::uint64_t timestamp{}; - float resolution{}; - decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp)); // TODO: returns the same value for all invocations - const double scaled_value = static_cast(value) * static_cast(resolution); - power_samples_.power_total_energy_consumption_->push_back(static_cast(scaled_value)); - } - - if (power_samples_.power_profile_.has_value()) { - rsmi_power_profile_status_t power_profile{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile)); - switch (power_profile.current) { - case RSMI_PWR_PROF_PRST_CUSTOM_MASK: - power_samples_.power_profile_->emplace_back("CUSTOM"); - break; - case RSMI_PWR_PROF_PRST_VIDEO_MASK: - power_samples_.power_profile_->emplace_back("VIDEO"); - break; - case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK: - power_samples_.power_profile_->emplace_back("POWER_SAVING"); - break; - case RSMI_PWR_PROF_PRST_COMPUTE_MASK: - power_samples_.power_profile_->emplace_back("COMPUTE"); - break; - case RSMI_PWR_PROF_PRST_VR_MASK: - power_samples_.power_profile_->emplace_back("VR"); - break; - case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK: - 
power_samples_.power_profile_->emplace_back("3D_FULL_SCREEN"); - break; - case RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT: - power_samples_.power_profile_->emplace_back("BOOTUP_DEFAULT"); - break; - case RSMI_PWR_PROF_PRST_INVALID: - power_samples_.power_profile_->emplace_back("INVALID"); - break; - } - } - } - - // retrieve memory related samples - { - if (memory_samples_.memory_used_.has_value()) { - decltype(memory_samples_.memory_used_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value)); - memory_samples_.memory_used_->push_back(value); - } - - if (memory_samples_.pcie_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) { - rsmi_pcie_bandwidth_t bandwidth_info{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info)); - if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) { - memory_samples_.pcie_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current]); - memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]); - } else { - // the current index is (somehow) wrong - memory_samples_.pcie_transfer_rate_->push_back(0); - memory_samples_.num_pcie_lanes_->push_back(0); - } - } - } - - // retrieve temperature related samples - { - if (temperature_samples_.fan_speed_.has_value()) { - decltype(temperature_samples_.fan_speed_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value)); - temperature_samples_.fan_speed_->push_back(value); - } - - if (temperature_samples_.temperature_edge_.has_value()) { - decltype(temperature_samples_.temperature_edge_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_edge_->push_back(value); - } - - if 
(temperature_samples_.temperature_hotspot_.has_value()) { - decltype(temperature_samples_.temperature_hotspot_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hotspot_->push_back(value); - } - - if (temperature_samples_.temperature_memory_.has_value()) { - decltype(temperature_samples_.temperature_memory_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_memory_->push_back(value); - } - - if (temperature_samples_.temperature_hbm_0_.has_value()) { - decltype(temperature_samples_.temperature_hbm_0_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hbm_0_->push_back(value); - } - - if (temperature_samples_.temperature_hbm_1_.has_value()) { - decltype(temperature_samples_.temperature_hbm_1_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hbm_1_->push_back(value); - } - - if (temperature_samples_.temperature_hbm_2_.has_value()) { - decltype(temperature_samples_.temperature_hbm_2_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hbm_2_->push_back(value); - } - - if (temperature_samples_.temperature_hbm_3_.has_value()) { - decltype(temperature_samples_.temperature_hbm_3_)::value_type::value_type value{}; - HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value)); - temperature_samples_.temperature_hbm_3_->push_back(value); - } - } - } - - // wait for 
the sampling interval to pass to retrieve the next sample - std::this_thread::sleep_for(this->sampling_interval()); - } -} - -std::string gpu_amd_hardware_sampler::device_identification() const { - return std::format("gpu_amd_device_{}", device_id_); -} - -std::string gpu_amd_hardware_sampler::generate_yaml_string() const { - // check whether it's safe to generate the YAML entry - if (this->is_sampling()) { - throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; - } - - return std::format("{}\n" - "{}\n" - "{}\n" - "{}\n" - "{}", - general_samples_.generate_yaml_string(), - clock_samples_.generate_yaml_string(), - power_samples_.generate_yaml_string(), - memory_samples_.generate_yaml_string(), - temperature_samples_.generate_yaml_string()); -} - -std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &sampler) { - if (sampler.is_sampling()) { - out.setstate(std::ios_base::failbit); - return out; - } else { - return out << std::format("sampling interval: {}\n" - "time points: [{}]\n\n" - "general samples:\n{}\n\n" - "clock samples:\n{}\n\n" - "power samples:\n{}\n\n" - "memory samples:\n{}\n\n" - "temperature samples:\n{}", - sampler.sampling_interval(), - detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), - sampler.general_samples(), - sampler.clock_samples(), - sampler.power_samples(), - sampler.memory_samples(), - sampler.temperature_samples()); - } -} - -} // namespace hws diff --git a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp b/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp deleted file mode 100644 index 52a1ae8..0000000 --- a/src/hardware_sampling/gpu_amd/rocm_smi_samples.cpp +++ /dev/null @@ -1,609 +0,0 @@ -/** - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "hardware_sampling/gpu_amd/rocm_smi_samples.hpp" - -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join} - -#include "rocm_smi/rocm_smi.h" // RSMI_MAX_FAN_SPEED - -#include // std::format -#include // std::ostream -#include // std::string - -namespace hws { - -//*************************************************************************************************************************************// -// general samples // -//*************************************************************************************************************************************// - -std::string rocm_smi_general_samples::generate_yaml_string() const { - std::string str{ "general:\n" }; - - // device name - if (this->name_.has_value()) { - str += std::format(" name:\n" - " unit: \"string\"\n" - " values: \"{}\"\n", - this->name_.value()); - } - - // performance state - if (this->performance_level_.has_value()) { - str += std::format(" performance_state:\n" - " unit: \"int - see rsmi_dev_perf_level_t\"\n" - " values: [{}]\n", - detail::join(this->performance_level_.value(), ", ")); - } - // device compute utilization - if (this->utilization_gpu_.has_value()) { - str += std::format(" utilization_gpu:\n" - " unit: \"percentage\"\n" - " values: [{}]\n", - detail::join(this->utilization_gpu_.value(), ", ")); - } - // device memory utilization - if (this->utilization_mem_.has_value()) { - str += std::format(" utilization_mem:\n" - " unit: \"percentage\"\n" - " values: [{}]\n", - detail::join(this->utilization_mem_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) { - return out << std::format("name [string]: {}\n" - "performance_level [int]: [{}]\n" - "utilization_gpu [%]: [{}]\n" - "utilization_mem [%]: [{}]", - detail::value_or_default(samples.get_name()), - detail::join(detail::value_or_default(samples.get_performance_level()), 
", "), - detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_utilization_mem()), ", ")); -} - -//*************************************************************************************************************************************// -// clock samples // -//*************************************************************************************************************************************// - -std::string rocm_smi_clock_samples::generate_yaml_string() const { - std::string str{ "clock:\n" }; - - // socket clock min frequencies - if (this->clock_socket_min_.has_value()) { - str += std::format(" clock_socket_min:\n" - " unit: \"Hz\"\n" - " values: {}\n", - this->clock_socket_min_.value()); - } - // socket clock max frequencies - if (this->clock_socket_max_.has_value()) { - str += std::format(" clock_socket_max:\n" - " unit: \"Hz\"\n" - " values: {}\n", - this->clock_socket_max_.value()); - } - - // memory clock min frequencies - if (this->clock_memory_min_.has_value()) { - str += std::format(" clock_memory_min:\n" - " unit: \"Hz\"\n" - " values: {}\n", - this->clock_memory_min_.value()); - } - // memory clock max frequencies - if (this->clock_memory_max_.has_value()) { - str += std::format(" clock_memory_max:\n" - " unit: \"Hz\"\n" - " values: {}\n", - this->clock_memory_max_.value()); - } - - // system clock min frequencies - if (this->clock_system_min_.has_value()) { - str += std::format(" clock_gpu_min:\n" - " unit: \"Hz\"\n" - " values: {}\n", - this->clock_system_min_.value()); - } - // system clock max frequencies - if (this->clock_system_max_.has_value()) { - str += std::format(" clock_gpu_max:\n" - " unit: \"Hz\"\n" - " values: {}\n", - this->clock_system_max_.value()); - } - - // socket clock frequency - if (this->clock_socket_.has_value()) { - str += std::format(" clock_socket:\n" - " unit: \"Hz\"\n" - " values: [{}]\n", - detail::join(this->clock_socket_.value(), ", ")); - } - // 
memory clock frequency - if (this->clock_memory_.has_value()) { - str += std::format(" clock_memory:\n" - " unit: \"Hz\"\n" - " values: [{}]\n", - detail::join(this->clock_memory_.value(), ", ")); - } - // system clock frequency - if (this->clock_system_.has_value()) { - str += std::format(" clock_gpu:\n" - " unit: \"Hz\"\n" - " values: [{}]\n", - detail::join(this->clock_system_.value(), ", ")); - } - // overdrive level - if (this->overdrive_level_.has_value()) { - str += std::format(" overdrive_level:\n" - " unit: \"percentage\"\n" - " values: [{}]\n", - detail::join(this->overdrive_level_.value(), ", ")); - } - // memory overdrive level - if (this->memory_overdrive_level_.has_value()) { - str += std::format(" memory_overdrive_level:\n" - " unit: \"percentage\"\n" - " values: [{}]\n", - detail::join(this->memory_overdrive_level_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples) { - return out << std::format("clock_system_min [Hz]: {}\n" - "clock_system_max [Hz]: {}\n" - "clock_socket_min [Hz]: {}\n" - "clock_socket_max [Hz]: {}\n" - "clock_memory_min [Hz]: {}\n" - "clock_memory_max [Hz]: {}\n" - "clock_system [Hz]: [{}]\n" - "clock_socket [Hz]: [{}]\n" - "clock_memory [Hz]: [{}]\n" - "overdrive_level [%]: [{}]\n" - "memory_overdrive_level [%]: [{}]", - detail::value_or_default(samples.get_clock_system_min()), - detail::value_or_default(samples.get_clock_system_max()), - detail::value_or_default(samples.get_clock_socket_min()), - detail::value_or_default(samples.get_clock_socket_max()), - detail::value_or_default(samples.get_clock_memory_min()), - detail::value_or_default(samples.get_clock_memory_max()), - detail::join(detail::value_or_default(samples.get_clock_system()), ", "), - detail::join(detail::value_or_default(samples.get_clock_socket()), ", "), - detail::join(detail::value_or_default(samples.get_clock_memory()), ", "), - 
detail::join(detail::value_or_default(samples.get_overdrive_level()), ", "), - detail::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", ")); -} - -//*************************************************************************************************************************************// -// power samples // -//*************************************************************************************************************************************// - -std::string rocm_smi_power_samples::generate_yaml_string() const { - std::string str{ "power:\n" }; - - // default power cap - if (this->power_default_cap_.has_value()) { - str += std::format(" power_management_limit:\n" - " unit: \"muW\"\n" - " values: {}\n", - this->power_default_cap_.value()); - } - // power cap - if (this->power_cap_.has_value()) { - str += std::format(" power_enforced_limit:\n" - " unit: \"muW\"\n" - " values: {}\n", - this->power_cap_.value()); - } - // power measurement type - if (this->power_type_.has_value()) { - str += std::format(" power_measurement_type:\n" - " unit: \"string\"\n" - " values: {}\n", - this->power_type_.value()); - } - // available power levels - if (this->available_power_profiles_.has_value()) { - str += std::format(" available_power_profiles:\n" - " unit: \"string\"\n" - " values: [{}]\n", - detail::join(this->available_power_profiles_.value(), ", ")); - } - - // current power usage - if (this->power_usage_.has_value()) { - str += std::format(" power_usage:\n" - " unit: \"muW\"\n" - " values: [{}]\n", - detail::join(this->power_usage_.value(), ", ")); - } - // total energy consumed - if (this->power_total_energy_consumption_.has_value()) { - decltype(rocm_smi_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size()); - for (std::size_t i = 0; i < consumed_energy.size(); ++i) { - consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - 
this->power_total_energy_consumption_->front(); - } - str += std::format(" power_total_energy_consumed:\n" - " unit: \"muJ\"\n" - " values: [{}]\n", - detail::join(consumed_energy, ", ")); - } - // current power level - if (this->power_profile_.has_value()) { - str += std::format(" power_profile:\n" - " unit: \"string\"\n" - " values: [{}]\n", - detail::join(this->power_profile_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples) { - return out << std::format("power_default_cap [muW]: {}\n" - "power_cap [muW]: {}\n" - "power_type [string]: {}\n" - "available_power_profiles [string]: [{}]\n" - "power_usage [muW]: [{}]\n" - "power_total_energy_consumption [muJ]: [{}]\n" - "power_profile [string]: [{}]", - detail::value_or_default(samples.get_power_default_cap()), - detail::value_or_default(samples.get_power_cap()), - detail::value_or_default(samples.get_power_type()), - detail::join(detail::value_or_default(samples.get_available_power_profiles()), ", "), - detail::join(detail::value_or_default(samples.get_power_usage()), ", "), - detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), - detail::join(detail::value_or_default(samples.get_power_profile()), ", ")); -} - -//*************************************************************************************************************************************// -// memory samples // -//*************************************************************************************************************************************// - -std::string rocm_smi_memory_samples::generate_yaml_string() const { - std::string str{ "memory:\n" }; - - // total memory - if (this->memory_total_.has_value()) { - str += std::format(" memory_total:\n" - " unit: \"B\"\n" - " values: {}\n", - this->memory_total_.value()); - } - // total visible memory - if (this->visible_memory_total_.has_value()) { - str 
+= std::format(" visible_memory_total:\n" - " unit: \"B\"\n" - " values: {}\n", - this->visible_memory_total_.value()); - } - // min number of PCIe lanes - if (this->min_num_pcie_lanes_.has_value()) { - str += std::format(" min_num_pcie_lanes:\n" - " unit: \"int\"\n" - " values: {}\n", - this->min_num_pcie_lanes_.value()); - } - // max number of PCIe lanes - if (this->max_num_pcie_lanes_.has_value()) { - str += std::format(" max_num_pcie_lanes:\n" - " unit: \"int\"\n" - " values: {}\n", - this->max_num_pcie_lanes_.value()); - } - - // used memory - if (this->memory_used_.has_value()) { - str += std::format(" memory_used:\n" - " unit: \"B\"\n" - " values: [{}]\n", - detail::join(this->memory_used_.value(), ", ")); - } - // free memory - if (this->memory_used_.has_value() && this->memory_total_.has_value()) { - decltype(rocm_smi_memory_samples::memory_used_)::value_type memory_free(this->memory_used_->size(), this->memory_total_.value()); - for (std::size_t i = 0; i < memory_free.size(); ++i) { - memory_free[i] -= this->memory_used_.value()[i]; - } - str += std::format(" memory_free:\n" - " unit: \"B\"\n" - " values: [{}]\n", - detail::join(memory_free, ", ")); - } - - // PCIe bandwidth - if (this->pcie_transfer_rate_.has_value()) { - str += std::format(" pcie_bandwidth:\n" - " unit: \"T/s\"\n" - " values: [{}]\n", - detail::join(this->pcie_transfer_rate_.value(), ", ")); - } - // number of PCIe lanes - if (this->num_pcie_lanes_.has_value()) { - str += std::format(" pcie_num_lanes:\n" - " unit: \"int\"\n" - " values: [{}]\n", - detail::join(this->num_pcie_lanes_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples) { - return out << std::format("memory_total [B]: {}\n" - "visible_memory_total [B]: {}\n" - "min_num_pcie_lanes [int]: {}\n" - "max_num_pcie_lanes [int]: {}\n" - "memory_used [B]: [{}]\n" - "pcie_transfer_rate [T/s]: [{}]\n" - 
"num_pcie_lanes [int]: [{}]", - detail::value_or_default(samples.get_memory_total()), - detail::value_or_default(samples.get_visible_memory_total()), - detail::value_or_default(samples.get_min_num_pcie_lanes()), - detail::value_or_default(samples.get_max_num_pcie_lanes()), - detail::join(detail::value_or_default(samples.get_memory_used()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_transfer_rate()), ", "), - detail::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", ")); -} - -//*************************************************************************************************************************************// -// temperature samples // -//*************************************************************************************************************************************// - -std::string rocm_smi_temperature_samples::generate_yaml_string() const { - std::string str{ "temperature:\n" }; - - // number of fans (emulated) - if (this->num_fans_.has_value()) { - str += std::format(" num_fans:\n" - " unit: \"int\"\n" - " values: {}\n", - this->num_fans_.value()); - } - // maximum fan speed - if (this->max_fan_speed_.has_value()) { - str += std::format(" max_fan_speed:\n" - " unit: \"int\"\n" - " values: {}\n", - this->max_fan_speed_.value()); - } - // minimum GPU edge temperature - if (this->temperature_edge_min_.has_value()) { - str += std::format(" temperature_gpu_min:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_edge_min_.value()); - } - // maximum GPU edge temperature - if (this->temperature_edge_max_.has_value()) { - str += std::format(" temperature_gpu_max:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_edge_max_.value()); - } - // minimum GPU hotspot temperature - if (this->temperature_hotspot_min_.has_value()) { - str += std::format(" temperature_hotspot_min:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hotspot_min_.value()); - } - // maximum GPU hotspot temperature - if 
(this->temperature_hotspot_max_.has_value()) { - str += std::format(" temperature_hotspot_max:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hotspot_max_.value()); - } - // minimum GPU memory temperature - if (this->temperature_memory_min_.has_value()) { - str += std::format(" temperature_memory_min:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_memory_min_.value()); - } - // maximum GPU memory temperature - if (this->temperature_memory_max_.has_value()) { - str += std::format(" temperature_memory_max:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_memory_max_.value()); - } - // minimum GPU HBM 0 temperature - if (this->temperature_hbm_0_min_.has_value()) { - str += std::format(" temperature_hbm_0_min:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hbm_0_min_.value()); - } - // maximum GPU HBM 0 temperature - if (this->temperature_hbm_0_max_.has_value()) { - str += std::format(" temperature_hbm_0_max:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hbm_0_max_.value()); - } - // minimum GPU HBM 1 temperature - if (this->temperature_hbm_1_min_.has_value()) { - str += std::format(" temperature_hbm_1_min:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hbm_1_min_.value()); - } - // maximum GPU HBM 1 temperature - if (this->temperature_hbm_1_max_.has_value()) { - str += std::format(" temperature_hbm_1_max:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hbm_1_max_.value()); - } - // minimum GPU HBM 2 temperature - if (this->temperature_hbm_2_min_.has_value()) { - str += std::format(" temperature_hbm_2_min:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hbm_2_min_.value()); - } - // maximum GPU HBM 2 temperature - if (this->temperature_hbm_2_max_.has_value()) { - str += std::format(" temperature_hbm_2_max:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hbm_2_max_.value()); - } - // minimum GPU HBM 3 temperature - if 
(this->temperature_hbm_3_min_.has_value()) { - str += std::format(" temperature_hbm_3_min:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hbm_3_min_.value()); - } - // maximum GPU HBM 3 temperature - if (this->temperature_hbm_3_max_.has_value()) { - str += std::format(" temperature_hbm_3_max:\n" - " unit: \"m°C\"\n" - " values: {}\n", - this->temperature_hbm_3_max_.value()); - } - - // fan speed - if (this->fan_speed_.has_value()) { - std::vector fan_speed_percent(this->fan_speed_->size()); - for (std::size_t i = 0; i < fan_speed_percent.size(); ++i) { - fan_speed_percent[i] = static_cast(this->fan_speed_.value()[i]) / static_cast(RSMI_MAX_FAN_SPEED); - } - str += std::format(" fan_speed:\n" - " unit: \"percentage\"\n" - " values: [{}]\n", - detail::join(fan_speed_percent, ", ")); - } - // GPU edge temperature - if (this->temperature_edge_.has_value()) { - str += std::format(" temperature_gpu:\n" - " unit: \"m°C\"\n" - " values: [{}]\n", - detail::join(this->temperature_edge_.value(), ", ")); - } - // GPU hotspot temperature - if (this->temperature_hotspot_.has_value()) { - str += std::format(" temperature_hotspot:\n" - " unit: \"m°C\"\n" - " values: [{}]\n", - detail::join(this->temperature_hotspot_.value(), ", ")); - } - // GPU memory temperature - if (this->temperature_memory_.has_value()) { - str += std::format(" temperature_memory:\n" - " unit: \"m°C\"\n" - " values: [{}]\n", - detail::join(this->temperature_memory_.value(), ", ")); - } - // GPU HBM 0 temperature - if (this->temperature_hbm_0_.has_value()) { - str += std::format(" temperature_hbm_0:\n" - " unit: \"m°C\"\n" - " values: [{}]\n", - detail::join(this->temperature_hbm_0_.value(), ", ")); - } - // GPU HBM 1 temperature - if (this->temperature_hbm_1_.has_value()) { - str += std::format(" temperature_hbm_1:\n" - " unit: \"m°C\"\n" - " values: [{}]\n", - detail::join(this->temperature_hbm_1_.value(), ", ")); - } - // GPU HBM 2 temperature - if (this->temperature_hbm_2_.has_value()) { - 
str += std::format(" temperature_hbm_2:\n" - " unit: \"m°C\"\n" - " values: [{}]\n", - detail::join(this->temperature_hbm_2_.value(), ", ")); - } - // GPU HBM 3 temperature - if (this->temperature_hbm_3_.has_value()) { - str += std::format(" temperature_hbm_3:\n" - " unit: \"m°C\"\n" - " values: [{}]\n", - detail::join(this->temperature_hbm_3_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples) { - return out << std::format("num_fans [int]: {}\n" - "max_fan_speed [int]: {}\n" - "temperature_edge_min [m°C]: {}\n" - "temperature_edge_max [m°C]: {}\n" - "temperature_hotspot_min [m°C]: {}\n" - "temperature_hotspot_max [m°C]: {}\n" - "temperature_memory_min [m°C]: {}\n" - "temperature_memory_max [m°C]: {}\n" - "temperature_hbm_0_min [m°C]: {}\n" - "temperature_hbm_0_max [m°C]: {}\n" - "temperature_hbm_1_min [m°C]: {}\n" - "temperature_hbm_1_max [m°C]: {}\n" - "temperature_hbm_2_min [m°C]: {}\n" - "temperature_hbm_2_max [m°C]: {}\n" - "temperature_hbm_3_min [m°C]: {}\n" - "temperature_hbm_3_max [m°C]: {}\n" - "fan_speed [%]: [{}]\n" - "temperature_edge [m°C]: [{}]\n" - "temperature_hotspot [m°C]: [{}]\n" - "temperature_memory [m°C]: [{}]\n" - "temperature_hbm_0 [m°C]: [{}]\n" - "temperature_hbm_1 [m°C]: [{}]\n" - "temperature_hbm_2 [m°C]: [{}]\n" - "temperature_hbm_3 [m°C]: [{}]", - detail::value_or_default(samples.get_num_fans()), - detail::value_or_default(samples.get_max_fan_speed()), - detail::value_or_default(samples.get_temperature_edge_min()), - detail::value_or_default(samples.get_temperature_edge_max()), - detail::value_or_default(samples.get_temperature_hotspot_min()), - detail::value_or_default(samples.get_temperature_hotspot_max()), - detail::value_or_default(samples.get_temperature_memory_min()), - detail::value_or_default(samples.get_temperature_memory_max()), - detail::value_or_default(samples.get_temperature_hbm_0_min()), - 
detail::value_or_default(samples.get_temperature_hbm_0_max()), - detail::value_or_default(samples.get_temperature_hbm_1_min()), - detail::value_or_default(samples.get_temperature_hbm_1_max()), - detail::value_or_default(samples.get_temperature_hbm_2_min()), - detail::value_or_default(samples.get_temperature_hbm_2_max()), - detail::value_or_default(samples.get_temperature_hbm_3_min()), - detail::value_or_default(samples.get_temperature_hbm_3_max()), - detail::join(detail::value_or_default(samples.get_fan_speed()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_edge()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hotspot()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_memory()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hbm_0()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hbm_1()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hbm_2()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_hbm_3()), ", ")); -} - -} // namespace hws diff --git a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp b/src/hardware_sampling/gpu_intel/level_zero_samples.cpp deleted file mode 100644 index 1c362cf..0000000 --- a/src/hardware_sampling/gpu_intel/level_zero_samples.cpp +++ /dev/null @@ -1,491 +0,0 @@ -/** - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "hardware_sampling/gpu_intel/level_zero_samples.hpp" - -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join} - -#include // std::format -#include // std::ostream -#include // std::string -#include // std::string_view -#include // std::remove_cvref_t, std::false_type, std::true_type -#include // std::vector - -namespace hws { - -namespace detail { - -template -struct is_vector : std::false_type { }; - -template -struct is_vector> : std::true_type { }; - -template -constexpr bool is_vector_v = is_vector::value; - -template -void append_map_values(std::string &str, const std::string_view entry_name, const MapType &map) { - if (map.has_value()) { - for (const auto &[key, value] : map.value()) { - if constexpr (is_vector_v>) { - str += std::format("{}_{}: [{}]\n", entry_name, key, detail::join(value, ", ")); - } else { - str += std::format("{}_{}: {}\n", entry_name, key, value); - } - } - } -} - -} // namespace detail - -//*************************************************************************************************************************************// -// general samples // -//*************************************************************************************************************************************// - -std::string level_zero_general_samples::generate_yaml_string() const { - std::string str{ "general:\n" }; - - // the model name - if (this->name_.has_value()) { - str += std::format(" name:\n" - " unit: \"string\"\n" - " values: \"{}\"\n", - this->name_.value()); - } - // the standby mode - if (this->standby_mode_.has_value()) { - str += std::format(" standby_mode:\n" - " unit: \"string\"\n" - " values: \"{}\"\n", - this->standby_mode_.value()); - } - // the number of threads per EU unit - if (this->num_threads_per_eu_.has_value()) { - str += std::format(" num_threads_per_eu:\n" - " unit: \"int\"\n" - " values: {}\n", - this->num_threads_per_eu_.value()); - } - // the EU SIMD width - if 
(this->eu_simd_width_.has_value()) { - str += std::format(" physical_eu_simd_width:\n" - " unit: \"int\"\n" - " values: {}\n", - this->eu_simd_width_.value()); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) { - return out << std::format("name [string]: {}\n" - "standby_mode [string]: {}\n" - "num_threads_per_eu [int]: {}\n" - "eu_simd_width [int]: {}", - detail::value_or_default(samples.get_name()), - detail::value_or_default(samples.get_standby_mode()), - detail::value_or_default(samples.get_num_threads_per_eu()), - detail::value_or_default(samples.get_eu_simd_width())); -} - -//*************************************************************************************************************************************// -// clock samples // -//*************************************************************************************************************************************// - -std::string level_zero_clock_samples::generate_yaml_string() const { - std::string str{ "clock:\n" }; - - // minimum GPU core clock - if (this->clock_gpu_min_.has_value()) { - str += std::format(" clock_gpu_min:\n" - " unit: \"MHz\"\n" - " values: {}\n", - this->clock_gpu_min_.value()); - } - // maximum GPU core clock - if (this->clock_gpu_max_.has_value()) { - str += std::format(" clock_gpu_max:\n" - " unit: \"MHz\"\n" - " values: {}\n", - this->clock_gpu_max_.value()); - } - // all possible GPU core clock frequencies - if (this->available_clocks_gpu_.has_value()) { - str += std::format(" available_clocks_gpu:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->available_clocks_gpu_.value(), ", ")); - } - // minimum memory clock - if (this->clock_mem_min_.has_value()) { - str += std::format(" clock_mem_min:\n" - " unit: \"MHz\"\n" - " values: {}\n", - this->clock_mem_min_.value()); - } - // maximum memory clock - if (this->clock_mem_max_.has_value()) { - str += 
std::format(" clock_mem_max:\n" - " unit: \"MHz\"\n" - " values: {}\n", - this->clock_mem_max_.value()); - } - // all possible memory clock frequencies - if (this->available_clocks_mem_.has_value()) { - str += std::format(" available_clocks_mem:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->available_clocks_mem_.value(), ", ")); - } - - // the maximum GPU core frequency based on the current TDP limit - if (this->tdp_frequency_limit_gpu_.has_value()) { - str += std::format(" tdp_frequency_limit_gpu:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->tdp_frequency_limit_gpu_.value(), ", ")); - } - // the current GPU core clock frequency - if (this->clock_gpu_.has_value()) { - str += std::format(" clock_gpu:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->clock_gpu_.value(), ", ")); - } - // the current GPU core throttle reason - if (this->throttle_reason_gpu_.has_value()) { - str += std::format(" throttle_reason_gpu:\n" - " unit: \"bitmask\"\n" - " values: [{}]\n", - detail::join(this->throttle_reason_gpu_.value(), ", ")); - } - // the maximum memory frequency based on the current TDP limit - if (this->tdp_frequency_limit_mem_.has_value()) { - str += std::format(" tdp_frequency_limit_mem:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->tdp_frequency_limit_mem_.value(), ", ")); - } - // the current memory clock frequency - if (this->clock_mem_.has_value()) { - str += std::format(" clock_mem:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->clock_mem_.value(), ", ")); - } - // the current memory throttle reason - if (this->throttle_reason_mem_.has_value()) { - str += std::format(" throttle_reason_mem:\n" - " unit: \"bitmask\"\n" - " values: [{}]\n", - detail::join(this->throttle_reason_mem_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samples) { - return out << 
std::format("clock_gpu_min [MHz]: {}\n" - "clock_gpu_max [MHz]: {}\n" - "available_clocks_gpu [MHz]: [{}]\n" - "clock_mem_min [MHz]: {}\n" - "clock_mem_max [MHz]: {}\n" - "available_clocks_mem [MHz]: [{}]\n" - "tdp_frequency_limit_gpu [MHz]: [{}]\n" - "clock_gpu [MHz]: [{}]\n" - "throttle_reason_gpu [bitmask]: [{}]\n" - "tdp_frequency_limit_mem [MHz]: [{}]\n" - "clock_mem [MHz]: [{}]\n" - "throttle_reason_mem [bitmask]: [{}]", - detail::value_or_default(samples.get_clock_gpu_min()), - detail::value_or_default(samples.get_clock_gpu_max()), - detail::join(detail::value_or_default(samples.get_available_clocks_gpu()), ", "), - detail::value_or_default(samples.get_clock_mem_min()), - detail::value_or_default(samples.get_clock_mem_max()), - detail::join(detail::value_or_default(samples.get_available_clocks_mem()), ", "), - detail::join(detail::value_or_default(samples.get_tdp_frequency_limit_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_clock_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_throttle_reason_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_tdp_frequency_limit_mem()), ", "), - detail::join(detail::value_or_default(samples.get_clock_mem()), ", "), - detail::join(detail::value_or_default(samples.get_throttle_reason_mem()), ", ")); -} - -//*************************************************************************************************************************************// -// power samples // -//*************************************************************************************************************************************// - -std::string level_zero_power_samples::generate_yaml_string() const { - std::string str{ "power:\n" }; - - // flag whether the energy threshold is enabled - if (this->energy_threshold_enabled_.has_value()) { - str += std::format(" energy_threshold_enabled:\n" - " unit: \"bool\"\n" - " values: {}\n", - this->energy_threshold_enabled_.value()); - } - // the energy threshold - if 
(this->energy_threshold_.has_value()) { - str += std::format(" energy_threshold:\n" - " unit: \"J\"\n" - " values: {}\n", - this->energy_threshold_.value()); - } - - // the total consumed energy - if (this->power_total_energy_consumption_.has_value()) { - decltype(level_zero_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size()); - for (std::size_t i = 0; i < consumed_energy.size(); ++i) { - consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front(); - } - str += std::format(" power_total_energy_consumed:\n" - " unit: \"J\"\n" - " values: [{}]\n", - detail::join(consumed_energy, ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const level_zero_power_samples &samples) { - return out << std::format("energy_threshold_enabled [bool]: {}\n" - "energy_threshold [J]: {}\n" - "power_total_energy_consumption [J]: [{}]", - detail::value_or_default(samples.get_energy_threshold_enabled()), - detail::value_or_default(samples.get_energy_threshold()), - detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", ")); -} - -//*************************************************************************************************************************************// -// memory samples // -//*************************************************************************************************************************************// - -std::string level_zero_memory_samples::generate_yaml_string() const { - std::string str{ "memory:\n" }; - - // the total memory - if (this->memory_total_.has_value()) { - for (const auto &[key, value] : this->memory_total_.value()) { - str += std::format(" memory_total_{}:\n" - " unit: \"B\"\n" - " values: {}\n", - key, - value); - } - } - // the total allocatable memory - if (this->allocatable_memory_total_.has_value()) { - for (const 
auto &[key, value] : this->allocatable_memory_total_.value()) { - str += std::format(" allocatable_memory_total_{}:\n" - " unit: \"B\"\n" - " values: {}\n", - key, - value); - } - } - // the pcie max bandwidth - if (this->pcie_link_max_speed_.has_value()) { - str += std::format(" pcie_max_bandwidth:\n" - " unit: \"BPS\"\n" - " values: {}\n", - this->pcie_link_max_speed_.value()); - } - // the pcie link width - if (this->pcie_max_width_.has_value()) { - str += std::format(" max_pcie_link_width:\n" - " unit: \"int\"\n" - " values: {}\n", - this->pcie_max_width_.value()); - } - // the pcie generation - if (this->max_pcie_link_generation_.has_value()) { - str += std::format(" max_pcie_link_generation:\n" - " unit: \"int\"\n" - " values: {}\n", - this->max_pcie_link_generation_.value()); - } - // the memory bus width - if (this->bus_width_.has_value()) { - for (const auto &[key, value] : this->bus_width_.value()) { - str += std::format(" memory_bus_width_{}:\n" - " unit: \"Bit\"\n" - " values: {}\n", - key, - value); - } - } - // the number of memory channels - if (this->num_channels_.has_value()) { - for (const auto &[key, value] : this->num_channels_.value()) { - str += std::format(" memory_num_channels_{}:\n" - " unit: \"int\"\n" - " values: {}\n", - key, - value); - } - } - // the memory location (system or device) - if (this->location_.has_value()) { - for (const auto &[key, value] : this->location_.value()) { - str += std::format(" memory_location_{}:\n" - " unit: \"string\"\n" - " values: \"{}\"\n", - key, - value); - } - } - - // the currently free and used memory - if (this->memory_free_.has_value()) { - for (const auto &[key, value] : this->memory_free_.value()) { - str += std::format(" memory_free_{}:\n" - " unit: \"string\"\n" - " values: [{}]\n", - key, - detail::join(value, ", ")); - - // calculate the used memory - if (this->allocatable_memory_total_.has_value()) { - decltype(level_zero_memory_samples::memory_free_)::value_type::mapped_type 
memory_used(value.size(), this->allocatable_memory_total_->at(key)); - for (std::size_t i = 0; i < memory_used.size(); ++i) { - memory_used[i] -= value[i]; - } - str += std::format(" memory_used_{}:\n" - " unit: \"string\"\n" - " values: [{}]\n", - key, - detail::join(memory_used, ", ")); - } - } - } - // PCIe link speed - if (this->pcie_link_speed_.has_value()) { - str += std::format(" pcie_bandwidth:\n" - " unit: \"MBPS\"\n" - " values: [{}]\n", - detail::join(this->pcie_link_speed_.value(), ", ")); - } - // PCIe link width - if (this->pcie_link_width_.has_value()) { - str += std::format(" pcie_link_width:\n" - " unit: \"int\"\n" - " values: [{}]\n", - detail::join(this->pcie_link_width_.value(), ", ")); - } - // PCIe link generation - if (this->pcie_link_generation_.has_value()) { - str += std::format(" pcie_link_generation:\n" - " unit: \"int\"\n" - " values: [{}]\n", - detail::join(this->pcie_link_generation_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &samples) { - std::string str{}; - - detail::append_map_values(str, "memory_total [B]", samples.get_memory_total()); - detail::append_map_values(str, "allocatable_memory_total [B]", samples.get_allocatable_memory_total()); - - str += std::format("pcie_link_max_speed [BPS]: {}\n" - "pcie_max_width [int]: {}\n" - "max_pcie_link_generation [int]: {}\n", - detail::value_or_default(samples.get_pcie_link_max_speed()), - detail::value_or_default(samples.get_pcie_max_width()), - detail::value_or_default(samples.get_max_pcie_link_generation())); - - detail::append_map_values(str, "bus_width [Bit]", samples.get_bus_width()); - detail::append_map_values(str, "num_channels [int]", samples.get_num_channels()); - detail::append_map_values(str, "location [string]", samples.get_location()); - detail::append_map_values(str, "memory_free [string]", samples.get_memory_free()); - - str += 
std::format("pcie_link_speed [MBPS]: [{}]\n" - "pcie_link_width [int]: [{}]\n" - "pcie_link_generation [int]: [{}]", - detail::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_width()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_generation()), ", ")); - - return out << str; -} - -//*************************************************************************************************************************************// -// temperature samples // -//*************************************************************************************************************************************// - -std::string level_zero_temperature_samples::generate_yaml_string() const { - std::string str{ "temperature:\n" }; - - // the maximum sensor temperature - if (this->temperature_max_.has_value()) { - for (const auto &[key, value] : this->temperature_max_.value()) { - str += std::format(" temperature_{}_max:\n" - " unit: \"°C\"\n" - " values: {}\n", - key, - value); - } - } - - // the current PSU temperatures - if (this->temperature_psu_.has_value()) { - str += std::format(" temperature_psu:\n" - " unit: \"°C\"\n" - " values: [{}]\n", - detail::join(this->temperature_psu_.value(), ", ")); - } - // the current sensor temperatures - if (this->temperature_.has_value()) { - for (const auto &[key, value] : this->temperature_.value()) { - str += std::format(" temperature_{}:\n" - " unit: \"°C\"\n" - " values: [{}]\n", - key, - detail::join(value, ", ")); - } - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples &samples) { - std::string str{}; - - detail::append_map_values(str, "temperature_max [°C]", samples.get_temperature_max()); - - str += std::format("temperature_psu [°C]: [{}]\n", - detail::join(detail::value_or_default(samples.get_temperature_psu()), ", ")); - - 
detail::append_map_values(str, "temperature [°C]", samples.get_temperature()); - - // remove last newline - str.pop_back(); - - return out << str; -} - -} // namespace hws diff --git a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp b/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp deleted file mode 100644 index 4d9fbdc..0000000 --- a/src/hardware_sampling/gpu_nvidia/hardware_sampler.cpp +++ /dev/null @@ -1,471 +0,0 @@ -/** - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. - */ - -#include "hardware_sampling/gpu_nvidia/hardware_sampler.hpp" - -#include "hardware_sampling/gpu_nvidia/nvml_device_handle_impl.hpp" // hws::detail::nvml_device_handle implementation -#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} -#include "hardware_sampling/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::detail::{time_points_to_epoch, join} - -#include "nvml.h" // NVML runtime functions - -#include // std::min_element -#include // std::chrono::{steady_clock, duration_cast, milliseconds} -#include // std::size_t -#include // std::exception, std::terminate -#include // std::format -#include // std::ios_base -#include // std::cerr, std::endl -#include // std::optional -#include // std::ostream -#include // std::runtime_error -#include // std::string -#include // std::this_thread -#include // std::vector - -namespace hws { - -gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler() : - gpu_nvidia_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { } - -gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id) : - gpu_nvidia_hardware_sampler{ 
device_id, HWS_SAMPLING_INTERVAL } { } - -gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::chrono::milliseconds sampling_interval) : - gpu_nvidia_hardware_sampler{ 0, sampling_interval } { } - -gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) : - hardware_sampler{ sampling_interval } { - // make sure that nvmlInit is only called once for all instances - if (instances_++ == 0) { - HWS_NVML_ERROR_CHECK(nvmlInit()); - // notify that initialization has been finished - init_finished_ = true; - } else { - // wait until init has been finished! - while (!init_finished_) { } - } - - // initialize samples -> can't be done beforehand since the device handle can only be initialized after a call to nvmlInit - device_ = detail::nvml_device_handle{ device_id }; -} - -gpu_nvidia_hardware_sampler::~gpu_nvidia_hardware_sampler() { - try { - // if this hardware sampler is still sampling, stop it - if (this->has_sampling_started() && !this->has_sampling_stopped()) { - this->stop_sampling(); - } - - // the last instance must shut down the NVML runtime - // make sure that nvmlShutdown is only called once - if (--instances_ == 0) { - HWS_NVML_ERROR_CHECK(nvmlShutdown()); - // reset init_finished flag - init_finished_ = false; - } - } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; - std::terminate(); - } -} - -void gpu_nvidia_hardware_sampler::sampling_loop() { - // get the nvml handle from the device - nvmlDevice_t device = device_.get_impl().device; - - // - // add samples where we only have to retrieve the value once - // - - this->add_time_point(std::chrono::steady_clock::now()); - - // retrieve initial general information - { - // fixed information -> only retrieved once - std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0'); - if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) { - general_samples_.name_ = name.substr(0, 
name.find_first_of('\0')); - } - - nvmlEnableState_t mode{}; - if (nvmlDeviceGetPersistenceMode(device, &mode) == NVML_SUCCESS) { - general_samples_.persistence_mode_ = mode == NVML_FEATURE_ENABLED; - } - - decltype(general_samples_.num_cores_)::value_type num_cores{}; - if (nvmlDeviceGetNumGpuCores(device, &num_cores) == NVML_SUCCESS) { - general_samples_.num_cores_ = num_cores; - } - - // queried samples -> retrieved every iteration if available - nvmlPstates_t pstate{}; - if (nvmlDeviceGetPerformanceState(device, &pstate) == NVML_SUCCESS) { - general_samples_.performance_state_ = decltype(general_samples_.performance_state_)::value_type{ static_cast(pstate) }; - } - - nvmlUtilization_t util{}; - if (nvmlDeviceGetUtilizationRates(device, &util) == NVML_SUCCESS) { - general_samples_.utilization_gpu_ = decltype(general_samples_.utilization_gpu_)::value_type{ util.gpu }; - general_samples_.utilization_mem_ = decltype(general_samples_.utilization_gpu_)::value_type{ util.memory }; - } - } - - // retrieve initial clock related information - { - // fixed information -> only retrieved once - decltype(clock_samples_.adaptive_clock_status_)::value_type adaptive_clock_status{}; - if (nvmlDeviceGetAdaptiveClockInfoStatus(device, &adaptive_clock_status) == NVML_SUCCESS) { - clock_samples_.adaptive_clock_status_ = adaptive_clock_status; - } - - decltype(clock_samples_.clock_graph_max_)::value_type clock_graph_max{}; - if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph_max) == NVML_SUCCESS) { - clock_samples_.clock_graph_max_ = clock_graph_max; - } - - decltype(clock_samples_.clock_sm_max_)::value_type clock_sm_max{}; - if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_SM, &clock_sm_max) == NVML_SUCCESS) { - clock_samples_.clock_sm_max_ = clock_sm_max; - } - - decltype(clock_samples_.clock_mem_max_)::value_type clock_mem_max{}; - if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_MEM, &clock_mem_max) == NVML_SUCCESS) { - clock_samples_.clock_mem_max_ = 
clock_mem_max; - } - - { - unsigned int clock_count{ 128 }; - std::vector supported_clocks(clock_count); - if (nvmlDeviceGetSupportedMemoryClocks(device, &clock_count, supported_clocks.data()) == NVML_SUCCESS) { - supported_clocks.resize(clock_count); - clock_samples_.clock_mem_min_ = *std::min_element(supported_clocks.cbegin(), supported_clocks.cend()); - } - } - - { - unsigned int clock_count{ 128 }; - std::vector supported_clocks(clock_count); - if (clock_samples_.clock_mem_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, clock_samples_.clock_mem_min_.value(), &clock_count, supported_clocks.data()) == NVML_SUCCESS) { - supported_clocks.resize(clock_count); - clock_samples_.clock_graph_min_ = *std::min_element(supported_clocks.cbegin(), supported_clocks.cend()); - } - } - - // queried samples -> retrieved every iteration if available - decltype(clock_samples_.clock_graph_)::value_type::value_type clock_graph{}; - if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph) == NVML_SUCCESS) { - clock_samples_.clock_graph_ = decltype(clock_samples_.clock_graph_)::value_type{ clock_graph }; - } - - decltype(clock_samples_.clock_sm_)::value_type::value_type clock_sm{}; - if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &clock_sm) == NVML_SUCCESS) { - clock_samples_.clock_sm_ = decltype(clock_samples_.clock_sm_)::value_type{ clock_sm }; - } - - decltype(clock_samples_.clock_mem_)::value_type::value_type clock_mem{}; - if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &clock_mem) == NVML_SUCCESS) { - clock_samples_.clock_mem_ = decltype(clock_samples_.clock_mem_)::value_type{ clock_mem }; - } - - decltype(clock_samples_.clock_throttle_reason_)::value_type::value_type clock_throttle_reason{}; - if (nvmlDeviceGetCurrentClocksThrottleReasons(device, &clock_throttle_reason) == NVML_SUCCESS) { - clock_samples_.clock_throttle_reason_ = decltype(clock_samples_.clock_throttle_reason_)::value_type{ clock_throttle_reason }; - } - - nvmlEnableState_t 
mode{}; - nvmlEnableState_t default_mode{}; - if (nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode) == NVML_SUCCESS) { - clock_samples_.auto_boosted_clocks_ = decltype(clock_samples_.auto_boosted_clocks_)::value_type{ mode == NVML_FEATURE_ENABLED }; - } - } - - // retrieve initial power related information - { - // fixed information -> only retrieved once - nvmlEnableState_t mode{}; - if (nvmlDeviceGetPowerManagementMode(device, &mode) == NVML_SUCCESS) { - power_samples_.power_management_mode_ = mode == NVML_FEATURE_ENABLED; - } - - decltype(power_samples_.power_management_limit_)::value_type power_management_limit{}; - if (nvmlDeviceGetPowerManagementLimit(device, &power_management_limit) == NVML_SUCCESS) { - power_samples_.power_management_limit_ = power_management_limit; - } - - decltype(power_samples_.power_enforced_limit_)::value_type power_enforced_limit{}; - if (nvmlDeviceGetEnforcedPowerLimit(device, &power_enforced_limit) == NVML_SUCCESS) { - power_samples_.power_enforced_limit_ = power_enforced_limit; - } - - // queried samples -> retrieved every iteration if available - nvmlPstates_t pstate{}; - if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) { - power_samples_.power_state_ = decltype(power_samples_.power_state_)::value_type{ static_cast(pstate) }; - } - - decltype(power_samples_.power_usage_)::value_type::value_type power_usage{}; - if (nvmlDeviceGetPowerUsage(device, &power_usage) == NVML_SUCCESS) { - power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ power_usage }; - } - - decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type power_total_energy_consumption{}; - if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) { - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ power_total_energy_consumption }; - } - } - - // retrieve initial memory related 
information - { - // fixed information -> only retrieved once - nvmlMemory_t memory_info{}; - if (nvmlDeviceGetMemoryInfo(device, &memory_info) == NVML_SUCCESS) { - memory_samples_.memory_total_ = memory_info.total; - // queried samples -> retrieved every iteration if available - memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{ memory_info.free }; - memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_info.used }; - } - - decltype(memory_samples_.memory_bus_width_)::value_type memory_bus_width{}; - if (nvmlDeviceGetMemoryBusWidth(device, &memory_bus_width) == NVML_SUCCESS) { - memory_samples_.memory_bus_width_ = memory_bus_width; - } - - decltype(memory_samples_.max_pcie_link_generation_)::value_type max_pcie_link_generation{}; - if (nvmlDeviceGetMaxPcieLinkGeneration(device, &max_pcie_link_generation) == NVML_SUCCESS) { - memory_samples_.max_pcie_link_generation_ = max_pcie_link_generation; - } - - decltype(memory_samples_.pcie_link_max_speed_)::value_type pcie_link_max_speed{}; - if (nvmlDeviceGetPcieLinkMaxSpeed(device, &pcie_link_max_speed) == NVML_SUCCESS) { - memory_samples_.pcie_link_max_speed_ = pcie_link_max_speed; - } - - // queried samples -> retrieved every iteration if available - decltype(memory_samples_.pcie_link_width_)::value_type::value_type pcie_link_width{}; - if (nvmlDeviceGetCurrPcieLinkWidth(device, &pcie_link_width) == NVML_SUCCESS) { - memory_samples_.pcie_link_width_ = decltype(memory_samples_.pcie_link_width_)::value_type{ pcie_link_width }; - } - - decltype(memory_samples_.pcie_link_generation_)::value_type::value_type pcie_link_generation{}; - if (nvmlDeviceGetCurrPcieLinkGeneration(device, &pcie_link_generation) == NVML_SUCCESS) { - memory_samples_.pcie_link_generation_ = decltype(memory_samples_.pcie_link_generation_)::value_type{ pcie_link_generation }; - } - } - - // retrieve initial temperature related information - { - // fixed information -> only retrieved 
once - decltype(temperature_samples_.num_fans_)::value_type num_fans{}; - if (nvmlDeviceGetNumFans(device, &num_fans) == NVML_SUCCESS) { - temperature_samples_.num_fans_ = num_fans; - } - - if (temperature_samples_.num_fans_.has_value() && temperature_samples_.num_fans_.value() > 0) { - decltype(temperature_samples_.min_fan_speed_)::value_type min_fan_speed{}; - decltype(temperature_samples_.max_fan_speed_)::value_type max_fan_speed{}; - if (nvmlDeviceGetMinMaxFanSpeed(device, &min_fan_speed, &max_fan_speed) == NVML_SUCCESS) { - temperature_samples_.min_fan_speed_ = min_fan_speed; - temperature_samples_.max_fan_speed_ = max_fan_speed; - } - } - - decltype(temperature_samples_.temperature_threshold_gpu_max_)::value_type temperature_threshold_gpu_max{}; - if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_GPU_MAX, &temperature_threshold_gpu_max) == NVML_SUCCESS) { - temperature_samples_.temperature_threshold_gpu_max_ = temperature_threshold_gpu_max; - } - - decltype(temperature_samples_.temperature_threshold_mem_max_)::value_type temperature_threshold_mem_max{}; - if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_MEM_MAX, &temperature_threshold_mem_max) == NVML_SUCCESS) { - temperature_samples_.temperature_threshold_mem_max_ = temperature_threshold_mem_max; - } - - // queried samples -> retrieved every iteration if available - decltype(temperature_samples_.fan_speed_)::value_type::value_type fan_speed{}; - if (nvmlDeviceGetFanSpeed(device, &fan_speed) == NVML_SUCCESS) { - temperature_samples_.fan_speed_ = decltype(temperature_samples_.fan_speed_)::value_type{ fan_speed }; - } - - decltype(temperature_samples_.temperature_gpu_)::value_type::value_type temperature_gpu{}; - if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature_gpu) == NVML_SUCCESS) { - temperature_samples_.temperature_gpu_ = decltype(temperature_samples_.temperature_gpu_)::value_type{ temperature_gpu }; - } - } - - // - // loop until 
stop_sampling() is called - // - - while (!this->has_sampling_stopped()) { - // only sample values if the sampler currently isn't paused - if (this->is_sampling()) { - // add current time point - this->add_time_point(std::chrono::steady_clock::now()); - - // retrieve general samples - { - if (general_samples_.performance_state_.has_value()) { - nvmlPstates_t pstate{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate)); - general_samples_.performance_state_->push_back(static_cast(pstate)); - } - - if (general_samples_.utilization_gpu_.has_value() && general_samples_.utilization_mem_.has_value()) { - nvmlUtilization_t util{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util)); - general_samples_.utilization_gpu_->push_back(util.gpu); - general_samples_.utilization_mem_->push_back(util.memory); - } - } - - // retrieve clock related samples - { - if (clock_samples_.clock_graph_.has_value()) { - decltype(clock_samples_.clock_graph_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value)); - clock_samples_.clock_graph_->push_back(value); - } - - if (clock_samples_.clock_sm_.has_value()) { - decltype(clock_samples_.clock_sm_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value)); - clock_samples_.clock_sm_->push_back(value); - } - - if (clock_samples_.clock_mem_.has_value()) { - decltype(clock_samples_.clock_mem_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value)); - clock_samples_.clock_mem_->push_back(value); - } - - if (clock_samples_.clock_throttle_reason_.has_value()) { - decltype(clock_samples_.clock_throttle_reason_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksThrottleReasons(device, &value)); - clock_samples_.clock_throttle_reason_->push_back(value); - } - - if (clock_samples_.auto_boosted_clocks_.has_value()) { - 
nvmlEnableState_t mode{}; - nvmlEnableState_t default_mode{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode)); - clock_samples_.auto_boosted_clocks_->push_back(mode == NVML_FEATURE_ENABLED); - } - } - - // retrieve power related information - { - if (power_samples_.power_state_.has_value()) { - nvmlPstates_t pstate{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate)); - power_samples_.power_state_->push_back(static_cast(pstate)); - } - - if (power_samples_.power_usage_.has_value()) { - decltype(power_samples_.power_usage_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value)); - power_samples_.power_usage_->push_back(value); - } - - if (power_samples_.power_total_energy_consumption_.has_value()) { - decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value)); - power_samples_.power_total_energy_consumption_->push_back(value); - } - } - - // retrieve memory related information - { - if (memory_samples_.memory_free_.has_value() && memory_samples_.memory_used_.has_value()) { - nvmlMemory_t memory_info{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info)); - memory_samples_.memory_free_->push_back(memory_info.free); - memory_samples_.memory_used_->push_back(memory_info.used); - } - - if (memory_samples_.pcie_link_width_.has_value()) { - decltype(memory_samples_.pcie_link_width_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value)); - memory_samples_.pcie_link_width_->push_back(value); - } - - if (memory_samples_.pcie_link_generation_.has_value()) { - decltype(memory_samples_.pcie_link_generation_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkGeneration(device, &value)); - memory_samples_.pcie_link_generation_->push_back(value); - } - } - - // retrieve 
temperature related information - { - if (temperature_samples_.fan_speed_.has_value()) { - decltype(temperature_samples_.fan_speed_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value)); - temperature_samples_.fan_speed_->push_back(value); - } - - if (temperature_samples_.temperature_gpu_.has_value()) { - decltype(temperature_samples_.temperature_gpu_)::value_type::value_type value{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value)); - temperature_samples_.temperature_gpu_->push_back(value); - } - } - } - - // wait for the sampling interval to pass to retrieve the next sample - std::this_thread::sleep_for(this->sampling_interval()); - } -} - -std::string gpu_nvidia_hardware_sampler::device_identification() const { - nvmlPciInfo_st pcie_info{}; - HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info)); - return std::format("gpu_nvidia_device_{}_{}", pcie_info.bus, pcie_info.device); -} - -std::string gpu_nvidia_hardware_sampler::generate_yaml_string() const { - // check whether it's safe to generate the YAML entry - if (this->is_sampling()) { - throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" 
}; - } - - return std::format("{}\n" - "{}\n" - "{}\n" - "{}\n" - "{}", - general_samples_.generate_yaml_string(), - clock_samples_.generate_yaml_string(), - power_samples_.generate_yaml_string(), - memory_samples_.generate_yaml_string(), - temperature_samples_.generate_yaml_string()); -} - -std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &sampler) { - if (sampler.is_sampling()) { - out.setstate(std::ios_base::failbit); - return out; - } else { - return out << std::format("sampling interval: {}\n" - "time points: [{}]\n\n" - "general samples:\n{}\n\n" - "clock samples:\n{}\n\n" - "power samples:\n{}\n\n" - "memory samples:\n{}\n\n" - "temperature samples:\n{}", - sampler.sampling_interval(), - detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), - sampler.general_samples(), - sampler.clock_samples(), - sampler.power_samples(), - sampler.memory_samples(), - sampler.temperature_samples()); - } -} - -} // namespace hws diff --git a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp b/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp deleted file mode 100644 index 76ffe47..0000000 --- a/src/hardware_sampling/gpu_nvidia/nvml_samples.cpp +++ /dev/null @@ -1,466 +0,0 @@ -/** - * @author Marcel Breyer - * @copyright 2024-today All Rights Reserved - * @license This file is released under the MIT license. - * See the LICENSE.md file in the project root for full license information. 
- */ - -#include "hardware_sampling/gpu_nvidia/nvml_samples.hpp" - -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join} - -#include "nvml.h" // NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED - -#include // std::format -#include // std::ostream -#include // std::string - -namespace hws { - -//*************************************************************************************************************************************// -// general samples // -//*************************************************************************************************************************************// - -std::string nvml_general_samples::generate_yaml_string() const { - std::string str{ "general:\n" }; - - // device name - if (this->name_.has_value()) { - str += std::format(" name:\n" - " unit: \"string\"\n" - " values: \"{}\"\n", - this->name_.value()); - } - // persistence mode enabled - if (this->persistence_mode_.has_value()) { - str += std::format(" persistence_mode:\n" - " unit: \"bool\"\n" - " values: {}\n", - this->persistence_mode_.value()); - } - // number of cores - if (this->num_cores_.has_value()) { - str += std::format(" num_cores:\n" - " unit: \"int\"\n" - " values: {}\n", - this->num_cores_.value()); - } - - // performance state - if (this->performance_state_.has_value()) { - str += std::format(" performance_state:\n" - " unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n" - " values: [{}]\n", - detail::join(this->performance_state_.value(), ", ")); - } - // device compute utilization - if (this->utilization_gpu_.has_value()) { - str += std::format(" utilization_gpu:\n" - " unit: \"percentage\"\n" - " values: [{}]\n", - detail::join(this->utilization_gpu_.value(), ", ")); - } - - // device compute utilization - if (this->utilization_mem_.has_value()) { - str += std::format(" utilization_mem:\n" - " unit: \"percentage\"\n" - " values: [{}]\n", - detail::join(this->utilization_mem_.value(), ", ")); - } - - // remove 
last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) { - return out << std::format("name [string]: {}\n" - "persistence_mode [bool]: {}\n" - "num_cores [int]: {}\n" - "performance_state [int]: [{}]\n" - "utilization_gpu [%]: [{}]\n" - "utilization_mem [%]: [{}]", - detail::value_or_default(samples.get_name()), - detail::value_or_default(samples.get_persistence_mode()), - detail::value_or_default(samples.get_num_cores()), - detail::join(detail::value_or_default(samples.get_performance_state()), ", "), - detail::join(detail::value_or_default(samples.get_utilization_gpu()), ", "), - detail::join(detail::value_or_default(samples.get_utilization_mem()), ", ")); -} - -//*************************************************************************************************************************************// -// clock samples // -//*************************************************************************************************************************************// - -std::string nvml_clock_samples::generate_yaml_string() const { - std::string str{ "clock:\n" }; - - // adaptive clock status - if (this->adaptive_clock_status_.has_value()) { - str += std::format(" adaptive_clock_status:\n" - " unit: \"bool\"\n" - " values: {}\n", - this->adaptive_clock_status_.value() == NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED); - } - // maximum SM clock - if (this->clock_sm_max_.has_value()) { - str += std::format(" clock_sm_max:\n" - " unit: \"MHz\"\n" - " values: {}\n", - this->clock_sm_max_.value()); - } - // minimum memory clock - if (this->clock_mem_min_.has_value()) { - str += std::format(" clock_mem_min:\n" - " unit: \"MHz\"\n" - " values: {}\n", - this->clock_mem_min_.value()); - } - // maximum memory clock - if (this->clock_mem_max_.has_value()) { - str += std::format(" clock_mem_max:\n" - " unit: \"MHz\"\n" - " values: {}\n", - this->clock_mem_max_.value()); - } - // minimum graph clock - if 
(this->clock_graph_min_.has_value()) { - str += std::format(" clock_gpu_min:\n" - " unit: \"MHz\"\n" - " values: {}\n", - this->clock_graph_min_.value()); - } - // maximum graph clock - if (this->clock_graph_max_.has_value()) { - str += std::format(" clock_gpu_max:\n" - " unit: \"MHz\"\n" - " values: {}\n", - this->clock_graph_max_.value()); - } - - // SM clock - if (this->clock_sm_.has_value()) { - str += std::format(" clock_sm:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->clock_sm_.value(), ", ")); - } - // memory clock - if (this->clock_mem_.has_value()) { - str += std::format(" clock_mem:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->clock_mem_.value(), ", ")); - } - // graph clock - if (this->clock_graph_.has_value()) { - str += std::format(" clock_gpu:\n" - " unit: \"MHz\"\n" - " values: [{}]\n", - detail::join(this->clock_graph_.value(), ", ")); - } - // clock throttle reason - if (this->clock_throttle_reason_.has_value()) { - str += std::format(" clock_throttle_reason:\n" - " unit: \"bitmask\"\n" - " values: [{}]\n", - detail::join(this->clock_throttle_reason_.value(), ", ")); - } - // clock is auto-boosted - if (this->auto_boosted_clocks_.has_value()) { - str += std::format(" auto_boosted_clocks:\n" - " unit: \"bool\"\n" - " values: [{}]\n", - detail::join(this->auto_boosted_clocks_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) { - return out << std::format("adaptive_clock_status [int]: {}\n" - "clock_graph_min [MHz]: {}\n" - "clock_graph_max [MHz]: {}\n" - "clock_sm_max [MHz]: {}\n" - "clock_mem_min [MHz]: {}\n" - "clock_mem_max [MHz]: {}\n" - "clock_graph [MHz]: [{}]\n" - "clock_sm [MHz]: [{}]\n" - "clock_mem [MHz]: [{}]\n" - "clock_throttle_reason [bitmask]: [{}]\n" - "auto_boosted_clocks [bool]: [{}]", - detail::value_or_default(samples.get_adaptive_clock_status()), - 
detail::value_or_default(samples.get_clock_graph_min()), - detail::value_or_default(samples.get_clock_graph_max()), - detail::value_or_default(samples.get_clock_sm_max()), - detail::value_or_default(samples.get_clock_mem_min()), - detail::value_or_default(samples.get_clock_mem_max()), - detail::join(detail::value_or_default(samples.get_clock_graph()), ", "), - detail::join(detail::value_or_default(samples.get_clock_sm()), ", "), - detail::join(detail::value_or_default(samples.get_clock_mem()), ", "), - detail::join(detail::value_or_default(samples.get_clock_throttle_reason()), ", "), - detail::join(detail::value_or_default(samples.get_auto_boosted_clocks()), ", ")); -} - -//*************************************************************************************************************************************// -// power samples // -//*************************************************************************************************************************************// - -std::string nvml_power_samples::generate_yaml_string() const { - std::string str{ "power:\n" }; - - // the power management mode - if (this->power_management_mode_.has_value()) { - str += std::format(" power_management_mode:\n" - " unit: \"bool\"\n" - " values: {}\n", - this->power_management_mode_.value()); - } - // power management limit - if (this->power_management_limit_.has_value()) { - str += std::format(" power_management_limit:\n" - " unit: \"mW\"\n" - " values: {}\n", - this->power_management_limit_.value()); - } - // power enforced limit - if (this->power_enforced_limit_.has_value()) { - str += std::format(" power_enforced_limit:\n" - " unit: \"mW\"\n" - " values: {}\n", - this->power_enforced_limit_.value()); - } - - // power state - if (this->power_state_.has_value()) { - str += std::format(" power_state:\n" - " unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n" - " values: [{}]\n", - detail::join(this->power_state_.value(), ", ")); - } - // current power usage - 
if (this->power_usage_.has_value()) { - str += std::format(" power_usage:\n" - " unit: \"mW\"\n" - " values: [{}]\n", - detail::join(this->power_usage_.value(), ", ")); - } - // total energy consumed - if (this->power_total_energy_consumption_.has_value()) { - decltype(nvml_power_samples::power_total_energy_consumption_)::value_type consumed_energy(this->power_total_energy_consumption_->size()); - for (std::size_t i = 0; i < consumed_energy.size(); ++i) { - consumed_energy[i] = this->power_total_energy_consumption_.value()[i] - this->power_total_energy_consumption_->front(); - } - str += std::format(" power_total_energy_consumed:\n" - " unit: \"J\"\n" - " values: [{}]\n", - detail::join(consumed_energy, ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) { - return out << std::format("power_management_mode [bool]: {}\n" - "power_management_limit [mW]: {}\n" - "power_enforced_limit [mW]: {}\n" - "power_state [int]: [{}]\n" - "power_usage [mW]: [{}]\n" - "power_total_energy_consumption [J]: [{}]", - detail::value_or_default(samples.get_power_management_mode()), - detail::value_or_default(samples.get_power_management_limit()), - detail::value_or_default(samples.get_power_enforced_limit()), - detail::join(detail::value_or_default(samples.get_power_state()), ", "), - detail::join(detail::value_or_default(samples.get_power_usage()), ", "), - detail::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", ")); -} - -//*************************************************************************************************************************************// -// memory samples // -//*************************************************************************************************************************************// - -std::string nvml_memory_samples::generate_yaml_string() const { - std::string str{ "memory:\n" }; - - // total memory size - if 
(this->memory_total_.has_value()) { - str += std::format(" memory_total:\n" - " unit: \"B\"\n" - " values: {}\n", - this->memory_total_.value()); - } - // maximum PCIe link speed - if (this->pcie_link_max_speed_.has_value()) { - str += std::format(" pcie_max_bandwidth:\n" - " unit: \"MBPS\"\n" - " values: {}\n", - this->pcie_link_max_speed_.value()); - } - // memory bus width - if (this->memory_bus_width_.has_value()) { - str += std::format(" memory_bus_width:\n" - " unit: \"Bit\"\n" - " values: {}\n", - this->memory_bus_width_.value()); - } - // maximum PCIe link generation - if (this->max_pcie_link_generation_.has_value()) { - str += std::format(" max_pcie_link_generation:\n" - " unit: \"int\"\n" - " values: {}\n", - this->max_pcie_link_generation_.value()); - } - - // free memory size - if (this->memory_free_.has_value()) { - str += std::format(" memory_free:\n" - " unit: \"B\"\n" - " values: [{}]\n", - detail::join(this->memory_free_.value(), ", ")); - } - // used memory size - if (this->memory_used_.has_value()) { - str += std::format(" memory_used:\n" - " unit: \"B\"\n" - " values: [{}]\n", - detail::join(this->memory_used_.value(), ", ")); - } - // PCIe link speed - if (this->pcie_link_speed_.has_value()) { - str += std::format(" pcie_bandwidth:\n" - " unit: \"MBPS\"\n" - " values: [{}]\n", - detail::join(this->pcie_link_speed_.value(), ", ")); - } - // PCIe link width - if (this->pcie_link_width_.has_value()) { - str += std::format(" pcie_link_width:\n" - " unit: \"int\"\n" - " values: [{}]\n", - detail::join(this->pcie_link_width_.value(), ", ")); - } - // PCIe link generation - if (this->pcie_link_generation_.has_value()) { - str += std::format(" pcie_link_generation:\n" - " unit: \"int\"\n" - " values: [{}]\n", - detail::join(this->pcie_link_generation_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) { - return out << 
std::format("memory_total [B]: {}\n" - "pcie_link_max_speed [MBPS]: {}\n" - "memory_bus_width [Bit]: {}\n" - "max_pcie_link_generation [int]: {}\n" - "memory_free [B]: [{}]\n" - "memory_used [B]: [{}]\n" - "pcie_link_speed [MBPS]: [{}]\n" - "pcie_link_width [int]: [{}]\n" - "pcie_link_generation [int]: [{}]", - detail::value_or_default(samples.get_memory_total()), - detail::value_or_default(samples.get_pcie_link_max_speed()), - detail::value_or_default(samples.get_memory_bus_width()), - detail::value_or_default(samples.get_max_pcie_link_generation()), - detail::join(detail::value_or_default(samples.get_memory_free()), ", "), - detail::join(detail::value_or_default(samples.get_memory_used()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_speed()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_width()), ", "), - detail::join(detail::value_or_default(samples.get_pcie_link_generation()), ", ")); -} - -//*************************************************************************************************************************************// -// temperature samples // -//*************************************************************************************************************************************// - -std::string nvml_temperature_samples::generate_yaml_string() const { - std::string str{ "temperature:\n" }; - - // number of fans - if (this->num_fans_.has_value()) { - str += std::format(" num_fans:\n" - " unit: \"int\"\n" - " values: {}\n", - this->num_fans_.value()); - } - // min fan speed - if (this->min_fan_speed_.has_value()) { - str += std::format(" min_fan_speed:\n" - " unit: \"percentage\"\n" - " values: {}\n", - this->min_fan_speed_.value()); - } - // max fan speed - if (this->max_fan_speed_.has_value()) { - str += std::format(" max_fan_speed:\n" - " unit: \"percentage\"\n" - " values: {}\n", - this->max_fan_speed_.value()); - } - // temperature threshold GPU max - if 
(this->temperature_threshold_gpu_max_.has_value()) { - str += std::format(" temperature_gpu_max:\n" - " unit: \"°C\"\n" - " values: {}\n", - this->temperature_threshold_gpu_max_.value()); - } - // temperature threshold memory max - if (this->temperature_threshold_mem_max_.has_value()) { - str += std::format(" temperature_mem_max:\n" - " unit: \"°C\"\n" - " values: {}\n", - this->temperature_threshold_mem_max_.value()); - } - - // fan speed - if (this->fan_speed_.has_value()) { - str += std::format(" fan_speed:\n" - " unit: \"percentage\"\n" - " values: [{}]\n", - detail::join(this->fan_speed_.value(), ", ")); - } - // temperature GPU - if (this->temperature_gpu_.has_value()) { - str += std::format(" temperature_gpu:\n" - " unit: \"°C\"\n" - " values: [{}]\n", - detail::join(this->temperature_gpu_.value(), ", ")); - } - - // remove last newline - str.pop_back(); - - return str; -} - -std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samples) { - return out << std::format("num_fans [int]: {}\n" - "min_fan_speed [%]: {}\n" - "max_fan_speed [%]: {}\n" - "temperature_threshold_gpu_max [°C]: {}\n" - "temperature_threshold_mem_max [°C]: {}\n" - "fan_speed [%]: [{}]\n" - "temperature_gpu [°C]: [{}]", - detail::value_or_default(samples.get_num_fans()), - detail::value_or_default(samples.get_min_fan_speed()), - detail::value_or_default(samples.get_max_fan_speed()), - detail::value_or_default(samples.get_temperature_threshold_gpu_max()), - detail::value_or_default(samples.get_temperature_threshold_mem_max()), - detail::join(detail::value_or_default(samples.get_fan_speed()), ", "), - detail::join(detail::value_or_default(samples.get_temperature_gpu()), ", ")); -} - -} // namespace hws diff --git a/src/hardware_sampling/cpu/cpu_samples.cpp b/src/hws/cpu/cpu_samples.cpp similarity index 58% rename from src/hardware_sampling/cpu/cpu_samples.cpp rename to src/hws/cpu/cpu_samples.cpp index ef5a3b9..e5690d2 100644 --- 
a/src/hardware_sampling/cpu/cpu_samples.cpp +++ b/src/hws/cpu/cpu_samples.cpp @@ -5,13 +5,15 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/cpu/cpu_samples.hpp" +#include "hws/cpu/cpu_samples.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::{value_or_default, join} +#include "hws/utility.hpp" // hws::detail::{value_or_default, quote} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include // std::array #include // std::size_t -#include // std::format #include // std::ostream #include // std::regex, std::regex::extended, std::regex_match, std::regex_replace #include // std::string @@ -24,138 +26,155 @@ namespace hws { // general samples // //*************************************************************************************************************************************// +bool cpu_general_samples::has_samples() const { + return this->architecture_.has_value() || this->byte_order_.has_value() || this->num_cores_.has_value() || this->num_threads_.has_value() + || this->threads_per_core_.has_value() || this->cores_per_socket_.has_value() || this->num_sockets_.has_value() || this->numa_nodes_.has_value() + || this->vendor_id_.has_value() || this->name_.has_value() || this->flags_.has_value() || this->compute_utilization_.has_value() + || this->ipc_.has_value() || this->irq_.has_value() || this->smi_.has_value() || this->poll_.has_value() || this->poll_percent_.has_value(); +} + std::string cpu_general_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "general:\n" }; // architecture if (this->architecture_.has_value()) { - str += std::format(" architecture:\n" + str += fmt::format(" architecture:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->architecture_.value()); } // byte order if (this->byte_order_.has_value()) { - str += std::format(" 
byte_order:\n" + str += fmt::format(" byte_order:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->byte_order_.value()); } + // number of cores + if (this->num_cores_.has_value()) { + str += fmt::format(" num_cores:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_cores_.value()); + } // number of threads including hyper-threads if (this->num_threads_.has_value()) { - str += std::format(" num_threads:\n" + str += fmt::format(" num_threads:\n" " unit: \"int\"\n" " values: {}\n", this->num_threads_.value()); } // number of threads per core if (this->threads_per_core_.has_value()) { - str += std::format(" threads_per_core:\n" + str += fmt::format(" threads_per_core:\n" " unit: \"int\"\n" " values: {}\n", this->threads_per_core_.value()); } // number of cores per socket if (this->cores_per_socket_.has_value()) { - str += std::format(" cores_per_socket:\n" + str += fmt::format(" cores_per_socket:\n" " unit: \"int\"\n" " values: {}\n", this->cores_per_socket_.value()); } // number of cores per socket if (this->num_sockets_.has_value()) { - str += std::format(" num_sockets:\n" + str += fmt::format(" num_sockets:\n" " unit: \"int\"\n" " values: {}\n", this->num_sockets_.value()); } // number of NUMA nodes if (this->numa_nodes_.has_value()) { - str += std::format(" numa_nodes:\n" + str += fmt::format(" numa_nodes:\n" " unit: \"int\"\n" " values: {}\n", this->numa_nodes_.value()); } // the vendor specific ID if (this->vendor_id_.has_value()) { - str += std::format(" vendor_id:\n" + str += fmt::format(" vendor_id:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->vendor_id_.value()); } // the CPU name if (this->name_.has_value()) { - str += std::format(" name:\n" + str += fmt::format(" name:\n" " unit: \"string\"\n" " values: \"{}\"\n", this->name_.value()); } // CPU specific flags (like SSE, AVX, ...) 
if (this->flags_.has_value()) { - str += std::format(" flags:\n" + str += fmt::format(" flags:\n" " unit: \"string\"\n" " values: [{}]\n", - detail::join(this->flags_.value(), ", ")); + fmt::join(detail::quote(this->flags_.value()), ", ")); } // the percent the CPU was busy - if (this->busy_percent_.has_value()) { - str += std::format(" utilization:\n" + if (this->compute_utilization_.has_value()) { + str += fmt::format(" compute_utilization:\n" " turbostat_name: \"Busy%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->busy_percent_.value(), ", ")); + fmt::join(this->compute_utilization_.value(), ", ")); } // the instructions per cycle count if (this->ipc_.has_value()) { - str += std::format(" instructions_per_cycle:\n" + str += fmt::format(" instructions_per_cycle:\n" " turbostat_name: \"IPC\"\n" " unit: \"float\"\n" " values: [{}]\n", - detail::join(this->ipc_.value(), ", ")); + fmt::join(this->ipc_.value(), ", ")); } // the number of interrupts if (this->irq_.has_value()) { - str += std::format(" interrupts:\n" + str += fmt::format(" interrupts:\n" " turbostat_name: \"IRQ\"\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->irq_.value(), ", ")); + fmt::join(this->irq_.value(), ", ")); } // the number of system management interrupts if (this->smi_.has_value()) { - str += std::format(" system_management_interrupts:\n" + str += fmt::format(" system_management_interrupts:\n" " turbostat_name: \"SMI\"\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->smi_.value(), ", ")); + fmt::join(this->smi_.value(), ", ")); } // the number of times the CPU was in the poll state if (this->poll_.has_value()) { - str += std::format(" polling_state:\n" + str += fmt::format(" polling_state:\n" " turbostat_name: \"POLL\"\n" " unit: \"int\"\n" " values: [{}]\n", - detail::join(this->poll_.value(), ", ")); + fmt::join(this->poll_.value(), ", ")); } // the percent the CPU was in the polling state if (this->poll_percent_.has_value()) { - str += 
std::format(" polling_percentage:\n" + str += fmt::format(" polling_percentage:\n" " turbostat_name: \"POLL%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->poll_percent_.value(), ", ")); + fmt::join(this->poll_percent_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) { - std::string str = std::format("architecture [string]: {}\n" + std::string str = fmt::format("architecture [string]: {}\n" "byte_order [string]: {}\n" + "num_cores [int]: {}\n" "num_threads [int]: {}\n" "threads_per_core [int]: {}\n" "cores_per_socket [int]: {}\n" @@ -164,7 +183,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) "vendor_id [string]: {}\n" "name [string]: {}\n" "flags [string]: [{}]\n" - "busy_percent [%]: [{}]\n" + "compute_utilization [%]: [{}]\n" "ipc [float]: [{}]\n" "irq [int]: [{}]\n" "smi [int]: [{}]\n" @@ -172,6 +191,7 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) "poll_percent [%]: [{}]", detail::value_or_default(samples.get_architecture()), detail::value_or_default(samples.get_byte_order()), + detail::value_or_default(samples.get_num_cores()), detail::value_or_default(samples.get_num_threads()), detail::value_or_default(samples.get_threads_per_core()), detail::value_or_default(samples.get_cores_per_socket()), @@ -179,13 +199,13 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) detail::value_or_default(samples.get_numa_nodes()), detail::value_or_default(samples.get_vendor_id()), detail::value_or_default(samples.get_name()), - detail::join(detail::value_or_default(samples.get_flags()), ", "), - detail::join(detail::value_or_default(samples.get_busy_percent()), ", "), - detail::join(detail::value_or_default(samples.get_ipc()), ", "), - detail::join(detail::value_or_default(samples.get_irq()), ", "), - 
detail::join(detail::value_or_default(samples.get_smi()), ", "), - detail::join(detail::value_or_default(samples.get_poll()), ", "), - detail::join(detail::value_or_default(samples.get_poll_percent()), ", ")); + fmt::join(detail::value_or_default(samples.get_flags()), ", "), + fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "), + fmt::join(detail::value_or_default(samples.get_ipc()), ", "), + fmt::join(detail::value_or_default(samples.get_irq()), ", "), + fmt::join(detail::value_or_default(samples.get_smi()), ", "), + fmt::join(detail::value_or_default(samples.get_poll()), ", "), + fmt::join(detail::value_or_default(samples.get_poll_percent()), ", ")); // remove last newline str.pop_back(); @@ -197,423 +217,489 @@ std::ostream &operator<<(std::ostream &out, const cpu_general_samples &samples) // clock samples // //*************************************************************************************************************************************// +bool cpu_clock_samples::has_samples() const { + return this->auto_boosted_clock_enabled_.has_value() || this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() + || this->clock_frequency_.has_value() || this->average_non_idle_clock_frequency_.has_value() || this->time_stamp_counter_.has_value(); +} + std::string cpu_clock_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "clock:\n" }; // true if frequency boost is enabled - if (this->frequency_boost_.has_value()) { - str += std::format(" frequency_boost:\n" + if (this->auto_boosted_clock_enabled_.has_value()) { + str += fmt::format(" auto_boosted_clock_enabled:\n" " unit: \"bool\"\n" " values: {}\n", - this->frequency_boost_.value()); + this->auto_boosted_clock_enabled_.value()); } // the minimal CPU frequency - if (this->min_frequency_.has_value()) { - str += std::format(" min_cpu_frequency:\n" + if 
(this->clock_frequency_min_.has_value()) { + str += fmt::format(" clock_frequency_min:\n" " unit: \"MHz\"\n" " values: {}\n", - this->min_frequency_.value()); + this->clock_frequency_min_.value()); } // the maximum CPU frequency - if (this->max_frequency_.has_value()) { - str += std::format(" max_cpu_frequency:\n" + if (this->clock_frequency_max_.has_value()) { + str += fmt::format(" clock_frequency_max:\n" " unit: \"MHz\"\n" " values: {}\n", - this->max_frequency_.value()); + this->clock_frequency_max_.value()); } // the average CPU frequency - if (this->average_frequency_.has_value()) { - str += std::format(" average_frequency:\n" + if (this->clock_frequency_.has_value()) { + str += fmt::format(" clock_frequency:\n" " turbostat_name: \"Avg_MHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->average_frequency_.value(), ", ")); + fmt::join(this->clock_frequency_.value(), ", ")); } // the average CPU frequency excluding idle time - if (this->average_non_idle_frequency_.has_value()) { - str += std::format(" average_non_idle_frequency:\n" + if (this->average_non_idle_clock_frequency_.has_value()) { + str += fmt::format(" average_non_idle_clock_frequency:\n" " turbostat_name: \"Bzy_MHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->average_non_idle_frequency_.value(), ", ")); + fmt::join(this->average_non_idle_clock_frequency_.value(), ", ")); } // the time stamp counter if (this->time_stamp_counter_.has_value()) { - str += std::format(" time_stamp_counter:\n" + str += fmt::format(" time_stamp_counter:\n" " turbostat_name: \"TSC_MHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->time_stamp_counter_.value(), ", ")); + fmt::join(this->time_stamp_counter_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } std::ostream &operator<<(std::ostream &out, const cpu_clock_samples &samples) { - return out << std::format("frequency_boost [bool]: {}\n" - "min_frequency [MHz]: {}\n" - "max_frequency [MHz]: 
{}\n" - "average_frequency [MHz]: [{}]\n" - "average_non_idle_frequency [MHz]: [{}]\n" + return out << fmt::format("auto_boosted_clock_enabled [bool]: {}\n" + "clock_frequency_min [MHz]: {}\n" + "clock_frequency_max [MHz]: {}\n" + "clock_frequency [MHz]: [{}]\n" + "average_non_idle_clock_frequency [MHz]: [{}]\n" "time_stamp_counter [MHz]: [{}]", - detail::value_or_default(samples.get_frequency_boost()), - detail::value_or_default(samples.get_min_frequency()), - detail::value_or_default(samples.get_max_frequency()), - detail::join(detail::value_or_default(samples.get_average_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_average_non_idle_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_time_stamp_counter()), ", ")); + detail::value_or_default(samples.get_auto_boosted_clock_enabled()), + detail::value_or_default(samples.get_clock_frequency_min()), + detail::value_or_default(samples.get_clock_frequency_max()), + fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_average_non_idle_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_time_stamp_counter()), ", ")); } //*************************************************************************************************************************************// // power samples // //*************************************************************************************************************************************// +bool cpu_power_samples::has_samples() const { + return this->power_measurement_type_.has_value() || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value() + || this->core_watt_.has_value() || this->ram_watt_.has_value() || this->package_rapl_throttle_percent_.has_value() + || this->dram_rapl_throttle_percent_.has_value(); +} + std::string cpu_power_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + 
if (!this->has_samples()) { + return ""; + } + std::string str{ "power:\n" }; + // power measurement type + if (this->power_measurement_type_.has_value()) { + str += fmt::format(" power_measurement_type:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->power_measurement_type_.value()); + } + // the package Watt - if (this->package_watt_.has_value()) { - str += std::format(" package_power:\n" + if (this->power_usage_.has_value()) { + str += fmt::format(" power_usage:\n" " turbostat_name: \"PkgWatt\"\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->package_watt_.value(), ", ")); + fmt::join(this->power_usage_.value(), ", ")); + } + // total energy consumed + if (this->power_total_energy_consumption_.has_value()) { + str += fmt::format(" power_total_energy_consumed:\n" + " unit: \"J\"\n" + " values: [{}]\n", + fmt::join(this->power_total_energy_consumption_.value(), ", ")); } + // the core Watt if (this->core_watt_.has_value()) { - str += std::format(" core_power:\n" + str += fmt::format(" core_power:\n" " turbostat_name: \"CorWatt\"\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->core_watt_.value(), ", ")); + fmt::join(this->core_watt_.value(), ", ")); } // the DRAM Watt if (this->ram_watt_.has_value()) { - str += std::format(" dram_power:\n" + str += fmt::format(" dram_power:\n" " turbostat_name: \"RAMWatt\"\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->ram_watt_.value(), ", ")); + fmt::join(this->ram_watt_.value(), ", ")); } // the percent of time when the RAPL package throttle was active if (this->package_rapl_throttle_percent_.has_value()) { - str += std::format(" package_rapl_throttling:\n" + str += fmt::format(" package_rapl_throttling:\n" " turbostat_name: \"PKG_%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->package_rapl_throttle_percent_.value(), ", ")); + fmt::join(this->package_rapl_throttle_percent_.value(), ", ")); } // the percent of time when the RAPL DRAM throttle was active if 
(this->dram_rapl_throttle_percent_.has_value()) { - str += std::format(" dram_rapl_throttling:\n" + str += fmt::format(" dram_rapl_throttling:\n" " turbostat_name: \"RAM_%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->dram_rapl_throttle_percent_.value(), ", ")); + fmt::join(this->dram_rapl_throttle_percent_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } std::ostream &operator<<(std::ostream &out, const cpu_power_samples &samples) { - return out << std::format("package_watt [W]: [{}]\n" + return out << fmt::format("power_measurement_type [string]: {}\n" + "power_usage [W]: [{}]\n" + "power_total_energy_consumption [J]: [{}]\n" "core_watt [W]: [{}]\n" "ram_watt [W]: [{}]\n" "package_rapl_throttle_percent [%]: [{}]\n" "dram_rapl_throttle_percent [%]: [{}]", - detail::join(detail::value_or_default(samples.get_package_watt()), ", "), - detail::join(detail::value_or_default(samples.get_core_watt()), ", "), - detail::join(detail::value_or_default(samples.get_ram_watt()), ", "), - detail::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "), - detail::join(detail::value_or_default(samples.get_dram_rapl_throttle_percent()), ", ")); + detail::value_or_default(samples.get_power_measurement_type()), + fmt::join(detail::value_or_default(samples.get_power_usage()), ", "), + fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), + fmt::join(detail::value_or_default(samples.get_core_watt()), ", "), + fmt::join(detail::value_or_default(samples.get_ram_watt()), ", "), + fmt::join(detail::value_or_default(samples.get_package_rapl_throttle_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_dram_rapl_throttle_percent()), ", ")); } //*************************************************************************************************************************************// // memory samples // 
//*************************************************************************************************************************************// +bool cpu_memory_samples::has_samples() const { + return this->cache_size_L1d_.has_value() || this->cache_size_L1i_.has_value() || this->cache_size_L2_.has_value() || this->cache_size_L3_.has_value() + || this->memory_total_.has_value() || this->swap_memory_total_.has_value() || this->memory_used_.has_value() || this->swap_memory_free_.has_value() + || this->swap_memory_used_.has_value() || this->swap_memory_free_.has_value(); +} + std::string cpu_memory_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "memory:\n" }; // the size of the L1 data cache - if (this->l1d_cache_.has_value()) { - str += std::format(" cache_size_L1d:\n" + if (this->cache_size_L1d_.has_value()) { + str += fmt::format(" cache_size_L1d:\n" " unit: \"string\"\n" " values: \"{}\"\n", - this->l1d_cache_.value()); + this->cache_size_L1d_.value()); } // the size of the L1 instruction cache - if (this->l1i_cache_.has_value()) { - str += std::format(" cache_size_L1i:\n" + if (this->cache_size_L1i_.has_value()) { + str += fmt::format(" cache_size_L1i:\n" " unit: \"string\"\n" " values: \"{}\"\n", - this->l1i_cache_.value()); + this->cache_size_L1i_.value()); } // the size of the L2 cache - if (this->l2_cache_.has_value()) { - str += std::format(" cache_size_L2:\n" + if (this->cache_size_L2_.has_value()) { + str += fmt::format(" cache_size_L2:\n" " unit: \"string\"\n" " values: \"{}\"\n", - this->l2_cache_.value()); + this->cache_size_L2_.value()); } // the size of the L3 cache - if (this->l3_cache_.has_value()) { - str += std::format(" cache_size_L3:\n" + if (this->cache_size_L3_.has_value()) { + str += fmt::format(" cache_size_L3:\n" " unit: \"string\"\n" " values: \"{}\"\n", - this->l3_cache_.value()); + this->cache_size_L3_.value()); } // the total 
size of available memory if (this->memory_total_.has_value()) { - str += std::format(" memory_total:\n" + str += fmt::format(" memory_total:\n" " unit: \"B\"\n" " values: {}\n", this->memory_total_.value()); } // the total size of the swap memory if (this->swap_memory_total_.has_value()) { - str += std::format(" swap_memory_total:\n" + str += fmt::format(" swap_memory_total:\n" " unit: \"B\"\n" " values: {}\n", this->swap_memory_total_.value()); } - // the available free memory - if (this->memory_free_.has_value()) { - str += std::format(" memory_free:\n" - " unit: \"B\"\n" - " values: [{}]\n", - detail::join(this->memory_free_.value(), ", ")); - } // the used memory if (this->memory_used_.has_value()) { - str += std::format(" memory_used:\n" + str += fmt::format(" memory_used:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(this->memory_used_.value(), ", ")); + fmt::join(this->memory_used_.value(), ", ")); } - // the available swap memory - if (this->swap_memory_free_.has_value()) { - str += std::format(" swap_memory_free:\n" + // the available free memory + if (this->memory_free_.has_value()) { + str += fmt::format(" memory_free:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(this->swap_memory_free_.value(), ", ")); + fmt::join(this->memory_free_.value(), ", ")); } // the swap memory if (this->swap_memory_used_.has_value()) { - str += std::format(" swap_memory_used:\n" + str += fmt::format(" swap_memory_used:\n" " unit: \"B\"\n" " values: [{}]\n", - detail::join(this->swap_memory_used_.value(), ", ")); + fmt::join(this->swap_memory_used_.value(), ", ")); + } + // the available swap memory + if (this->swap_memory_free_.has_value()) { + str += fmt::format(" swap_memory_free:\n" + " unit: \"B\"\n" + " values: [{}]\n", + fmt::join(this->swap_memory_free_.value(), ", ")); } - - // remove last newline - str.pop_back(); return str; } std::ostream &operator<<(std::ostream &out, const cpu_memory_samples &samples) { - return out << std::format("l1d_cache 
[string]: {}\n" - "l1i_cache [string]: {}\n" - "l2_cache [string]: {}\n" - "l3_cache [string]: {}\n" + return out << fmt::format("cache_size_L1d [string]: {}\n" + "cache_size_L1i [string]: {}\n" + "cache_size_L2 [string]: {}\n" + "cache_size_L3 [string]: {}\n" "memory_total [B]: {}\n" "swap_memory_total [B]: {}\n" - "memory_free [B]: [{}]\n" "memory_used [B]: [{}]\n" - "swap_memory_free [B]: [{}]\n" - "swap_memory_used [B]: [{}]", - detail::value_or_default(samples.get_l1d_cache()), - detail::value_or_default(samples.get_l1i_cache()), - detail::value_or_default(samples.get_l2_cache()), - detail::value_or_default(samples.get_l3_cache()), + "memory_free [B]: [{}]\n" + "swap_memory_used [B]: [{}]\n" + "swap_memory_free [B]: [{}]", + detail::value_or_default(samples.get_cache_size_L1d()), + detail::value_or_default(samples.get_cache_size_L1i()), + detail::value_or_default(samples.get_cache_size_L2()), + detail::value_or_default(samples.get_cache_size_L3()), detail::value_or_default(samples.get_memory_total()), detail::value_or_default(samples.get_swap_memory_total()), - detail::join(detail::value_or_default(samples.get_memory_free()), ", "), - detail::join(detail::value_or_default(samples.get_memory_used()), ", "), - detail::join(detail::value_or_default(samples.get_swap_memory_free()), ", "), - detail::join(detail::value_or_default(samples.get_swap_memory_used()), ", ")); + fmt::join(detail::value_or_default(samples.get_memory_used()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), + fmt::join(detail::value_or_default(samples.get_swap_memory_used()), ", "), + fmt::join(detail::value_or_default(samples.get_swap_memory_free()), ", ")); } //*************************************************************************************************************************************// // temperature samples // //*************************************************************************************************************************************// 
+bool cpu_temperature_samples::has_samples() const { + return this->temperature_.has_value() || this->core_temperature_.has_value() || this->core_throttle_percent_.has_value(); +} + std::string cpu_temperature_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "temperature:\n" }; + // the temperature of the whole package + if (this->temperature_.has_value()) { + str += fmt::format(" temperature:\n" + " turbostat_name: \"PkgTmp\"\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->temperature_.value(), ", ")); + } // the temperature of the cores if (this->core_temperature_.has_value()) { - str += std::format(" per_core_temperature:\n" + str += fmt::format(" core_temperature:\n" " turbostat_name: \"CoreTmp\"\n" " unit: \"°C\"\n" " values: [{}]\n", - detail::join(this->core_temperature_.value(), ", ")); + fmt::join(this->core_temperature_.value(), ", ")); } // the percentage of time the core throttled due the temperature constraints if (this->core_throttle_percent_.has_value()) { - str += std::format(" core_throttle_percentage:\n" + str += fmt::format(" core_throttle_percentage:\n" " turbostat_name: \"CoreThr\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->core_throttle_percent_.value(), ", ")); - } - // the temperature of the whole package - if (this->package_temperature_.has_value()) { - str += std::format(" per_package_temperature:\n" - " turbostat_name: \"PkgTmp\"\n" - " unit: \"°C\"\n" - " values: [{}]\n", - detail::join(this->package_temperature_.value(), ", ")); + fmt::join(this->core_throttle_percent_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } std::ostream &operator<<(std::ostream &out, const cpu_temperature_samples &samples) { - return out << std::format("core_temperature [°C]: [{}]\n" - "core_throttle_percent [%]: [{}]\n" - "package_temperature [°C]: [{}]", - 
detail::join(detail::value_or_default(samples.get_core_temperature()), ", "), - detail::join(detail::value_or_default(samples.get_core_throttle_percent()), ", "), - detail::join(detail::value_or_default(samples.get_package_temperature()), ", ")); + return out << fmt::format("temperature [°C]: [{}]\n" + "core_temperature [°C]: [{}]\n" + "core_throttle_percent [%]: [{}]", + fmt::join(detail::value_or_default(samples.get_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_core_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_core_throttle_percent()), ", ")); } //*************************************************************************************************************************************// // gfx (iGPU) samples // //*************************************************************************************************************************************// +bool cpu_gfx_samples::has_samples() const { + return this->gfx_render_state_percent_.has_value() || this->gfx_frequency_.has_value() || this->average_gfx_frequency_.has_value() + || this->gfx_state_c0_percent_.has_value() || this->cpu_works_for_gpu_percent_.has_value() || this->gfx_watt_.has_value(); +} + std::string cpu_gfx_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "integrated_gpu:\n" }; // the percentage of time the iGPU was in the render state if (this->gfx_render_state_percent_.has_value()) { - str += std::format(" graphics_render_state:\n" + str += fmt::format(" graphics_render_state:\n" " turbostat_name: \"GFX%rc6\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->gfx_render_state_percent_.value(), ", ")); + fmt::join(this->gfx_render_state_percent_.value(), ", ")); } // the core frequency of the iGPU if (this->gfx_frequency_.has_value()) { - str += std::format(" graphics_frequency:\n" + str += fmt::format(" 
graphics_frequency:\n" " turbostat_name: \"GFXMHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->gfx_frequency_.value(), ", ")); + fmt::join(this->gfx_frequency_.value(), ", ")); } // the average core frequency of the iGPU if (this->average_gfx_frequency_.has_value()) { - str += std::format(" average_graphics_frequency:\n" + str += fmt::format(" average_graphics_frequency:\n" " turbostat_name: \"GFXAMHz\"\n" " unit: \"MHz\"\n" " values: [{}]\n", - detail::join(this->average_gfx_frequency_.value(), ", ")); + fmt::join(this->average_gfx_frequency_.value(), ", ")); } // the percentage of time the iGPU was in the c0 state if (this->gfx_state_c0_percent_.has_value()) { - str += std::format(" gpu_state_c0:\n" + str += fmt::format(" gpu_state_c0:\n" " turbostat_name: \"GFX%C0\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->gfx_state_c0_percent_.value(), ", ")); + fmt::join(this->gfx_state_c0_percent_.value(), ", ")); } // the percentage of time the CPU worked for the iGPU if (this->cpu_works_for_gpu_percent_.has_value()) { - str += std::format(" cpu_works_for_gpu:\n" + str += fmt::format(" cpu_works_for_gpu:\n" " turbostat_name: \"CPUGFX%\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->cpu_works_for_gpu_percent_.value(), ", ")); + fmt::join(this->cpu_works_for_gpu_percent_.value(), ", ")); } // the iGPU Watt if (this->gfx_watt_.has_value()) { - str += std::format(" graphics_power:\n" + str += fmt::format(" graphics_power:\n" " turbostat_name: \"GFXWatt\"\n" " unit: \"W\"\n" " values: [{}]\n", - detail::join(this->gfx_watt_.value(), ", ")); + fmt::join(this->gfx_watt_.value(), ", ")); } - // remove last newline - str.pop_back(); - return str; } std::ostream &operator<<(std::ostream &out, const cpu_gfx_samples &samples) { - return out << std::format("gfx_render_state_percent [%]: [{}]\n" + return out << fmt::format("gfx_render_state_percent [%]: [{}]\n" "gfx_frequency [MHz]: [{}]\n" "average_gfx_frequency [MHz]: 
[{}]\n" "gfx_state_c0_percent [%]: [{}]\n" "cpu_works_for_gpu_percent [%]: [{}]\n" "gfx_watt [W]: [{}]", - detail::join(detail::value_or_default(samples.get_gfx_render_state_percent()), ", "), - detail::join(detail::value_or_default(samples.get_gfx_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_average_gfx_frequency()), ", "), - detail::join(detail::value_or_default(samples.get_gfx_state_c0_percent()), ", "), - detail::join(detail::value_or_default(samples.get_cpu_works_for_gpu_percent()), ", "), - detail::join(detail::value_or_default(samples.get_gfx_watt()), ", ")); + fmt::join(detail::value_or_default(samples.get_gfx_render_state_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_gfx_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_average_gfx_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_gfx_state_c0_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_cpu_works_for_gpu_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_gfx_watt()), ", ")); } //*************************************************************************************************************************************// // idle state samples // //*************************************************************************************************************************************// +bool cpu_idle_states_samples::has_samples() const { + return this->all_cpus_state_c0_percent_.has_value() || this->any_cpu_state_c0_percent_.has_value() || this->low_power_idle_state_percent_.has_value() + || this->system_low_power_idle_state_percent_.has_value() || this->package_low_power_idle_state_percent_.has_value() || this->idle_states_.has_value(); +} + std::string cpu_idle_states_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + std::string str{ "idle_states:\n" }; // the percentage of time all CPUs were in 
the c0 state if (this->all_cpus_state_c0_percent_.has_value()) { - str += std::format(" all_cpus_state_c0:\n" + str += fmt::format(" all_cpus_state_c0:\n" " turbostat_name: \"Totl%C0\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->all_cpus_state_c0_percent_.value(), ", ")); + fmt::join(this->all_cpus_state_c0_percent_.value(), ", ")); } // the percentage of time any CPU was in the c0 state if (this->any_cpu_state_c0_percent_.has_value()) { - str += std::format(" any_cpu_state_c0:\n" + str += fmt::format(" any_cpu_state_c0:\n" " turbostat_name: \"Any%C0\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->any_cpu_state_c0_percent_.value(), ", ")); + fmt::join(this->any_cpu_state_c0_percent_.value(), ", ")); } // the percentage of time the CPUs were in the low power idle state if (this->low_power_idle_state_percent_.has_value()) { - str += std::format(" lower_power_idle_state:\n" + str += fmt::format(" lower_power_idle_state:\n" " turbostat_name: \"CPU%LPI\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->low_power_idle_state_percent_.value(), ", ")); + fmt::join(this->low_power_idle_state_percent_.value(), ", ")); } // the percentage of time the CPUs were in the system low power idle state if (this->system_low_power_idle_state_percent_.has_value()) { - str += std::format(" system_lower_power_idle_state:\n" + str += fmt::format(" system_lower_power_idle_state:\n" " turbostat_name: \"SYS%LPI\"\n" " unit: \"percentage\"\n" " values: [{}]\n", - detail::join(this->system_low_power_idle_state_percent_.value(), ", ")); + fmt::join(this->system_low_power_idle_state_percent_.value(), ", ")); } // the percentage of time the package was in the low power idle state if (this->package_low_power_idle_state_percent_.has_value()) { - str += std::format(" package_lower_power_idle_state:\n" + str += fmt::format(" package_lower_power_idle_state:\n" " turbostat_name: \"Pkg%LPI\"\n" " unit: \"percentage\"\n" " values: [{}]\n", 
- detail::join(this->package_low_power_idle_state_percent_.value(), ", ")); + fmt::join(this->package_low_power_idle_state_percent_.value(), ", ")); } // the other core idle states @@ -642,42 +728,39 @@ std::string cpu_idle_states_samples::generate_yaml_string() const { std::string entry_name_with_state{}; std::regex_replace(std::back_inserter(entry_name_with_state), entry_name_placeholder.begin(), entry_name_placeholder.end(), placeholder_reg, std::string{ state }); - str += std::format(" {}:\n" + str += fmt::format(" {}:\n" " turbostat_name: \"{}\"\n" " unit: \"{}\"\n" " values: [{}]\n", entry_name_with_state, entry, entry_unit, - detail::join(values, ", ")); + fmt::join(values, ", ")); break; } } } } - // remove last newline - str.pop_back(); - return str; } std::ostream &operator<<(std::ostream &out, const cpu_idle_states_samples &samples) { - std::string str = std::format("all_cpus_state_c0_percent [%]: [{}]\n" + std::string str = fmt::format("all_cpus_state_c0_percent [%]: [{}]\n" "any_cpu_state_c0_percent [%]: [{}]\n" "low_power_idle_state_percent [%]: [{}]\n" "system_low_power_idle_state_percent [%]: [{}]\n" "package_low_power_idle_state_percent [%]: [{}]\n", - detail::join(detail::value_or_default(samples.get_all_cpus_state_c0_percent()), ", "), - detail::join(detail::value_or_default(samples.get_any_cpu_state_c0_percent()), ", "), - detail::join(detail::value_or_default(samples.get_low_power_idle_state_percent()), ", "), - detail::join(detail::value_or_default(samples.get_system_low_power_idle_state_percent()), ", "), - detail::join(detail::value_or_default(samples.get_package_low_power_idle_state_percent()), ", ")); + fmt::join(detail::value_or_default(samples.get_all_cpus_state_c0_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_any_cpu_state_c0_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_low_power_idle_state_percent()), ", "), + 
fmt::join(detail::value_or_default(samples.get_system_low_power_idle_state_percent()), ", "), + fmt::join(detail::value_or_default(samples.get_package_low_power_idle_state_percent()), ", ")); // add map entries if (samples.get_idle_states().has_value()) { for (const auto &[key, value] : samples.get_idle_states().value()) { - str += std::format("{}: [{}]\n", key, detail::join(value, ", ")); + str += fmt::format("{}: [{}]\n", key, fmt::join(value, ", ")); } } diff --git a/src/hws/cpu/hardware_sampler.cpp b/src/hws/cpu/hardware_sampler.cpp new file mode 100644 index 0000000..505e0bb --- /dev/null +++ b/src/hws/cpu/hardware_sampler.cpp @@ -0,0 +1,695 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "hws/cpu/hardware_sampler.hpp" + +#include "hws/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} +#include "hws/cpu/utility.hpp" // HWS_SUBPROCESS_ERROR_CHECK, hws::detail::run_subprocess +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::detail::{split, split_as, trim, convert_to, starts_with, time_points_to_epoch} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join + +#include // assert +#include // std::chrono::{steady_clock, milliseconds, duration} +#include // std::size_t +#include // std::exception, std::terminate +#include // std::ios_base +#include // std::cerr, std::endl +#include // std::make_optional +#include // std::ostream +#include // std::regex, std::regex::extended, std::regex_match, std::regex_replace +#include // std::runtime_error +#include // std::string +#include // std::string_view +#include // std::this_thread +#include // std::unordered_map +#include // 
std::vector + +namespace hws { + +cpu_hardware_sampler::cpu_hardware_sampler(const sample_category category) : + cpu_hardware_sampler{ HWS_SAMPLING_INTERVAL, category } { } + +cpu_hardware_sampler::cpu_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + hardware_sampler{ sampling_interval, category } { } + +cpu_hardware_sampler::~cpu_hardware_sampler() { + try { + // if this hardware sampler is still sampling, stop it + if (this->has_sampling_started() && !this->has_sampling_stopped()) { + this->stop_sampling(); + } + } catch (const std::exception &e) { + std::cerr << e.what() << std::endl; + std::terminate(); + } +} + +void cpu_hardware_sampler::sampling_loop() { + // + // add samples where we only have to retrieve the value once + // + + this->add_time_point(std::chrono::steady_clock::now()); + +#if defined(HWS_VIA_LSCPU_ENABLED) + { + const std::string lscpu_output = detail::run_subprocess("lscpu"); + const std::vector lscpu_lines = detail::split(detail::trim(lscpu_output), '\n'); + + for (std::string_view line : lscpu_lines) { + line = detail::trim(line); + // extract the value, i.e., everything after the first ':' (the whole line if it contains no ':') + std::string_view value{ line }; + value.remove_prefix(value.find_first_of(":") + 1); + value = detail::trim(value); + + // check whether the line starts with an entry that we want to sample + if (this->sample_category_enabled(sample_category::general)) { + if (detail::starts_with(line, "Architecture")) { + general_samples_.architecture_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Byte Order")) { + general_samples_.byte_order_ = detail::convert_to(value); + } else if (detail::starts_with(line, "CPU(s):")) { // include the ':' so that lscpu's "CPU(s) scaling MHz" row does not overwrite the thread count + general_samples_.num_threads_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Thread(s) per core")) { + general_samples_.threads_per_core_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Core(s) per socket")) { + general_samples_.cores_per_socket_ = 
detail::convert_to(value); + } else if (detail::starts_with(line, "Socket(s)")) { + general_samples_.num_sockets_ = detail::convert_to(value); + } else if (detail::starts_with(line, "NUMA node(s)")) { + general_samples_.numa_nodes_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Vendor ID")) { + general_samples_.vendor_id_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Model name")) { + general_samples_.name_ = detail::convert_to(value); + } else if (detail::starts_with(line, "Flags")) { + general_samples_.flags_ = detail::split_as(value, ' '); + } + } + if (this->sample_category_enabled(sample_category::clock)) { + if (detail::starts_with(line, "Frequency boost")) { + clock_samples_.auto_boosted_clock_enabled_ = value == "enabled"; + } else if (detail::starts_with(line, "CPU max MHz")) { + clock_samples_.clock_frequency_max_ = detail::convert_to(value); + } else if (detail::starts_with(line, "CPU min MHz")) { + clock_samples_.clock_frequency_min_ = detail::convert_to(value); + } + } + if (this->sample_category_enabled(sample_category::memory)) { + if (detail::starts_with(line, "L1d cache")) { + memory_samples_.cache_size_L1d_ = detail::convert_to(value); + } else if (detail::starts_with(line, "L1i cache")) { + memory_samples_.cache_size_L1i_ = detail::convert_to(value); + } else if (detail::starts_with(line, "L2 cache")) { + memory_samples_.cache_size_L2_ = detail::convert_to(value); + } else if (detail::starts_with(line, "L3 cache")) { + memory_samples_.cache_size_L3_ = detail::convert_to(value); + } + } + } + + if (this->sample_category_enabled(sample_category::general)) { + // check if the number of cores can be derived from the otherwise found values + if (general_samples_.num_threads_.has_value() && general_samples_.threads_per_core_.has_value()) { + general_samples_.num_cores_ = general_samples_.num_threads_.value() / general_samples_.threads_per_core_.value(); + } + } + } +#endif + +#if 
defined(HWS_VIA_FREE_ENABLED) + const std::regex whitespace_replace_reg{ "[ ]+", std::regex::extended }; + if (this->sample_category_enabled(sample_category::memory)) { + std::string free_output = detail::run_subprocess("free -b"); + free_output = std::regex_replace(free_output, whitespace_replace_reg, " "); + const std::vector free_lines = detail::split(detail::trim(free_output), '\n'); + assert((free_lines.size() >= 3) && "Must read more than three lines, but fewer were read!"); + + // read memory information + const std::vector memory_data = detail::split(free_lines[1], ' '); + memory_samples_.memory_total_ = detail::convert_to(memory_data[1]); + memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ detail::convert_to(memory_data[2]) }; + memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{ detail::convert_to(memory_data[3]) }; + + // read swap information + const std::vector swap_data = detail::split(free_lines[2], ' '); + memory_samples_.swap_memory_total_ = detail::convert_to(swap_data[1]); + memory_samples_.swap_memory_used_ = decltype(memory_samples_.swap_memory_used_)::value_type{ detail::convert_to(swap_data[2]) }; + memory_samples_.swap_memory_free_ = decltype(memory_samples_.swap_memory_free_)::value_type{ detail::convert_to(swap_data[3]) }; + } +#endif + +#if defined(HWS_VIA_TURBOSTAT_ENABLED) + + // -n, --num_iterations number of the measurement iterations + // -i, --interval sampling interval in seconds (decimal number) + // -S, --Summary limits output to 1-line per interval + // -q, --quiet skip decoding system configuration header + + // get header information + #if defined(HWS_VIA_TURBOSTAT_ROOT) + // run with sudo + const std::string_view turbostat_command_line = "sudo turbostat -n 1 -i 0.001 -S -q"; + #else + // run without sudo + const std::string_view turbostat_command_line = "turbostat -n 1 -i 0.001 -S -q"; + #endif + + { + // run turbostat + const std::string turbostat_output = 
detail::run_subprocess(turbostat_command_line); + + // retrieve the turbostat data + const std::vector data = detail::split(detail::trim(turbostat_output), '\n'); + assert((data.size() >= 2) && "Must read at least two lines!"); + const std::vector header = detail::split(data[0], '\t'); + const std::vector values = detail::split(data[1], '\t'); + + for (std::size_t i = 0; i < header.size(); ++i) { + // general samples + if (header[i] == "Busy%") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.compute_utilization_)::value_type; + general_samples_.compute_utilization_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "IPC") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.ipc_)::value_type; + general_samples_.ipc_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "IRQ") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.irq_)::value_type; + general_samples_.irq_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "SMI") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.smi_)::value_type; + general_samples_.smi_ = vector_type{ detail::convert_to(values[i]) }; + } + } else if (header[i] == "POLL") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.poll_)::value_type; + general_samples_.poll_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "POLL%") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.poll_percent_)::value_type; + general_samples_.poll_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } + + // 
clock related samples + if (header[i] == "Avg_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { + using vector_type = decltype(clock_samples_.clock_frequency_)::value_type; + clock_samples_.clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "Bzy_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { + using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; + clock_samples_.average_non_idle_clock_frequency_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "TSC_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { + using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; + clock_samples_.time_stamp_counter_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } + + // power related samples + if (header[i] == "PkgWatt") { + if (this->sample_category_enabled(sample_category::power)) { + using vector_type = decltype(power_samples_.power_usage_)::value_type; + power_samples_.power_usage_ = vector_type{ detail::convert_to(values[i]) }; + power_samples_.power_measurement_type_ = "current/instant"; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0 }; + } + continue; + } else if (header[i] == "CorWatt") { + if (this->sample_category_enabled(sample_category::power)) { + using vector_type = decltype(power_samples_.core_watt_)::value_type; + power_samples_.core_watt_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "RAMWatt") { + if (this->sample_category_enabled(sample_category::power)) { + using vector_type = decltype(power_samples_.ram_watt_)::value_type; + power_samples_.ram_watt_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "PKG_%") { + if (this->sample_category_enabled(sample_category::power)) { + 
using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; + power_samples_.package_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "RAM_%") { + if (this->sample_category_enabled(sample_category::power)) { + using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; + power_samples_.dram_rapl_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } + + // temperature related samples + if (header[i] == "CoreTmp") { + if (this->sample_category_enabled(sample_category::temperature)) { + using vector_type = decltype(temperature_samples_.core_temperature_)::value_type; + temperature_samples_.core_temperature_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "CoreThr") { + if (this->sample_category_enabled(sample_category::temperature)) { + using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; + temperature_samples_.core_throttle_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "PkgTmp") { + if (this->sample_category_enabled(sample_category::temperature)) { + using vector_type = decltype(temperature_samples_.temperature_)::value_type; + temperature_samples_.temperature_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } + + // gfx (iGPU) related samples + if (header[i] == "GFX%rc6") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; + gfx_samples_.gfx_render_state_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "GFXMHz") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; + gfx_samples_.gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; + } + 
continue; + } else if (header[i] == "GFXAMHz") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; + gfx_samples_.average_gfx_frequency_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "GFX%C0") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; + gfx_samples_.gfx_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "CPUGFX%") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; + gfx_samples_.cpu_works_for_gpu_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "GFXWatt") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; + gfx_samples_.gfx_watt_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } + + // idle state related samples + if (header[i] == "Totl%C0") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; + idle_state_samples_.all_cpus_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "Any%C0") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; + idle_state_samples_.any_cpu_state_c0_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "CPU%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; + 
idle_state_samples_.low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "SYS%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; + idle_state_samples_.system_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else if (header[i] == "Pkg%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; + idle_state_samples_.package_low_power_idle_state_percent_ = vector_type{ detail::convert_to(values[i]) }; + } + continue; + } else { + if (this->sample_category_enabled(sample_category::idle_state)) { + // test against regex + const std::string header_str{ header[i] }; + const std::regex reg{ std::string{ "CPU%[0-9a-zA-Z]+|Pkg%[0-9a-zA-Z]+|Pk%[0-9a-zA-Z]+|C[0-9a-zA-Z]+%|C[0-9a-zA-Z]+" }, std::regex::extended }; + if (std::regex_match(header_str, reg)) { + // first time this branch is reached -> create optional value + if (!idle_state_samples_.idle_states_.has_value()) { + idle_state_samples_.idle_states_ = std::make_optional(); + } + + using vector_type = cpu_idle_states_samples::map_type::mapped_type; + idle_state_samples_.idle_states_.value()[header_str] = vector_type{ detail::convert_to(values[i]) }; + } + } + continue; + } + } + } +#endif + + // + // loop until stop_sampling() is called + // + + while (!this->has_sampling_stopped()) { + // only sample values if the sampler currently isn't paused + if (this->is_sampling()) { + // add current time point + this->add_time_point(std::chrono::steady_clock::now()); + +#if defined(HWS_VIA_FREE_ENABLED) + if (this->sample_category_enabled(sample_category::memory)) { + // run free + std::string free_output = detail::run_subprocess("free -b"); + free_output = 
std::regex_replace(free_output, whitespace_replace_reg, " "); + const std::vector free_lines = detail::split(detail::trim(free_output), '\n'); + assert((free_lines.size() >= 3) && "Must read more than three lines, but fewer were read!"); + + // read memory information + const std::vector memory_data = detail::split(free_lines[1], ' '); + memory_samples_.memory_used_->push_back(detail::convert_to(memory_data[2])); + memory_samples_.memory_free_->push_back(detail::convert_to(memory_data[3])); + + // read swap information + const std::vector swap_data = detail::split(free_lines[2], ' '); + memory_samples_.swap_memory_used_->push_back(detail::convert_to(swap_data[2])); + memory_samples_.swap_memory_free_->push_back(detail::convert_to(swap_data[3])); + } +#endif + +#if defined(HWS_VIA_TURBOSTAT_ENABLED) + { + // run turbostat + const std::string turbostat_output = detail::run_subprocess(turbostat_command_line); + + // retrieve the turbostat data + const std::vector data = detail::split(detail::trim(turbostat_output), '\n'); + assert((data.size() >= 2) && "Must read at least two lines!"); + const std::vector header = detail::split(data[0], '\t'); + const std::vector values = detail::split(data[1], '\t'); + + // add values to the respective sample entries + for (std::size_t i = 0; i < header.size(); ++i) { + // general samples + if (header[i] == "Busy%") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.compute_utilization_)::value_type; + general_samples_.compute_utilization_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "IPC") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.ipc_)::value_type; + general_samples_.ipc_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "IRQ") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = 
decltype(general_samples_.irq_)::value_type; + general_samples_.irq_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "SMI") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.smi_)::value_type; + general_samples_.smi_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "POLL") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.poll_)::value_type; + general_samples_.poll_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "POLL%") { + if (this->sample_category_enabled(sample_category::general)) { + using vector_type = decltype(general_samples_.poll_percent_)::value_type; + general_samples_.poll_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } + + // clock related samples + if (header[i] == "Avg_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { + using vector_type = decltype(clock_samples_.clock_frequency_)::value_type; + clock_samples_.clock_frequency_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "Bzy_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { + using vector_type = decltype(clock_samples_.average_non_idle_clock_frequency_)::value_type; + clock_samples_.average_non_idle_clock_frequency_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "TSC_MHz") { + if (this->sample_category_enabled(sample_category::clock)) { + using vector_type = decltype(clock_samples_.time_stamp_counter_)::value_type; + clock_samples_.time_stamp_counter_->push_back(detail::convert_to(values[i])); + } + continue; + } + + // power related samples + if (header[i] == "PkgWatt") { + if (this->sample_category_enabled(sample_category::power)) { + using vector_type = decltype(power_samples_.power_usage_)::value_type; + 
power_samples_.power_usage_->push_back(detail::convert_to(values[i])); + // calculate total energy consumption + using value_type = decltype(power_samples_.power_total_energy_consumption_)::value_type::value_type; + const std::size_t num_time_points = this->sampling_time_points().size(); + const value_type time_difference = std::chrono::duration(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count(); + const auto current = power_samples_.power_usage_->back() * time_difference; + power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current); + } + continue; + } else if (header[i] == "CorWatt") { + if (this->sample_category_enabled(sample_category::power)) { + using vector_type = decltype(power_samples_.core_watt_)::value_type; + power_samples_.core_watt_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "RAMWatt") { + if (this->sample_category_enabled(sample_category::power)) { + using vector_type = decltype(power_samples_.ram_watt_)::value_type; + power_samples_.ram_watt_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "PKG_%") { + if (this->sample_category_enabled(sample_category::power)) { + using vector_type = decltype(power_samples_.package_rapl_throttle_percent_)::value_type; + power_samples_.package_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "RAM_%") { + if (this->sample_category_enabled(sample_category::power)) { + using vector_type = decltype(power_samples_.dram_rapl_throttle_percent_)::value_type; + power_samples_.dram_rapl_throttle_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } + + // temperature related samples + if (header[i] == "CoreTmp") { + if (this->sample_category_enabled(sample_category::temperature)) { + using vector_type = 
decltype(temperature_samples_.core_temperature_)::value_type; + temperature_samples_.core_temperature_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "CoreThr") { + if (this->sample_category_enabled(sample_category::temperature)) { + using vector_type = decltype(temperature_samples_.core_throttle_percent_)::value_type; + temperature_samples_.core_throttle_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "PkgTmp") { + if (this->sample_category_enabled(sample_category::temperature)) { + using vector_type = decltype(temperature_samples_.temperature_)::value_type; + temperature_samples_.temperature_->push_back(detail::convert_to(values[i])); + } + continue; + } + + // gfx (iGPU) related samples + if (header[i] == "GFX%rc6") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.gfx_render_state_percent_)::value_type; + gfx_samples_.gfx_render_state_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "GFXMHz") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.gfx_frequency_)::value_type; + gfx_samples_.gfx_frequency_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "GFXAMHz") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.average_gfx_frequency_)::value_type; + gfx_samples_.average_gfx_frequency_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "GFX%C0") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.gfx_state_c0_percent_)::value_type; + gfx_samples_.gfx_state_c0_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "CPUGFX%") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = 
decltype(gfx_samples_.cpu_works_for_gpu_percent_)::value_type; + gfx_samples_.cpu_works_for_gpu_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "GFXWatt") { + if (this->sample_category_enabled(sample_category::gfx)) { + using vector_type = decltype(gfx_samples_.gfx_watt_)::value_type; + gfx_samples_.gfx_watt_->push_back(detail::convert_to(values[i])); + } + continue; + } + + // idle state related samples + if (header[i] == "Totl%C0") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.all_cpus_state_c0_percent_)::value_type; + idle_state_samples_.all_cpus_state_c0_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "Any%C0") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.any_cpu_state_c0_percent_)::value_type; + idle_state_samples_.any_cpu_state_c0_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "CPU%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.low_power_idle_state_percent_)::value_type; + idle_state_samples_.low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "SYS%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.system_low_power_idle_state_percent_)::value_type; + idle_state_samples_.system_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else if (header[i] == "Pkg%LPI") { + if (this->sample_category_enabled(sample_category::idle_state)) { + using vector_type = decltype(idle_state_samples_.package_low_power_idle_state_percent_)::value_type; + 
idle_state_samples_.package_low_power_idle_state_percent_->push_back(detail::convert_to(values[i])); + } + continue; + } else { + if (this->sample_category_enabled(sample_category::idle_state)) { + const std::string header_str{ header[i] }; + if (idle_state_samples_.idle_states_.has_value() && idle_state_samples_.idle_states_.value().count(header_str) > decltype(idle_state_samples_)::map_type::size_type{ 0 }) { // idle_states_ only holds a map if the initial turbostat run found a matching idle state column + using vector_type = cpu_idle_states_samples::map_type::mapped_type; + idle_state_samples_.idle_states_.value()[header_str].push_back(detail::convert_to(values[i])); + } + } + continue; + } + } + } +#endif + } + + // wait for the sampling interval to pass to retrieve the next sample + std::this_thread::sleep_for(this->sampling_interval()); + } +} + +std::string cpu_hardware_sampler::device_identification() const { + return "cpu_device"; +} + +std::string cpu_hardware_sampler::samples_only_as_yaml_string() const { + // check whether it's safe to generate the YAML entry + if (this->is_sampling()) { + throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; + } + + return fmt::format("{}{}" + "{}{}" + "{}{}" + "{}{}" + "{}{}" + "{}{}" + "{}", + general_samples_.generate_yaml_string(), + general_samples_.has_samples() ? "\n" : "", + clock_samples_.generate_yaml_string(), + clock_samples_.has_samples() ? "\n" : "", + power_samples_.generate_yaml_string(), + power_samples_.has_samples() ? "\n" : "", + memory_samples_.generate_yaml_string(), + memory_samples_.has_samples() ? "\n" : "", + temperature_samples_.generate_yaml_string(), + temperature_samples_.has_samples() ? "\n" : "", + gfx_samples_.generate_yaml_string(), + gfx_samples_.has_samples() ? 
"\n" : "", + idle_state_samples_.generate_yaml_string()); +} + +std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler) { + if (sampler.is_sampling()) { + out.setstate(std::ios_base::failbit); + return out; + } else { + return out << fmt::format("sampling interval: {}\n" + "time points: [{}]\n\n" + "general samples:\n{}\n\n" + "clock samples:\n{}\n\n" + "power samples:\n{}\n\n" + "memory samples:\n{}\n\n" + "temperature samples:\n{}\n\n" + "gfx samples:\n{}\n\n" + "idle state samples:\n{}", + sampler.sampling_interval(), + fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), + sampler.general_samples(), + sampler.clock_samples(), + sampler.power_samples(), + sampler.memory_samples(), + sampler.temperature_samples(), + sampler.gfx_samples(), + sampler.idle_state_samples()); + } +} + +} // namespace hws diff --git a/src/hardware_sampling/cpu/utility.cpp b/src/hws/cpu/utility.cpp similarity index 87% rename from src/hardware_sampling/cpu/utility.cpp rename to src/hws/cpu/utility.cpp index 7ba16d2..7bb6b3d 100644 --- a/src/hardware_sampling/cpu/utility.cpp +++ b/src/hws/cpu/utility.cpp @@ -5,16 +5,16 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/cpu/utility.hpp" +#include "hws/cpu/utility.hpp" -#include "hardware_sampling/utility.hpp" // hws::detail::split_as +#include "hws/utility.hpp" // hws::detail::split_as +#include "fmt/format.h" // fmt::format #include "subprocess.h" // subprocess_s, subprocess_create, subprocess_join, subprocess_stdout, subprocess_option_e #include // std::transform #include // std::size_t #include // std::FILE, std::fread -#include // std::format #include // std::runtime_error #include // std::string #include // std::string_view @@ -36,12 +36,12 @@ std::string run_subprocess(const std::string_view cmd_line) { // create subprocess subprocess_s proc{}; - HWS_SUBPROCESS_ERROR_CHECK(subprocess_create(cmd_ptr_split.data(), options, &proc)); + HWS_SUBPROCESS_ERROR_CHECK(subprocess_create(cmd_ptr_split.data(), options, &proc)) // wait until process has finished int return_code{}; - HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code)); + HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code)) if (return_code != 0) { - throw std::runtime_error{ std::format("Error: \"{}\" returned with {}!", cmd_line, return_code) }; + throw std::runtime_error{ fmt::format("Error: \"{}\" returned with {}!", cmd_line, return_code) }; } // get output handle and read data -> stdout and stderr are the same handle @@ -50,7 +50,7 @@ std::string run_subprocess(const std::string_view cmd_line) { const std::size_t bytes_read = std::fread(buffer.data(), sizeof(typename decltype(buffer)::value_type), buffer.size(), out_handle); // destroy subprocess - HWS_SUBPROCESS_ERROR_CHECK(subprocess_destroy(&proc)); + HWS_SUBPROCESS_ERROR_CHECK(subprocess_destroy(&proc)) // create output return buffer.substr(0, bytes_read); diff --git a/src/hardware_sampling/event.cpp b/src/hws/event.cpp similarity index 80% rename from src/hardware_sampling/event.cpp rename to src/hws/event.cpp index b88eaa3..373990e 100644 --- a/src/hardware_sampling/event.cpp +++ b/src/hws/event.cpp @@ 
-5,15 +5,16 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/event.hpp" +#include "hws/event.hpp" + +#include "fmt/format.h" // fmt::format -#include // std::format #include // std::ostream namespace hws { std::ostream &operator<<(std::ostream &out, const event &e) { - return out << std::format("time_point: {}\n" + return out << fmt::format("time_point: {}\n" "name: {}", e.time_point.time_since_epoch(), e.name); diff --git a/src/hws/gpu_amd/hardware_sampler.cpp b/src/hws/gpu_amd/hardware_sampler.cpp new file mode 100644 index 0000000..6d52e03 --- /dev/null +++ b/src/hws/gpu_amd/hardware_sampler.cpp @@ -0,0 +1,727 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "hws/gpu_amd/hardware_sampler.hpp" + +#include "hws/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} +#include "hws/gpu_amd/utility.hpp" // hws::detail::performance_level_to_string, HWS_ROCM_SMI_ERROR_CHECK +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::detail::time_points_to_epoch + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "hip/hip_runtime_api.h" // HIP runtime functions +#include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions + +#include // std::chrono::{steady_clock, duration_cast, milliseconds} +#include // std::size_t +#include // std::uint32_t, std::uint64_t +#include // std::exception, std::terminate +#include // std::ios_base +#include // std::cerr, std::endl +#include // std::optional +#include // std::ostream +#include // std::runtime_error +#include // std::string +#include // 
std::this_thread +#include // std::move +#include // std::vector + +namespace hws { + +gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const sample_category category) : + gpu_amd_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { } + +gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const sample_category category) : + gpu_amd_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { } + +gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + gpu_amd_hardware_sampler{ 0, sampling_interval, category } { } + +gpu_amd_hardware_sampler::gpu_amd_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) : + hardware_sampler{ sampling_interval, category }, + device_id_{ static_cast(device_id) } { + // make sure that rsmi_init is only called once for all instances + if (instances_++ == 0) { + HWS_ROCM_SMI_ERROR_CHECK(rsmi_init(std::uint64_t{ 0 })) + // notify that initialization has been finished + init_finished_ = true; + } else { + // wait until init has been finished! 
+ while (!init_finished_) { } + } +} + +gpu_amd_hardware_sampler::~gpu_amd_hardware_sampler() { + try { + // if this hardware sampler is still sampling, stop it + if (this->has_sampling_started() && !this->has_sampling_stopped()) { + this->stop_sampling(); + } + + // the last instance must shut down the ROCm SMI runtime + // make sure that rsmi_shut_down is only called once + if (--instances_ == 0) { + HWS_ROCM_SMI_ERROR_CHECK(rsmi_shut_down()) + // reset init_finished flag + init_finished_ = false; + } + } catch (const std::exception &e) { + std::cerr << e.what() << std::endl; + std::terminate(); + } +} + +void gpu_amd_hardware_sampler::sampling_loop() { + // + // add samples where we only have to retrieve the value once + // + + this->add_time_point(std::chrono::steady_clock::now()); + + double initial_total_power_consumption{}; // initial total power consumption in J + + // retrieve initial general information + if (this->sample_category_enabled(sample_category::general)) { + // fixed information -> only retrieved once + // the byte order is given by AMD directly + general_samples_.byte_order_ = "Little Endian"; + + hipDeviceProp_t prop{}; + if (hipGetDeviceProperties(&prop, static_cast(device_id_)) == hipSuccess) { + const std::string architecture{ prop.gcnArchName }; + general_samples_.architecture_ = architecture.substr(0, architecture.find_first_of('\0')); + } + + std::string vendor_id(static_cast(1024), '\0'); + if (rsmi_dev_vendor_name_get(device_id_, vendor_id.data(), vendor_id.size()) == RSMI_STATUS_SUCCESS) { + general_samples_.vendor_id_ = vendor_id.substr(0, vendor_id.find_first_of('\0')); + } + + std::string name(static_cast(1024), '\0'); + if (rsmi_dev_name_get(device_id_, name.data(), name.size()) == RSMI_STATUS_SUCCESS) { + general_samples_.name_ = name.substr(0, name.find_first_of('\0')); + } + + // queried samples -> retrieved every iteration if available + rsmi_dev_perf_level_t pstate{}; + if (rsmi_dev_perf_level_get(device_id_, &pstate) == 
RSMI_STATUS_SUCCESS) { + general_samples_.performance_level_ = decltype(general_samples_.performance_level_)::value_type{ detail::performance_level_to_string(pstate) }; + } + + decltype(general_samples_.compute_utilization_)::value_type::value_type utilization_gpu{}; + if (rsmi_dev_busy_percent_get(device_id_, &utilization_gpu) == RSMI_STATUS_SUCCESS) { + general_samples_.compute_utilization_ = decltype(general_samples_.compute_utilization_)::value_type{ utilization_gpu }; + } + + decltype(general_samples_.memory_utilization_)::value_type::value_type utilization_mem{}; + if (rsmi_dev_memory_busy_percent_get(device_id_, &utilization_mem) == RSMI_STATUS_SUCCESS) { + general_samples_.memory_utilization_ = decltype(general_samples_.memory_utilization_)::value_type{ utilization_mem }; + } + } + + // retrieve initial clock related information + if (this->sample_category_enabled(sample_category::clock)) { + rsmi_frequencies_t frequency_info{}; + if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info) == RSMI_STATUS_SUCCESS) { + clock_samples_.clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000'000.0; + clock_samples_.clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0; + decltype(clock_samples_.available_clock_frequencies_)::value_type frequencies{}; + for (std::size_t i = 0; i < frequency_info.num_supported; ++i) { + frequencies.push_back(static_cast(frequency_info.frequency[i]) / 1000'000.0); + } + clock_samples_.available_clock_frequencies_ = frequencies; + + // queried samples -> retrieved every iteration if available + clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{}; + if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { + clock_samples_.clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); + } else { + clock_samples_.clock_frequency_->push_back(0); + } + } + + if 
(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info) == RSMI_STATUS_SUCCESS) { + clock_samples_.socket_clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000'000.0; + clock_samples_.socket_clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0; + // queried samples -> retrieved every iteration if available + clock_samples_.socket_clock_frequency_ = decltype(clock_samples_.socket_clock_frequency_)::value_type{}; + if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { + clock_samples_.socket_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); + } else { + clock_samples_.socket_clock_frequency_->push_back(0); + } + } + + if (rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info) == RSMI_STATUS_SUCCESS) { + clock_samples_.memory_clock_frequency_min_ = static_cast(frequency_info.frequency[0]) / 1000'000.0; + clock_samples_.memory_clock_frequency_max_ = static_cast(frequency_info.frequency[frequency_info.num_supported - 1]) / 1000'000.0; + decltype(clock_samples_.available_memory_clock_frequencies_)::value_type frequencies{}; + for (std::size_t i = 0; i < frequency_info.num_supported; ++i) { + frequencies.push_back(static_cast(frequency_info.frequency[i]) / 1000'000.0); + } + clock_samples_.available_memory_clock_frequencies_ = frequencies; + + // queried samples -> retrieved every iteration if available + clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{}; + if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { + clock_samples_.memory_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); + } else { + clock_samples_.memory_clock_frequency_->push_back(0); + } + } + + // queried samples -> retrieved every iteration if available + decltype(clock_samples_.overdrive_level_)::value_type::value_type 
overdrive_level{}; + if (rsmi_dev_overdrive_level_get(device_id_, &overdrive_level) == RSMI_STATUS_SUCCESS) { + clock_samples_.overdrive_level_ = decltype(clock_samples_.overdrive_level_)::value_type{ overdrive_level }; + } + + decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type memory_overdrive_level{}; + if (rsmi_dev_mem_overdrive_level_get(device_id_, &memory_overdrive_level) == RSMI_STATUS_SUCCESS) { + clock_samples_.memory_overdrive_level_ = decltype(clock_samples_.memory_overdrive_level_)::value_type{ memory_overdrive_level }; + } + } + + // retrieve initial power related information + if (this->sample_category_enabled(sample_category::power)) { + std::uint64_t power_default_cap{}; + if (rsmi_dev_power_cap_default_get(device_id_, &power_default_cap) == RSMI_STATUS_SUCCESS) { + power_samples_.power_management_limit_ = static_cast(power_default_cap) / 1000'000.0; + } + + std::uint64_t power_cap{}; + if (rsmi_dev_power_cap_get(device_id_, std::uint32_t{ 0 }, &power_cap) == RSMI_STATUS_SUCCESS) { + power_samples_.power_enforced_limit_ = static_cast(power_cap) / 1000'000.0; + } + + { + RSMI_POWER_TYPE power_type{}; + std::uint64_t power_usage{}; + if (rsmi_dev_power_get(device_id_, &power_usage, &power_type) == RSMI_STATUS_SUCCESS) { + switch (power_type) { + case RSMI_POWER_TYPE::RSMI_AVERAGE_POWER: + power_samples_.power_measurement_type_ = "average"; + break; + case RSMI_POWER_TYPE::RSMI_CURRENT_POWER: + power_samples_.power_measurement_type_ = "current/instant"; + break; + case RSMI_POWER_TYPE::RSMI_INVALID_POWER: + power_samples_.power_measurement_type_ = "invalid/undetected"; + break; + } + // report power usage since the first sample + power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(power_usage) / 1000'000.0 }; + } + } + + rsmi_power_profile_status_t power_profile{}; + if (rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile) == RSMI_STATUS_SUCCESS) { + 
decltype(power_samples_.available_power_profiles_)::value_type available_power_profiles{}; + // go through all possible power profiles + if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_CUSTOM_MASK) != std::uint64_t{ 0 }) { + available_power_profiles.emplace_back("CUSTOM"); + } + if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_VIDEO_MASK) != std::uint64_t{ 0 }) { + available_power_profiles.emplace_back("VIDEO"); + } + if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_POWER_SAVING_MASK) != std::uint64_t{ 0 }) { + available_power_profiles.emplace_back("POWER_SAVING"); + } + if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_COMPUTE_MASK) != std::uint64_t{ 0 }) { + available_power_profiles.emplace_back("COMPUTE"); + } + if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_VR_MASK) != std::uint64_t{ 0 }) { + available_power_profiles.emplace_back("VR"); + } + if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK) != std::uint64_t{ 0 }) { + available_power_profiles.emplace_back("3D_FULL_SCREEN"); + } + if ((power_profile.available_profiles & RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT) != std::uint64_t{ 0 }) { + available_power_profiles.emplace_back("BOOTUP_DEFAULT"); + } + power_samples_.available_power_profiles_ = std::move(available_power_profiles); + + // queried samples -> retrieved every iteration if available + switch (power_profile.current) { + case RSMI_PWR_PROF_PRST_CUSTOM_MASK: + power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "CUSTOM" }; + break; + case RSMI_PWR_PROF_PRST_VIDEO_MASK: + power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "VIDEO" }; + break; + case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK: + power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "POWER_SAVING" }; + break; + case RSMI_PWR_PROF_PRST_COMPUTE_MASK: + power_samples_.power_profile_ = 
decltype(power_samples_.power_profile_)::value_type{ "COMPUTE" }; + break; + case RSMI_PWR_PROF_PRST_VR_MASK: + power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "VR" }; + break; + case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK: + power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "3D_FULL_SCREEN" }; + break; + case RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT: + power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "BOOTUP_DEFAULT" }; + break; + case RSMI_PWR_PROF_PRST_INVALID: + power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ "INVALID" }; + break; + } + } + + // queried samples -> retrieved every iteration if available + [[maybe_unused]] std::uint64_t timestamp{}; + float resolution{}; + std::uint64_t power_total_energy_consumption{}; + if (rsmi_dev_energy_count_get(device_id_, &power_total_energy_consumption, &resolution, ×tamp) == RSMI_STATUS_SUCCESS) { + const auto scaled_value = static_cast(power_total_energy_consumption) * static_cast(resolution); + initial_total_power_consumption = scaled_value / 1000'000.0; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; + } else if (power_samples_.power_usage_.has_value()) { + // if the total energy consumption cannot be retrieved, but the current power draw, approximate it + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; + } + } + + // retrieve initial memory related information + if (this->sample_category_enabled(sample_category::memory)) { + decltype(memory_samples_.memory_total_)::value_type memory_total{}; + if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_total) == RSMI_STATUS_SUCCESS) { + memory_samples_.memory_total_ = memory_total; + } + + decltype(memory_samples_.visible_memory_total_)::value_type 
visible_memory_total{}; + if (rsmi_dev_memory_total_get(device_id_, RSMI_MEM_TYPE_VIS_VRAM, &visible_memory_total) == RSMI_STATUS_SUCCESS) { + memory_samples_.visible_memory_total_ = visible_memory_total; + } + + rsmi_pcie_bandwidth_t bandwidth_info{}; + if (rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info) == RSMI_STATUS_SUCCESS) { + memory_samples_.num_pcie_lanes_min_ = bandwidth_info.lanes[0]; + memory_samples_.num_pcie_lanes_max_ = bandwidth_info.lanes[bandwidth_info.transfer_rate.num_supported - 1]; + memory_samples_.pcie_link_transfer_rate_min_ = bandwidth_info.transfer_rate.frequency[0] / 1'000'000; + memory_samples_.pcie_link_transfer_rate_max_ = bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.num_supported - 1] / 1'000'000; + + // queried samples -> retrieved every iteration if available + memory_samples_.pcie_link_transfer_rate_ = decltype(memory_samples_.pcie_link_transfer_rate_)::value_type{}; + memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{}; + if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) { + memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1'000'000); + memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]); + } else { + // the current index is (somehow) wrong + memory_samples_.pcie_link_transfer_rate_->push_back(0); + memory_samples_.num_pcie_lanes_->push_back(0); + } + } + + // queried samples -> retrieved every iteration if available + decltype(memory_samples_.memory_used_)::value_type::value_type memory_used{}; + if (rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &memory_used) == RSMI_STATUS_SUCCESS) { + memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_used }; + if (memory_samples_.memory_total_.has_value()) { + memory_samples_.memory_free_ = 
decltype(memory_samples_.memory_used_)::value_type{ memory_samples_.memory_total_.value() - memory_samples_.memory_used_->front() }; + } + } + } + + // retrieve fixed temperature related information + if (this->sample_category_enabled(sample_category::temperature)) { + std::uint32_t fan_id{ 0 }; + std::int64_t fan_speed{}; + while (rsmi_dev_fan_speed_get(device_id_, fan_id, &fan_speed) == RSMI_STATUS_SUCCESS) { + if (fan_id == 0) { + // queried samples -> retrieved every iteration if available + const auto percentage = static_cast(fan_speed) / static_cast(RSMI_MAX_FAN_SPEED); + temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ percentage }; + } + ++fan_id; + } + temperature_samples_.num_fans_ = fan_id; + + decltype(temperature_samples_.fan_speed_max_)::value_type max_fan_speed{}; + if (rsmi_dev_fan_speed_max_get(device_id_, std::uint32_t{ 0 }, &max_fan_speed) == RSMI_STATUS_SUCCESS) { + temperature_samples_.fan_speed_max_ = max_fan_speed; + } + + std::int64_t temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MIN, &temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.temperature_min_ = static_cast(temperature_min) / 1000.0; + } + + std::int64_t temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_MAX, &temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.temperature_max_ = static_cast(temperature_max) / 1000.0; + } + + std::int64_t memory_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MIN, &memory_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.memory_temperature_min_ = static_cast(memory_temperature_min) / 1000.0; + } + + std::int64_t memory_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_MAX, &memory_temperature_max) == RSMI_STATUS_SUCCESS) { + 
temperature_samples_.memory_temperature_max_ = static_cast(memory_temperature_max) / 1000.0; + } + + std::int64_t hotspot_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MIN, &hotspot_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hotspot_temperature_min_ = static_cast(hotspot_temperature_min) / 1000.0; + } + + std::int64_t hotspot_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_MAX, &hotspot_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hotspot_temperature_max_ = static_cast(hotspot_temperature_max) / 1000.0; + } + + std::int64_t hbm_0_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MIN, &hbm_0_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_0_temperature_min_ = static_cast(hbm_0_temperature_min) / 1000.0; + } + + std::int64_t hbm_0_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_MAX, &hbm_0_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_0_temperature_max_ = static_cast(hbm_0_temperature_max) / 1000.0; + } + + std::int64_t hbm_1_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MIN, &hbm_1_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_1_temperature_min_ = static_cast(hbm_1_temperature_min) / 1000.0; + } + + std::int64_t hbm_1_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_MAX, &hbm_1_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_1_temperature_max_ = static_cast(hbm_1_temperature_max) / 1000.0; + } + + std::int64_t hbm_2_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MIN, &hbm_2_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_2_temperature_min_ = static_cast(hbm_2_temperature_min) / 1000.0; + 
} + + std::int64_t hbm_2_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_MAX, &hbm_2_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_2_temperature_max_ = static_cast(hbm_2_temperature_max) / 1000.0; + } + + std::int64_t hbm_3_temperature_min{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MIN, &hbm_3_temperature_min) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_3_temperature_min_ = static_cast(hbm_3_temperature_min) / 1000.0; + } + + std::int64_t hbm_3_temperature_max{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_MAX, &hbm_3_temperature_max) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_3_temperature_max_ = static_cast(hbm_3_temperature_max) / 1000.0; + } + + // queried samples -> retrieved every iteration if available + std::int64_t temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{ static_cast(temperature) / 1000.0 }; + } + + std::int64_t hotspot_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &hotspot_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hotspot_temperature_ = decltype(temperature_samples_.hotspot_temperature_)::value_type{ static_cast(hotspot_temperature) / 1000.0 }; + } + + std::int64_t memory_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &memory_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.memory_temperature_ = decltype(temperature_samples_.memory_temperature_)::value_type{ static_cast(memory_temperature) / 1000.0 }; + } + + std::int64_t hbm_0_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &hbm_0_temperature) == RSMI_STATUS_SUCCESS) { + 
temperature_samples_.hbm_0_temperature_ = decltype(temperature_samples_.hbm_0_temperature_)::value_type{ static_cast(hbm_0_temperature) / 1000.0 }; + } + + std::int64_t hbm_1_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &hbm_1_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_1_temperature_ = decltype(temperature_samples_.hbm_1_temperature_)::value_type{ static_cast(hbm_1_temperature) / 1000.0 }; + } + + std::int64_t hbm_2_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &hbm_2_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_2_temperature_ = decltype(temperature_samples_.hbm_2_temperature_)::value_type{ static_cast(hbm_2_temperature) / 1000.0 }; + } + + std::int64_t hbm_3_temperature{}; + if (rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &hbm_3_temperature) == RSMI_STATUS_SUCCESS) { + temperature_samples_.hbm_3_temperature_ = decltype(temperature_samples_.hbm_3_temperature_)::value_type{ static_cast(hbm_3_temperature) / 1000.0 }; + } + } + + // + // loop until stop_sampling() is called + // + + while (!this->has_sampling_stopped()) { + // only sample values if the sampler currently isn't paused + if (this->is_sampling()) { + // add current time point + this->add_time_point(std::chrono::steady_clock::now()); + + // retrieve general samples + if (this->sample_category_enabled(sample_category::general)) { + if (general_samples_.performance_level_.has_value()) { + rsmi_dev_perf_level_t pstate{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_perf_level_get(device_id_, &pstate)) + general_samples_.performance_level_->push_back(detail::performance_level_to_string(pstate)); + } + + if (general_samples_.compute_utilization_.has_value()) { + decltype(general_samples_.compute_utilization_)::value_type::value_type value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_busy_percent_get(device_id_, &value)) + 
general_samples_.compute_utilization_->push_back(value); + } + + if (general_samples_.memory_utilization_.has_value()) { + decltype(general_samples_.memory_utilization_)::value_type::value_type value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_busy_percent_get(device_id_, &value)) + general_samples_.memory_utilization_->push_back(value); + } + } + + // retrieve clock related samples + if (this->sample_category_enabled(sample_category::clock)) { + if (clock_samples_.clock_frequency_.has_value()) { + rsmi_frequencies_t frequency_info{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SYS, &frequency_info)) + if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { + clock_samples_.clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); + } else { + // the current index is (somehow) wrong + clock_samples_.clock_frequency_->push_back(0); + } + } + + if (clock_samples_.socket_clock_frequency_.has_value()) { + rsmi_frequencies_t frequency_info{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_SOC, &frequency_info)) + if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { + clock_samples_.socket_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); + } else { + // the current index is (somehow) wrong + clock_samples_.socket_clock_frequency_->push_back(0); + } + } + + if (clock_samples_.memory_clock_frequency_.has_value()) { + rsmi_frequencies_t frequency_info{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_gpu_clk_freq_get(device_id_, RSMI_CLK_TYPE_MEM, &frequency_info)) + if (frequency_info.current < RSMI_MAX_NUM_FREQUENCIES) { + clock_samples_.memory_clock_frequency_->push_back(static_cast(frequency_info.frequency[frequency_info.current]) / 1000'000.0); + } else { + // the current index is (somehow) wrong + clock_samples_.memory_clock_frequency_->push_back(0); + } + } + + if 
(clock_samples_.overdrive_level_.has_value()) { + decltype(clock_samples_.overdrive_level_)::value_type::value_type value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_overdrive_level_get(device_id_, &value)) + clock_samples_.overdrive_level_->push_back(value); + } + + if (clock_samples_.memory_overdrive_level_.has_value()) { + decltype(clock_samples_.memory_overdrive_level_)::value_type::value_type value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_mem_overdrive_level_get(device_id_, &value)) + clock_samples_.memory_overdrive_level_->push_back(value); + } + } + + // retrieve power related samples + if (this->sample_category_enabled(sample_category::power)) { + if (power_samples_.power_usage_.has_value()) { + [[maybe_unused]] RSMI_POWER_TYPE power_type{}; + std::uint64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_get(device_id_, &value, &power_type)) + power_samples_.power_usage_->push_back(static_cast(value) / 1000'000.0); + } + + if (power_samples_.power_total_energy_consumption_.has_value()) { + [[maybe_unused]] std::uint64_t timestamp{}; + float resolution{}; + std::uint64_t value{}; + if (rsmi_dev_energy_count_get(device_id_, &value, &resolution, ×tamp) == RSMI_STATUS_SUCCESS) { + const auto scaled_value = static_cast(value) * static_cast(resolution); + power_samples_.power_total_energy_consumption_->push_back((scaled_value / 1000'000.0) - initial_total_power_consumption); + } else if (power_samples_.power_usage_.has_value()) { + // if the total energy consumption cannot be retrieved, but the current power draw, approximate it + const std::size_t num_time_points = this->sampling_time_points().size(); + const auto time_difference = std::chrono::duration(this->sampling_time_points()[num_time_points - 1] - this->sampling_time_points()[num_time_points - 2]).count(); + const auto current = power_samples_.power_usage_->back() * time_difference; + power_samples_.power_total_energy_consumption_->push_back(power_samples_.power_total_energy_consumption_->back() + current); + 
} + } + + if (power_samples_.power_profile_.has_value()) { + rsmi_power_profile_status_t power_profile{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_power_profile_presets_get(device_id_, std::uint32_t{ 0 }, &power_profile)) + switch (power_profile.current) { + case RSMI_PWR_PROF_PRST_CUSTOM_MASK: + power_samples_.power_profile_->emplace_back("CUSTOM"); + break; + case RSMI_PWR_PROF_PRST_VIDEO_MASK: + power_samples_.power_profile_->emplace_back("VIDEO"); + break; + case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK: + power_samples_.power_profile_->emplace_back("POWER_SAVING"); + break; + case RSMI_PWR_PROF_PRST_COMPUTE_MASK: + power_samples_.power_profile_->emplace_back("COMPUTE"); + break; + case RSMI_PWR_PROF_PRST_VR_MASK: + power_samples_.power_profile_->emplace_back("VR"); + break; + case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK: + power_samples_.power_profile_->emplace_back("3D_FULL_SCREEN"); + break; + case RSMI_PWR_PROF_PRST_BOOTUP_DEFAULT: + power_samples_.power_profile_->emplace_back("BOOTUP_DEFAULT"); + break; + case RSMI_PWR_PROF_PRST_INVALID: + power_samples_.power_profile_->emplace_back("INVALID"); + break; + } + } + } + + // retrieve memory related samples + if (this->sample_category_enabled(sample_category::memory)) { + if (memory_samples_.memory_used_.has_value()) { + decltype(memory_samples_.memory_used_)::value_type::value_type value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_memory_usage_get(device_id_, RSMI_MEM_TYPE_VRAM, &value)) + memory_samples_.memory_used_->push_back(value); + if (memory_samples_.memory_free_.has_value()) { + memory_samples_.memory_free_->push_back(memory_samples_.memory_total_.value() - value); + } + } + + if (memory_samples_.pcie_link_transfer_rate_.has_value() && memory_samples_.num_pcie_lanes_.has_value()) { + rsmi_pcie_bandwidth_t bandwidth_info{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_pci_bandwidth_get(device_id_, &bandwidth_info)) + if (bandwidth_info.transfer_rate.current < RSMI_MAX_NUM_FREQUENCIES) { + 
memory_samples_.pcie_link_transfer_rate_->push_back(bandwidth_info.transfer_rate.frequency[bandwidth_info.transfer_rate.current] / 1'000'000); + memory_samples_.num_pcie_lanes_->push_back(bandwidth_info.lanes[bandwidth_info.transfer_rate.current]); + } else { + // the current index is (somehow) wrong + memory_samples_.pcie_link_transfer_rate_->push_back(0); + memory_samples_.num_pcie_lanes_->push_back(0); + } + } + } + + // retrieve temperature related samples + if (this->sample_category_enabled(sample_category::temperature)) { + if (temperature_samples_.fan_speed_percentage_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_fan_speed_get(device_id_, std::uint32_t{ 0 }, &value)) + temperature_samples_.fan_speed_percentage_->push_back(static_cast(value) / static_cast(RSMI_MAX_FAN_SPEED)); + } + + if (temperature_samples_.temperature_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_EDGE, RSMI_TEMP_CURRENT, &value)) + temperature_samples_.temperature_->push_back(static_cast(value) / 1000.0); + } + + if (temperature_samples_.memory_temperature_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_MEMORY, RSMI_TEMP_CURRENT, &value)) + temperature_samples_.memory_temperature_->push_back(static_cast(value) / 1000.0); + } + + if (temperature_samples_.hotspot_temperature_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &value)) + temperature_samples_.hotspot_temperature_->push_back(static_cast(value) / 1000.0); + } + + if (temperature_samples_.hbm_0_temperature_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_0, RSMI_TEMP_CURRENT, &value)) + temperature_samples_.hbm_0_temperature_->push_back(static_cast(value) / 1000.0); + } + + if 
(temperature_samples_.hbm_1_temperature_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_1, RSMI_TEMP_CURRENT, &value)) + temperature_samples_.hbm_1_temperature_->push_back(static_cast(value) / 1000.0); + } + + if (temperature_samples_.hbm_2_temperature_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_2, RSMI_TEMP_CURRENT, &value)) + temperature_samples_.hbm_2_temperature_->push_back(static_cast(value) / 1000.0); + } + + if (temperature_samples_.hbm_3_temperature_.has_value()) { + std::int64_t value{}; + HWS_ROCM_SMI_ERROR_CHECK(rsmi_dev_temp_metric_get(device_id_, RSMI_TEMP_TYPE_HBM_3, RSMI_TEMP_CURRENT, &value)) + temperature_samples_.hbm_3_temperature_->push_back(static_cast(value) / 1000.0); + } + } + } + + // wait for the sampling interval to pass to retrieve the next sample + std::this_thread::sleep_for(this->sampling_interval()); + } +} + +std::string gpu_amd_hardware_sampler::device_identification() const { + return fmt::format("gpu_amd_device_{}", device_id_); +} + +std::string gpu_amd_hardware_sampler::samples_only_as_yaml_string() const { + // check whether it's safe to generate the YAML entry + if (this->is_sampling()) { + throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; + } + + return fmt::format("{}{}" + "{}{}" + "{}{}" + "{}{}" + "{}", + general_samples_.generate_yaml_string(), + general_samples_.has_samples() ? "\n" : "", + clock_samples_.generate_yaml_string(), + clock_samples_.has_samples() ? "\n" : "", + power_samples_.generate_yaml_string(), + power_samples_.has_samples() ? "\n" : "", + memory_samples_.generate_yaml_string(), + memory_samples_.has_samples() ? 
"\n" : "", + temperature_samples_.generate_yaml_string()); +} + +std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &sampler) { + if (sampler.is_sampling()) { + out.setstate(std::ios_base::failbit); + return out; + } else { + return out << fmt::format("sampling interval: {}\n" + "time points: [{}]\n\n" + "general samples:\n{}\n\n" + "clock samples:\n{}\n\n" + "power samples:\n{}\n\n" + "memory samples:\n{}\n\n" + "temperature samples:\n{}", + sampler.sampling_interval(), + fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), + sampler.general_samples(), + sampler.clock_samples(), + sampler.power_samples(), + sampler.memory_samples(), + sampler.temperature_samples()); + } +} + +} // namespace hws diff --git a/src/hws/gpu_amd/rocm_smi_samples.cpp b/src/hws/gpu_amd/rocm_smi_samples.cpp new file mode 100644 index 0000000..f149c4e --- /dev/null +++ b/src/hws/gpu_amd/rocm_smi_samples.cpp @@ -0,0 +1,706 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "hws/gpu_amd/rocm_smi_samples.hpp" + +#include "hws/utility.hpp" // hws::detail::{value_or_default, quote} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join + +#include // std::ostream +#include // std::string + +namespace hws { + +//*************************************************************************************************************************************// +// general samples // +//*************************************************************************************************************************************// + +bool rocm_smi_general_samples::has_samples() const { + return this->architecture_.has_value() || this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value() + || this->compute_utilization_.has_value() || this->memory_utilization_.has_value() || this->performance_level_.has_value(); +} + +std::string rocm_smi_general_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "general:\n" }; + + // device architecture + if (this->architecture_.has_value()) { + str += fmt::format(" architecture:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->architecture_.value()); + } + // device byte order + if (this->byte_order_.has_value()) { + str += fmt::format(" byte_order:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->byte_order_.value()); + } + // the vendor specific ID + if (this->vendor_id_.has_value()) { + str += fmt::format(" vendor_id:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->vendor_id_.value()); + } + // device name + if (this->name_.has_value()) { + str += fmt::format(" name:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->name_.value()); + } + + // device compute utilization + if (this->compute_utilization_.has_value()) { + str += fmt::format(" compute_utilization:\n" + " unit: \"percentage\"\n" + " values: [{}]\n", 
+ fmt::join(this->compute_utilization_.value(), ", ")); + } + // device memory utilization + if (this->memory_utilization_.has_value()) { + str += fmt::format(" memory_utilization:\n" + " unit: \"percentage\"\n" + " values: [{}]\n", + fmt::join(this->memory_utilization_.value(), ", ")); + } + // performance state + if (this->performance_level_.has_value()) { + str += fmt::format(" performance_state:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(detail::quote(this->performance_level_.value()), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const rocm_smi_general_samples &samples) { + return out << fmt::format("architecture [string]: {}\n" + "byte_order [string]: {}\n" + "vendor_id [string]: {}\n" + "name [string]: {}\n" + "compute_utilization [%]: [{}]\n" + "memory_utilization [%]: [{}]\n" + "performance_level [string]: [{}]", + detail::value_or_default(samples.get_architecture()), + detail::value_or_default(samples.get_byte_order()), + detail::value_or_default(samples.get_vendor_id()), + detail::value_or_default(samples.get_name()), + fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_utilization()), ", "), + fmt::join(detail::value_or_default(samples.get_performance_level()), ", ")); +} + +//*************************************************************************************************************************************// +// clock samples // +//*************************************************************************************************************************************// + +bool rocm_smi_clock_samples::has_samples() const { + return this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() || this->memory_clock_frequency_min_.has_value() + || this->memory_clock_frequency_max_.has_value() || this->socket_clock_frequency_min_.has_value() || this->socket_clock_frequency_max_.has_value() + || 
this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value() + || this->memory_clock_frequency_.has_value() || this->socket_clock_frequency_.has_value() || this->overdrive_level_.has_value() + || this->memory_overdrive_level_.has_value(); +} + +std::string rocm_smi_clock_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "clock:\n" }; + + // system clock min frequencies + if (this->clock_frequency_min_.has_value()) { + str += fmt::format(" clock_frequency_min:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->clock_frequency_min_.value()); + } + // system clock max frequencies + if (this->clock_frequency_max_.has_value()) { + str += fmt::format(" clock_frequency_max:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->clock_frequency_max_.value()); + } + // memory clock min frequencies + if (this->memory_clock_frequency_min_.has_value()) { + str += fmt::format(" memory_clock_frequency_min:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->memory_clock_frequency_min_.value()); + } + // memory clock max frequencies + if (this->memory_clock_frequency_max_.has_value()) { + str += fmt::format(" memory_clock_frequency_max:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->memory_clock_frequency_max_.value()); + } + // socket clock min frequencies + if (this->socket_clock_frequency_min_.has_value()) { + str += fmt::format(" socket_clock_frequency_min:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->socket_clock_frequency_min_.value()); + } + // socket clock max frequencies + if (this->socket_clock_frequency_max_.has_value()) { + str += fmt::format(" socket_clock_frequency_max:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->socket_clock_frequency_max_.value()); + } + // the available clock frequencies + if (this->available_clock_frequencies_.has_value()) { + str += 
fmt::format(" available_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->available_clock_frequencies_.value(), ", ")); + } + // the available memory clock frequencies + if (this->available_memory_clock_frequencies_.has_value()) { + str += fmt::format(" available_memory_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->available_memory_clock_frequencies_.value(), ", ")); + } + + // system clock frequency + if (this->clock_frequency_.has_value()) { + str += fmt::format(" clock_frequency:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->clock_frequency_.value(), ", ")); + } + // memory clock frequency + if (this->memory_clock_frequency_.has_value()) { + str += fmt::format(" memory_clock_frequency:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->memory_clock_frequency_.value(), ", ")); + } + // socket clock frequency + if (this->socket_clock_frequency_.has_value()) { + str += fmt::format(" socket_clock_frequency:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->socket_clock_frequency_.value(), ", ")); + } + // overdrive level + if (this->overdrive_level_.has_value()) { + str += fmt::format(" overdrive_level:\n" + " unit: \"percentage\"\n" + " values: [{}]\n", + fmt::join(this->overdrive_level_.value(), ", ")); + } + // memory overdrive level + if (this->memory_overdrive_level_.has_value()) { + str += fmt::format(" memory_overdrive_level:\n" + " unit: \"percentage\"\n" + " values: [{}]\n", + fmt::join(this->memory_overdrive_level_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const rocm_smi_clock_samples &samples) { + return out << fmt::format("clock_frequency_min [MHz]: {}\n" + "clock_frequency_max [MHz]: {}\n" + "memory_clock_frequency_min [MHz]: {}\n" + "memory_clock_frequency_max [MHz]: {}\n" + "socket_clock_frequency_min [MHz]: {}\n" + "socket_clock_frequency_max [MHz]: {}\n" + "available_clock_frequencies [MHz]: 
[{}]\n" + "available_memory_clock_frequencies [MHz]: [{}]\n" + "clock_frequency [MHz]: [{}]\n" + "memory_clock_frequency [MHz]: [{}]\n" + "socket_clock_frequency [MHz]: [{}]\n" + "overdrive_level [%]: [{}]\n" + "memory_overdrive_level [%]: [{}]", + detail::value_or_default(samples.get_clock_frequency_min()), + detail::value_or_default(samples.get_clock_frequency_max()), + detail::value_or_default(samples.get_memory_clock_frequency_min()), + detail::value_or_default(samples.get_memory_clock_frequency_max()), + detail::value_or_default(samples.get_socket_clock_frequency_min()), + detail::value_or_default(samples.get_socket_clock_frequency_max()), + fmt::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_socket_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_overdrive_level()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_overdrive_level()), ", ")); +} + +//*************************************************************************************************************************************// +// power samples // +//*************************************************************************************************************************************// + +bool rocm_smi_power_samples::has_samples() const { + return this->power_management_limit_.has_value() || this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value() + || this->available_power_profiles_.has_value() || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value() + || this->power_profile_.has_value(); +} + +std::string rocm_smi_power_samples::generate_yaml_string() const { + // if no 
samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "power:\n" }; + + // power management limit + if (this->power_management_limit_.has_value()) { + str += fmt::format(" power_management_limit:\n" + " unit: \"W\"\n" + " values: {}\n", + this->power_management_limit_.value()); + } + // power enforced limit + if (this->power_enforced_limit_.has_value()) { + str += fmt::format(" power_enforced_limit:\n" + " unit: \"W\"\n" + " values: {}\n", + this->power_enforced_limit_.value()); + } + // power measurement type + if (this->power_measurement_type_.has_value()) { + str += fmt::format(" power_measurement_type:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->power_measurement_type_.value()); + } + // available power levels + if (this->available_power_profiles_.has_value()) { + str += fmt::format(" available_power_profiles:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(detail::quote(this->available_power_profiles_.value()), ", ")); + } + + // current power usage + if (this->power_usage_.has_value()) { + str += fmt::format(" power_usage:\n" + " unit: \"W\"\n" + " values: [{}]\n", + fmt::join(this->power_usage_.value(), ", ")); + } + // total energy consumed + if (this->power_total_energy_consumption_.has_value()) { + str += fmt::format(" power_total_energy_consumed:\n" + " unit: \"J\"\n" + " values: [{}]\n", + fmt::join(this->power_total_energy_consumption_.value(), ", ")); + } + // current power level + if (this->power_profile_.has_value()) { + str += fmt::format(" power_profile:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(detail::quote(this->power_profile_.value()), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const rocm_smi_power_samples &samples) { + return out << fmt::format("power_management_limit [W]: {}\n" + "power_enforced_limit [W]: {}\n" + "power_measurement_type [string]: {}\n" + "available_power_profiles [string]: [{}]\n" + 
"power_usage [W]: [{}]\n" + "power_total_energy_consumption [J]: [{}]\n" + "power_profile [string]: [{}]", + detail::value_or_default(samples.get_power_management_limit()), + detail::value_or_default(samples.get_power_enforced_limit()), + detail::value_or_default(samples.get_power_measurement_type()), + fmt::join(detail::value_or_default(samples.get_available_power_profiles()), ", "), + fmt::join(detail::value_or_default(samples.get_power_usage()), ", "), + fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", "), + fmt::join(detail::value_or_default(samples.get_power_profile()), ", ")); +} + +//*************************************************************************************************************************************// +// memory samples // +//*************************************************************************************************************************************// + +bool rocm_smi_memory_samples::has_samples() const { + return this->memory_total_.has_value() || this->visible_memory_total_.has_value() || this->num_pcie_lanes_min_.has_value() + || this->num_pcie_lanes_max_.has_value() || this->pcie_link_transfer_rate_min_.has_value() || this->pcie_link_transfer_rate_max_.has_value() + || this->memory_used_.has_value() || this->memory_free_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_transfer_rate_.has_value(); +} + +std::string rocm_smi_memory_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "memory:\n" }; + + // total memory + if (this->memory_total_.has_value()) { + str += fmt::format(" memory_total:\n" + " unit: \"B\"\n" + " values: {}\n", + this->memory_total_.value()); + } + // total visible memory + if (this->visible_memory_total_.has_value()) { + str += fmt::format(" visible_memory_total:\n" + " unit: \"B\"\n" + " values: {}\n", + this->visible_memory_total_.value()); + 
} + // min number of PCIe lanes + if (this->num_pcie_lanes_min_.has_value()) { + str += fmt::format(" num_pcie_lanes_min:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_pcie_lanes_min_.value()); + } + // max number of PCIe lanes + if (this->num_pcie_lanes_max_.has_value()) { + str += fmt::format(" num_pcie_lanes_max:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_pcie_lanes_max_.value()); + } + // the minimum PCIe link transfer rate + if (this->pcie_link_transfer_rate_min_.has_value()) { + str += fmt::format(" pcie_link_transfer_rate_min:\n" + " unit: \"MT/s\"\n" + " values: {}\n", + this->pcie_link_transfer_rate_min_.value()); + } + // the maximum PCIe link transfer rate + if (this->pcie_link_transfer_rate_max_.has_value()) { + str += fmt::format(" pcie_link_transfer_rate_max:\n" + " unit: \"MT/s\"\n" + " values: {}\n", + this->pcie_link_transfer_rate_max_.value()); + } + + // used memory + if (this->memory_used_.has_value()) { + str += fmt::format(" memory_used:\n" + " unit: \"B\"\n" + " values: [{}]\n", + fmt::join(this->memory_used_.value(), ", ")); + } + // free memory + if (this->memory_free_.has_value()) { + str += fmt::format(" memory_free:\n" + " unit: \"B\"\n" + " values: [{}]\n", + fmt::join(this->memory_free_.value(), ", ")); + } + + // number of PCIe lanes + if (this->num_pcie_lanes_.has_value()) { + str += fmt::format(" num_pcie_lanes:\n" + " unit: \"int\"\n" + " values: [{}]\n", + fmt::join(this->num_pcie_lanes_.value(), ", ")); + } + // PCIe transfer rate + if (this->pcie_link_transfer_rate_.has_value()) { + str += fmt::format(" pcie_link_transfer_rate:\n" + " unit: \"MT/s\"\n" + " values: [{}]\n", + fmt::join(this->pcie_link_transfer_rate_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const rocm_smi_memory_samples &samples) { + return out << fmt::format("memory_total [B]: {}\n" + "visible_memory_total [B]: {}\n" + "num_pcie_lanes_min [int]: {}\n" + "num_pcie_lanes_max [int]: {}\n" + 
"pcie_link_transfer_rate_min [MBPS]: {}\n" + "pcie_link_transfer_rate_max [MBPS]: {}\n" + "memory_used [B]: [{}]\n" + "memory_free [B]: [{}]\n" + "num_pcie_lanes [int]: [{}]\n" + "pcie_link_transfer_rate [MBPS]: [{}]", + detail::value_or_default(samples.get_memory_total()), + detail::value_or_default(samples.get_visible_memory_total()), + detail::value_or_default(samples.get_num_pcie_lanes_min()), + detail::value_or_default(samples.get_num_pcie_lanes_max()), + detail::value_or_default(samples.get_pcie_link_transfer_rate_min()), + detail::value_or_default(samples.get_pcie_link_transfer_rate_max()), + fmt::join(detail::value_or_default(samples.get_memory_used()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), + fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_transfer_rate()), ", ")); +} + +//*************************************************************************************************************************************// +// temperature samples // +//*************************************************************************************************************************************// + +bool rocm_smi_temperature_samples::has_samples() const { + return this->num_fans_.has_value() || this->fan_speed_max_.has_value() || this->temperature_min_.has_value() || this->temperature_max_.has_value() + || this->memory_temperature_min_.has_value() || this->memory_temperature_max_.has_value() || this->hotspot_temperature_min_.has_value() + || this->hotspot_temperature_max_.has_value() || this->hbm_0_temperature_min_.has_value() || this->hbm_0_temperature_max_.has_value() + || this->hbm_1_temperature_min_.has_value() || this->hbm_1_temperature_max_.has_value() || this->hbm_2_temperature_min_.has_value() + || this->hbm_2_temperature_max_.has_value() || this->hbm_3_temperature_min_.has_value() || this->hbm_3_temperature_max_.has_value() + || 
this->fan_speed_percentage_.has_value() || this->temperature_.has_value() || this->memory_temperature_.has_value() + || this->hotspot_temperature_.has_value() || this->hbm_0_temperature_.has_value() || this->hbm_1_temperature_.has_value() + || this->hbm_2_temperature_.has_value() || this->hbm_3_temperature_.has_value(); +} + +std::string rocm_smi_temperature_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "temperature:\n" }; + + // number of fans (emulated) + if (this->num_fans_.has_value()) { + str += fmt::format(" num_fans:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_fans_.value()); + } + // maximum fan speed + if (this->fan_speed_max_.has_value()) { + str += fmt::format(" fan_speed_max:\n" + " unit: \"RPM\"\n" + " values: {}\n", + this->fan_speed_max_.value()); + } + // minimum GPU edge temperature + if (this->temperature_min_.has_value()) { + str += fmt::format(" temperature_min:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->temperature_min_.value()); + } + // maximum GPU edge temperature + if (this->temperature_max_.has_value()) { + str += fmt::format(" temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->temperature_max_.value()); + } + // minimum GPU memory temperature + if (this->memory_temperature_min_.has_value()) { + str += fmt::format(" memory_temperature_min:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->memory_temperature_min_.value()); + } + // maximum GPU memory temperature + if (this->memory_temperature_max_.has_value()) { + str += fmt::format(" memory_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->memory_temperature_max_.value()); + } + // minimum GPU hotspot temperature + if (this->hotspot_temperature_min_.has_value()) { + str += fmt::format(" hotspot_temperature_min:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hotspot_temperature_min_.value()); + } + // maximum GPU hotspot 
temperature + if (this->hotspot_temperature_max_.has_value()) { + str += fmt::format(" hotspot_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hotspot_temperature_max_.value()); + } + // minimum GPU HBM 0 temperature + if (this->hbm_0_temperature_min_.has_value()) { + str += fmt::format(" hbm_0_temperature_min:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hbm_0_temperature_min_.value()); + } + // maximum GPU HBM 0 temperature + if (this->hbm_0_temperature_max_.has_value()) { + str += fmt::format(" hbm_0_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hbm_0_temperature_max_.value()); + } + // minimum GPU HBM 1 temperature + if (this->hbm_1_temperature_min_.has_value()) { + str += fmt::format(" hbm_1_temperature_min:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hbm_1_temperature_min_.value()); + } + // maximum GPU HBM 1 temperature + if (this->hbm_1_temperature_max_.has_value()) { + str += fmt::format(" hbm_1_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hbm_1_temperature_max_.value()); + } + // minimum GPU HBM 2 temperature + if (this->hbm_2_temperature_min_.has_value()) { + str += fmt::format(" hbm_2_temperature_min:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hbm_2_temperature_min_.value()); + } + // maximum GPU HBM 2 temperature + if (this->hbm_2_temperature_max_.has_value()) { + str += fmt::format(" hbm_2_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hbm_2_temperature_max_.value()); + } + // minimum GPU HBM 3 temperature + if (this->hbm_3_temperature_min_.has_value()) { + str += fmt::format(" hbm_3_temperature_min:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hbm_3_temperature_min_.value()); + } + // maximum GPU HBM 3 temperature + if (this->hbm_3_temperature_max_.has_value()) { + str += fmt::format(" hbm_3_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->hbm_3_temperature_max_.value()); + } + + // fan speed + if 
(this->fan_speed_percentage_.has_value()) { + str += fmt::format(" fan_speed_percentage:\n" + " unit: \"percentage\"\n" + " values: [{}]\n", + fmt::join(this->fan_speed_percentage_.value(), ", ")); + } + // GPU edge temperature + if (this->temperature_.has_value()) { + str += fmt::format(" temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->temperature_.value(), ", ")); + } + // GPU memory temperature + if (this->memory_temperature_.has_value()) { + str += fmt::format(" memory_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->memory_temperature_.value(), ", ")); + } + // GPU hotspot temperature + if (this->hotspot_temperature_.has_value()) { + str += fmt::format(" hotspot_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->hotspot_temperature_.value(), ", ")); + } + // GPU HBM 0 temperature + if (this->hbm_0_temperature_.has_value()) { + str += fmt::format(" hbm_0_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->hbm_0_temperature_.value(), ", ")); + } + // GPU HBM 1 temperature + if (this->hbm_1_temperature_.has_value()) { + str += fmt::format(" hbm_1_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->hbm_1_temperature_.value(), ", ")); + } + // GPU HBM 2 temperature + if (this->hbm_2_temperature_.has_value()) { + str += fmt::format(" hbm_2_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->hbm_2_temperature_.value(), ", ")); + } + // GPU HBM 3 temperature + if (this->hbm_3_temperature_.has_value()) { + str += fmt::format(" hbm_3_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->hbm_3_temperature_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const rocm_smi_temperature_samples &samples) { + return out << fmt::format("num_fans [int]: {}\n" + "fan_speed_max [RPM]: {}\n" + "temperature_min [°C]: {}\n" + "temperature_max [°C]: {}\n" + "memory_temperature_min 
[°C]: {}\n" + "memory_temperature_max [°C]: {}\n" + "hotspot_temperature_min [°C]: {}\n" + "hotspot_temperature_max [°C]: {}\n" + "hbm_0_temperature_min [°C]: {}\n" + "hbm_0_temperature_max [°C]: {}\n" + "hbm_1_temperature_min [°C]: {}\n" + "hbm_1_temperature_max [°C]: {}\n" + "hbm_2_temperature_min [°C]: {}\n" + "hbm_2_temperature_max [°C]: {}\n" + "hbm_3_temperature_min [°C]: {}\n" + "hbm_3_temperature_max [°C]: {}\n" + "fan_speed_percentage [%]: [{}]\n" + "temperature [°C]: [{}]\n" + "memory_temperature [°C]: [{}]\n" + "hotspot_temperature [°C]: [{}]\n" + "hbm_0_temperature [°C]: [{}]\n" + "hbm_1_temperature [°C]: [{}]\n" + "hbm_2_temperature [°C]: [{}]\n" + "hbm_3_temperature [°C]: [{}]", + detail::value_or_default(samples.get_num_fans()), + detail::value_or_default(samples.get_fan_speed_max()), + detail::value_or_default(samples.get_temperature_min()), + detail::value_or_default(samples.get_temperature_max()), + detail::value_or_default(samples.get_memory_temperature_min()), + detail::value_or_default(samples.get_memory_temperature_max()), + detail::value_or_default(samples.get_hotspot_temperature_min()), + detail::value_or_default(samples.get_hotspot_temperature_max()), + detail::value_or_default(samples.get_hbm_0_temperature_min()), + detail::value_or_default(samples.get_hbm_0_temperature_max()), + detail::value_or_default(samples.get_hbm_1_temperature_min()), + detail::value_or_default(samples.get_hbm_1_temperature_max()), + detail::value_or_default(samples.get_hbm_2_temperature_min()), + detail::value_or_default(samples.get_hbm_2_temperature_max()), + detail::value_or_default(samples.get_hbm_3_temperature_min()), + detail::value_or_default(samples.get_hbm_3_temperature_max()), + fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_temperature()), ", "), + 
fmt::join(detail::value_or_default(samples.get_hotspot_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_hbm_0_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_hbm_1_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_hbm_2_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_hbm_3_temperature()), ", ")); +} + +} // namespace hws diff --git a/src/hws/gpu_amd/utility.cpp b/src/hws/gpu_amd/utility.cpp new file mode 100644 index 0000000..a88969a --- /dev/null +++ b/src/hws/gpu_amd/utility.cpp @@ -0,0 +1,42 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "hws/gpu_amd/utility.hpp" + +#include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions + +#include // std::string + +namespace hws::detail { + +std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) { + switch (perf_level) { + case RSMI_DEV_PERF_LEVEL_AUTO: + return "auto"; + case RSMI_DEV_PERF_LEVEL_LOW: + return "low"; + case RSMI_DEV_PERF_LEVEL_HIGH: + return "high"; + case RSMI_DEV_PERF_LEVEL_MANUAL: + return "manual"; + case RSMI_DEV_PERF_LEVEL_STABLE_STD: + return "stable_std"; + case RSMI_DEV_PERF_LEVEL_STABLE_PEAK: + return "stable_peak"; + case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK: + return "stable_min_mclk"; + case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK: + return "stable_min_sclk"; + case RSMI_DEV_PERF_LEVEL_DETERMINISM: + return "determinism"; + case RSMI_DEV_PERF_LEVEL_UNKNOWN: + default: + return "unknown"; + } +} + +} // namespace hws::detail diff --git a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp b/src/hws/gpu_intel/hardware_sampler.cpp similarity index 50% rename from src/hardware_sampling/gpu_intel/hardware_sampler.cpp rename to src/hws/gpu_intel/hardware_sampler.cpp index 7bfa1c6..0be124e 100644 --- 
a/src/hardware_sampling/gpu_intel/hardware_sampler.cpp +++ b/src/hws/gpu_intel/hardware_sampler.cpp @@ -5,22 +5,23 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/gpu_intel/hardware_sampler.hpp" +#include "hws/gpu_intel/hardware_sampler.hpp" -#include "hardware_sampling/gpu_intel/level_zero_device_handle_impl.hpp" // hws::level_zero_device_handle implementation -#include "hardware_sampling/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} -#include "hardware_sampling/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK -#include "hardware_sampling/hardware_sampler.hpp" // hws::hardware_sampler -#include "hardware_sampling/utility.hpp" // hws::{durations_from_reference_time, join} +#include "hws/gpu_intel/level_zero_device_handle_impl.hpp" // hws::level_zero_device_handle implementation +#include "hws/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} +#include "hws/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::{durations_from_reference_time, join} +#include "fmt/format.h" // fmt::format #include "level_zero/ze_api.h" // Level Zero runtime functions #include "level_zero/zes_api.h" // Level Zero runtime functions #include // std::chrono::{steady_clock, duration_cast, milliseconds} #include // std::size_t -#include // std::int32_t +#include // std::int32_t, std::int64_t #include // std::exception, std::terminate -#include // std::format #include // std::ios_base #include // std::cerr, std::endl #include // std::runtime_error @@ -31,20 +32,20 @@ namespace hws { 
-gpu_intel_hardware_sampler::gpu_intel_hardware_sampler() : - gpu_intel_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL } { } +gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const sample_category category) : + gpu_intel_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { } -gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id) : - gpu_intel_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL } { } +gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const sample_category category) : + gpu_intel_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { } -gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::chrono::milliseconds sampling_interval) : - gpu_intel_hardware_sampler{ 0, sampling_interval } { } +gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + gpu_intel_hardware_sampler{ 0, sampling_interval, category } { } -gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval) : - hardware_sampler{ sampling_interval } { +gpu_intel_hardware_sampler::gpu_intel_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) : + hardware_sampler{ sampling_interval, category } { // make sure that zeInit is only called once for all instances if (instances_++ == 0) { - HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY)); + HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY)) // notify that initialization has been finished init_finished_ = true; } else { @@ -77,6 +78,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { std::vector frequency_handles{}; std::vector power_handles{}; std::vector memory_handles{}; + std::vector fan_handles{}; std::vector psu_handles{}; std::vector temperature_handles{}; @@ -86,12 +88,21 @@ void 
gpu_intel_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); + double initial_total_power_consumption{}; // initial total power consumption in J + // retrieve initial general information - { + if (this->sample_category_enabled(sample_category::general)) { + // the byte order is given by Intel directly + general_samples_.byte_order_ = "Little Endian"; + ze_device_properties_t ze_device_prop{}; if (zeDeviceGetProperties(device, &ze_device_prop) == ZE_RESULT_SUCCESS) { + general_samples_.vendor_id_ = fmt::format("{:x}", ze_device_prop.vendorId); general_samples_.num_threads_per_eu_ = ze_device_prop.numThreadsPerEU; general_samples_.eu_simd_width_ = ze_device_prop.physicalEUSimdWidth; + + // assemble list of GPU flags + general_samples_.flags_ = detail::property_flags_to_vector(ze_device_prop.flags); } zes_device_properties_t zes_device_prop{}; @@ -127,7 +138,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve initial clock related information - { + if (this->sample_category_enabled(sample_category::clock)) { std::uint32_t num_frequency_domains{ 0 }; if (zesDeviceEnumFrequencyDomains(device, &num_frequency_domains, nullptr) == ZE_RESULT_SUCCESS) { frequency_handles.resize(num_frequency_domains); @@ -135,16 +146,16 @@ void gpu_intel_hardware_sampler::sampling_loop() { for (zes_freq_handle_t handle : frequency_handles) { // get frequency properties zes_freq_properties_t prop{}; - if (zesFrequencyGetProperties(handle, &prop)) { + if (zesFrequencyGetProperties(handle, &prop) == ZE_RESULT_SUCCESS) { // determine the frequency domain (e.g. 
GPU, memory, etc) switch (prop.type) { case ZES_FREQ_DOMAIN_GPU: - clock_samples_.clock_gpu_min_ = prop.min; - clock_samples_.clock_gpu_max_ = prop.max; + clock_samples_.clock_frequency_min_ = prop.min; + clock_samples_.clock_frequency_max_ = prop.max; break; case ZES_FREQ_DOMAIN_MEMORY: - clock_samples_.clock_mem_min_ = prop.min; - clock_samples_.clock_mem_max_ = prop.max; + clock_samples_.memory_clock_frequency_min_ = prop.min; + clock_samples_.memory_clock_frequency_max_ = prop.max; break; default: // do nothing @@ -159,10 +170,10 @@ void gpu_intel_hardware_sampler::sampling_loop() { // determine the frequency domain (e.g. GPU, memory, etc) switch (prop.type) { case ZES_FREQ_DOMAIN_GPU: - clock_samples_.available_clocks_gpu_ = available_clocks; + clock_samples_.available_clock_frequencies_ = available_clocks; break; case ZES_FREQ_DOMAIN_MEMORY: - clock_samples_.available_clocks_mem_ = available_clocks; + clock_samples_.available_memory_clock_frequencies_ = available_clocks; break; default: // do nothing @@ -179,28 +190,40 @@ void gpu_intel_hardware_sampler::sampling_loop() { case ZES_FREQ_DOMAIN_GPU: { if (frequency_state.tdp >= 0.0) { - clock_samples_.tdp_frequency_limit_gpu_ = decltype(clock_samples_.tdp_frequency_limit_gpu_)::value_type{ frequency_state.tdp }; + clock_samples_.frequency_limit_tdp_ = decltype(clock_samples_.frequency_limit_tdp_)::value_type{ frequency_state.tdp }; } if (frequency_state.actual >= 0.0) { - clock_samples_.clock_gpu_ = decltype(clock_samples_.clock_gpu_)::value_type{ frequency_state.actual }; + clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ frequency_state.actual }; } if (frequency_state.throttleReasons >= 0.0) { - using vector_type = decltype(clock_samples_.throttle_reason_gpu_)::value_type; - clock_samples_.throttle_reason_gpu_ = vector_type{ static_cast(frequency_state.throttleReasons) }; + { + using vector_type = decltype(clock_samples_.throttle_reason_)::value_type; + 
clock_samples_.throttle_reason_ = vector_type{ static_cast(static_cast(frequency_state.throttleReasons)) }; + } + { + using vector_type = decltype(clock_samples_.throttle_reason_string_)::value_type; + clock_samples_.throttle_reason_string_ = vector_type{ static_cast(detail::throttle_reason_to_string(frequency_state.throttleReasons)) }; + } } } break; case ZES_FREQ_DOMAIN_MEMORY: { if (frequency_state.tdp >= 0.0) { - clock_samples_.tdp_frequency_limit_mem_ = decltype(clock_samples_.tdp_frequency_limit_mem_)::value_type{ frequency_state.tdp }; + clock_samples_.memory_frequency_limit_tdp_ = decltype(clock_samples_.memory_frequency_limit_tdp_)::value_type{ frequency_state.tdp }; } if (frequency_state.actual >= 0.0) { - clock_samples_.clock_mem_ = decltype(clock_samples_.clock_mem_)::value_type{ frequency_state.actual }; + clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ frequency_state.actual }; } if (frequency_state.throttleReasons >= 0.0) { - using vector_type = decltype(clock_samples_.throttle_reason_mem_)::value_type; - clock_samples_.throttle_reason_mem_ = vector_type{ static_cast(frequency_state.throttleReasons) }; + { + using vector_type = decltype(clock_samples_.memory_throttle_reason_)::value_type; + clock_samples_.memory_throttle_reason_ = vector_type{ static_cast(static_cast(frequency_state.throttleReasons)) }; + } + { + using vector_type = decltype(clock_samples_.memory_throttle_reason_string_)::value_type; + clock_samples_.memory_throttle_reason_string_ = vector_type{ static_cast(detail::throttle_reason_to_string(frequency_state.throttleReasons)) }; + } } } break; @@ -216,24 +239,54 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve initial power related information - { + if (this->sample_category_enabled(sample_category::power)) { std::uint32_t num_power_domains{ 0 }; if (zesDeviceEnumPowerDomains(device, &num_power_domains, nullptr) == ZE_RESULT_SUCCESS) { 
power_handles.resize(num_power_domains); if (zesDeviceEnumPowerDomains(device, &num_power_domains, power_handles.data()) == ZE_RESULT_SUCCESS) { if (!power_handles.empty()) { // NOTE: only the first power domain is used here + // get the power measurement type + // NOTE: only the first value is used here! + std::uint32_t num_power_limit_descriptors{ 1 }; + zes_power_limit_ext_desc_t desc{}; + if (zesPowerGetLimitsExt(power_handles.front(), &num_power_limit_descriptors, &desc) == ZE_RESULT_SUCCESS) { + switch (desc.level) { + case ZES_POWER_LEVEL_UNKNOWN: + power_samples_.power_measurement_type_ = "unknown"; + break; + case ZES_POWER_LEVEL_SUSTAINED: + power_samples_.power_measurement_type_ = "sustained"; + break; + case ZES_POWER_LEVEL_BURST: + power_samples_.power_measurement_type_ = "burst"; + break; + case ZES_POWER_LEVEL_PEAK: + power_samples_.power_measurement_type_ = "peak"; + break; + case ZES_POWER_LEVEL_INSTANTANEOUS: + power_samples_.power_measurement_type_ = "current/instant"; + break; + case ZES_POWER_LEVEL_FORCE_UINT32: + power_samples_.power_measurement_type_ = "force uint32"; + break; + } + + power_samples_.power_enforced_limit_ = static_cast(desc.limit) / 1000.0; + } + // get total power consumption zes_power_energy_counter_t energy_counter{}; if (zesPowerGetEnergyCounter(power_handles.front(), &energy_counter) == ZE_RESULT_SUCCESS) { - power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ energy_counter.energy }; + initial_total_power_consumption = static_cast(energy_counter.energy) / 1000.0 / 1000.0; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; + power_samples_.power_usage_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; } // get energy thresholds zes_energy_threshold_t energy_threshold{}; if (zesPowerGetEnergyThreshold(power_handles.front(), &energy_threshold) == 
ZE_RESULT_SUCCESS) { - power_samples_.energy_threshold_enabled_ = static_cast(energy_threshold.enable); - power_samples_.energy_threshold_ = energy_threshold.threshold; + power_samples_.power_management_mode_ = static_cast(energy_threshold.enable); } } } @@ -241,7 +294,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve initial memory related information - { + if (this->sample_category_enabled(sample_category::memory)) { std::uint32_t num_memory_modules{ 0 }; if (zesDeviceEnumMemoryModules(device, &num_memory_modules, nullptr) == ZE_RESULT_SUCCESS) { memory_handles.resize(num_memory_modules); @@ -262,40 +315,46 @@ void gpu_intel_hardware_sampler::sampling_loop() { } if (prop.busWidth != -1) { // first value to add -> initialize map - if (!memory_samples_.bus_width_.has_value()) { - memory_samples_.bus_width_ = decltype(memory_samples_.bus_width_)::value_type{}; + if (!memory_samples_.memory_bus_width_.has_value()) { + memory_samples_.memory_bus_width_ = decltype(memory_samples_.memory_bus_width_)::value_type{}; } // add new memory bus width - memory_samples_.bus_width_.value()[memory_module_name] = prop.busWidth; + memory_samples_.memory_bus_width_.value()[memory_module_name] = prop.busWidth; } if (prop.numChannels != -1) { // first value to add -> initialize map - if (!memory_samples_.num_channels_.has_value()) { - memory_samples_.num_channels_ = decltype(memory_samples_.num_channels_)::value_type{}; + if (!memory_samples_.memory_num_channels_.has_value()) { + memory_samples_.memory_num_channels_ = decltype(memory_samples_.memory_num_channels_)::value_type{}; } // add new number of memory channels - memory_samples_.num_channels_.value()[memory_module_name] = prop.numChannels; + memory_samples_.memory_num_channels_.value()[memory_module_name] = prop.numChannels; } // first value to add -> initialize map - if (!memory_samples_.location_.has_value()) { - memory_samples_.location_ = decltype(memory_samples_.location_)::value_type{}; + if 
(!memory_samples_.memory_location_.has_value()) { + memory_samples_.memory_location_ = decltype(memory_samples_.memory_location_)::value_type{}; } - memory_samples_.location_.value()[memory_module_name] = detail::memory_location_to_name(prop.location); + memory_samples_.memory_location_.value()[memory_module_name] = detail::memory_location_to_name(prop.location); // get current memory information zes_mem_state_t mem_state{}; if (zesMemoryGetState(handle, &mem_state) == ZE_RESULT_SUCCESS) { // first value to add -> initialize map - if (!memory_samples_.allocatable_memory_total_.has_value()) { - memory_samples_.allocatable_memory_total_ = decltype(memory_samples_.allocatable_memory_total_)::value_type{}; + if (!memory_samples_.visible_memory_total_.has_value()) { + memory_samples_.visible_memory_total_ = decltype(memory_samples_.visible_memory_total_)::value_type{}; } - memory_samples_.allocatable_memory_total_.value()[memory_module_name] = mem_state.size; + memory_samples_.visible_memory_total_.value()[memory_module_name] = mem_state.size; // first value to add -> initialize map if (!memory_samples_.memory_free_.has_value()) { memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{}; } memory_samples_.memory_free_.value()[memory_module_name].push_back(mem_state.free); + + // first value to add -> initialize map + if (!memory_samples_.memory_used_.has_value()) { + memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{}; + } + memory_samples_.memory_used_.value()[memory_module_name].push_back(mem_state.size - mem_state.free); } } } @@ -304,13 +363,13 @@ void gpu_intel_hardware_sampler::sampling_loop() { zes_pci_properties_t pci_prop{}; if (zesDevicePciGetProperties(device, &pci_prop) == ZE_RESULT_SUCCESS) { if (pci_prop.maxSpeed.gen != -1) { - memory_samples_.max_pcie_link_generation_ = pci_prop.maxSpeed.gen; + memory_samples_.pcie_link_generation_max_ = pci_prop.maxSpeed.gen; } if (pci_prop.maxSpeed.width != -1) 
{ - memory_samples_.pcie_max_width_ = pci_prop.maxSpeed.width; + memory_samples_.num_pcie_lanes_max_ = pci_prop.maxSpeed.width; } if (pci_prop.maxSpeed.maxBandwidth != -1) { - memory_samples_.pcie_link_max_speed_ = pci_prop.maxSpeed.maxBandwidth; + memory_samples_.pcie_link_speed_max_ = static_cast(static_cast(pci_prop.maxSpeed.maxBandwidth) / 1e6); } } @@ -318,10 +377,10 @@ void gpu_intel_hardware_sampler::sampling_loop() { zes_pci_state_t pci_state{}; if (zesDevicePciGetState(device, &pci_state) == ZE_RESULT_SUCCESS) { if (pci_state.speed.maxBandwidth != -1) { - memory_samples_.pcie_link_speed_ = decltype(memory_samples_.pcie_link_speed_)::value_type{ pci_state.speed.maxBandwidth }; + memory_samples_.pcie_link_speed_ = decltype(memory_samples_.pcie_link_speed_)::value_type{ static_cast(static_cast(pci_state.speed.maxBandwidth) / 1e6) }; } if (pci_state.speed.width != -1) { - memory_samples_.pcie_link_width_ = decltype(memory_samples_.pcie_link_width_)::value_type{ pci_state.speed.width }; + memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{ pci_state.speed.width }; } if (pci_state.speed.gen != -1) { memory_samples_.pcie_link_generation_ = decltype(memory_samples_.pcie_link_generation_)::value_type{ pci_state.speed.gen }; @@ -332,7 +391,30 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve initial temperature related information - { + if (this->sample_category_enabled(sample_category::temperature)) { + std::uint32_t num_fans{ 0 }; + if (zesDeviceEnumFans(device, &num_fans, nullptr) == ZE_RESULT_SUCCESS) { + temperature_samples_.num_fans_ = num_fans; + + fan_handles.resize(num_fans); + if (zesDeviceEnumFans(device, &num_fans, fan_handles.data()) == ZE_RESULT_SUCCESS) { + // NOTE: only the first fan handle is used here + if (!fan_handles.empty()) { + zes_fan_properties_t prop{}; + if (zesFanGetProperties(fan_handles.front(), &prop) == ZE_RESULT_SUCCESS) { + temperature_samples_.fan_speed_max_ = prop.maxRPM; + } 
+ + std::int32_t fan_speed{}; + if (zesFanGetState(fan_handles.front(), ZES_FAN_SPEED_UNITS_PERCENT, &fan_speed) == ZE_RESULT_SUCCESS) { + if (fan_speed != -1) { + temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ static_cast(fan_speed) }; + } + } + } + } + } + std::uint32_t num_psus{ 0 }; if (zesDeviceEnumPsus(device, &num_psus, nullptr) == ZE_RESULT_SUCCESS) { psu_handles.resize(num_psus); @@ -342,7 +424,7 @@ void gpu_intel_hardware_sampler::sampling_loop() { zes_psu_state_t psu_state{}; if (zesPsuGetState(psu_handles.front(), &psu_state) == ZE_RESULT_SUCCESS) { if (psu_state.temperature != -1) { - temperature_samples_.temperature_psu_ = decltype(temperature_samples_.temperature_psu_)::value_type{ psu_state.temperature }; + temperature_samples_.psu_temperature_ = static_cast(psu_state.temperature); } } } @@ -356,26 +438,66 @@ void gpu_intel_hardware_sampler::sampling_loop() { for (zes_temp_handle_t handle : temperature_handles) { zes_temp_properties_t prop{}; if (zesTemperatureGetProperties(handle, &prop) == ZE_RESULT_SUCCESS) { - const std::string sensor_name = detail::temperature_sensor_type_to_name(prop.type); - if (sensor_name.empty()) { - // unsupported sensor type - continue; - } + switch (prop.type) { + case ZES_TEMP_SENSORS_GLOBAL: + { + // first value to add -> initialize map + if (!temperature_samples_.global_temperature_max_.has_value()) { + temperature_samples_.global_temperature_max_ = decltype(temperature_samples_.global_temperature_max_)::value_type{}; + } + // add new maximum temperature + temperature_samples_.global_temperature_max_ = prop.maxTemperature; - // first value to add -> initialize map - if (!temperature_samples_.temperature_max_.has_value()) { - temperature_samples_.temperature_max_ = decltype(temperature_samples_.temperature_max_)::value_type{}; - } - // add new maximum temperature - temperature_samples_.temperature_max_.value()[sensor_name] = prop.maxTemperature; + // 
first value to add -> initialize map + if (!temperature_samples_.global_temperature_.has_value()) { + temperature_samples_.global_temperature_ = decltype(temperature_samples_.global_temperature_)::value_type{}; + } + double temp{}; + if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) { + temperature_samples_.global_temperature_->push_back(temp); + } + } + break; + case ZES_TEMP_SENSORS_GPU: + { + // first value to add -> initialize map + if (!temperature_samples_.temperature_max_.has_value()) { + temperature_samples_.temperature_max_ = decltype(temperature_samples_.temperature_max_)::value_type{}; + } + // add new maximum temperature + temperature_samples_.temperature_max_ = prop.maxTemperature; - // first value to add -> initialize map - if (!temperature_samples_.temperature_.has_value()) { - temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{}; - } - double temp{}; - if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) { - temperature_samples_.temperature_.value()[sensor_name].push_back(temp); + // first value to add -> initialize map + if (!temperature_samples_.temperature_.has_value()) { + temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{}; + } + double temp{}; + if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) { + temperature_samples_.temperature_->push_back(temp); + } + } + break; + case ZES_TEMP_SENSORS_MEMORY: + { + // first value to add -> initialize map + if (!temperature_samples_.memory_temperature_max_.has_value()) { + temperature_samples_.memory_temperature_max_ = decltype(temperature_samples_.memory_temperature_max_)::value_type{}; + } + // add new maximum temperature + temperature_samples_.memory_temperature_max_ = prop.maxTemperature; + + // first value to add -> initialize map + if (!temperature_samples_.memory_temperature_.has_value()) { + temperature_samples_.memory_temperature_ = 
decltype(temperature_samples_.memory_temperature_)::value_type{}; + } + double temp{}; + if (zesTemperatureGetState(handle, &temp) == ZE_RESULT_SUCCESS) { + temperature_samples_.memory_temperature_->push_back(temp); + } + } + break; + default: + break; } } } @@ -394,41 +516,47 @@ void gpu_intel_hardware_sampler::sampling_loop() { this->add_time_point(std::chrono::steady_clock::now()); // retrieve clock related samples - { + if (this->sample_category_enabled(sample_category::clock)) { for (zes_freq_handle_t handle : frequency_handles) { // get frequency properties zes_freq_properties_t prop{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetProperties(handle, &prop)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetProperties(handle, &prop)) // get current frequency information zes_freq_state_t frequency_state{}; - if (clock_samples_.clock_gpu_.has_value() || clock_samples_.clock_mem_.has_value()) { - HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetState(handle, &frequency_state)); + if (clock_samples_.clock_frequency_.has_value() || clock_samples_.memory_clock_frequency_.has_value()) { + HWS_LEVEL_ZERO_ERROR_CHECK(zesFrequencyGetState(handle, &frequency_state)) // determine the frequency domain (e.g. 
GPU, memory, etc) switch (prop.type) { case ZES_FREQ_DOMAIN_GPU: { - if (clock_samples_.tdp_frequency_limit_gpu_.has_value()) { - clock_samples_.tdp_frequency_limit_gpu_->push_back(frequency_state.tdp); + if (clock_samples_.frequency_limit_tdp_.has_value()) { + clock_samples_.frequency_limit_tdp_->push_back(frequency_state.tdp); + } + if (clock_samples_.clock_frequency_.has_value()) { + clock_samples_.clock_frequency_->push_back(frequency_state.actual); } - if (clock_samples_.clock_gpu_.has_value()) { - clock_samples_.clock_gpu_->push_back(frequency_state.actual); + if (clock_samples_.throttle_reason_.has_value()) { + clock_samples_.throttle_reason_->push_back(static_cast(frequency_state.throttleReasons)); } - if (clock_samples_.throttle_reason_gpu_.has_value()) { - clock_samples_.throttle_reason_gpu_->push_back(static_cast(frequency_state.throttleReasons)); + if (clock_samples_.throttle_reason_string_.has_value()) { + clock_samples_.throttle_reason_string_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons)); } } break; case ZES_FREQ_DOMAIN_MEMORY: { - if (clock_samples_.tdp_frequency_limit_mem_.has_value()) { - clock_samples_.tdp_frequency_limit_mem_->push_back(frequency_state.tdp); + if (clock_samples_.memory_frequency_limit_tdp_.has_value()) { + clock_samples_.memory_frequency_limit_tdp_->push_back(frequency_state.tdp); } - if (clock_samples_.clock_mem_.has_value()) { - clock_samples_.clock_mem_->push_back(frequency_state.actual); + if (clock_samples_.memory_clock_frequency_.has_value()) { + clock_samples_.memory_clock_frequency_->push_back(frequency_state.actual); } - if (clock_samples_.throttle_reason_mem_.has_value()) { - clock_samples_.throttle_reason_mem_->push_back(static_cast(frequency_state.throttleReasons)); + if (clock_samples_.memory_throttle_reason_.has_value()) { + clock_samples_.memory_throttle_reason_->push_back(static_cast(frequency_state.throttleReasons)); + } + if 
(clock_samples_.memory_throttle_reason_string_.has_value()) { + clock_samples_.memory_throttle_reason_string_->push_back(detail::throttle_reason_to_string(frequency_state.throttleReasons)); } } break; @@ -441,24 +569,32 @@ void gpu_intel_hardware_sampler::sampling_loop() { } // retrieve power related samples - { + if (this->sample_category_enabled(sample_category::power)) { if (!power_handles.empty()) { // NOTE: only the first power domain is used here if (power_samples_.power_total_energy_consumption_.has_value()) { // get total power consumption zes_power_energy_counter_t energy_counter{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesPowerGetEnergyCounter(power_handles.front(), &energy_counter)) + + const auto power_consumption = static_cast(energy_counter.energy) / 1000.0 / 1000.0; - power_samples_.power_total_energy_consumption_->push_back(energy_counter.energy); + // calculate current power draw as (Energy Difference [J]) / (Time Difference [s]) + const std::size_t last_index = this->sampling_time_points().size() - 1; + const double power_usage = ((power_consumption - initial_total_power_consumption) - power_samples_.power_total_energy_consumption_->back()) / (std::chrono::duration(this->sampling_time_points()[last_index] - this->sampling_time_points()[last_index - 1]).count()); + power_samples_.power_usage_->push_back(power_usage); + + // add power consumption last to be able to use the std::vector::back() function + power_samples_.power_total_energy_consumption_->push_back(power_consumption - initial_total_power_consumption); } } } // retrieve memory related samples - { + if (this->sample_category_enabled(sample_category::memory)) { for (zes_mem_handle_t handle : memory_handles) { zes_mem_properties_t prop{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetProperties(handle, &prop)) // get the memory module name 
const std::string memory_module_name = detail::memory_module_to_name(prop.type); @@ -466,53 +602,77 @@ void gpu_intel_hardware_sampler::sampling_loop() { if (memory_samples_.memory_free_.has_value()) { // get current memory information zes_mem_state_t mem_state{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetState(handle, &mem_state)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesMemoryGetState(handle, &mem_state)) memory_samples_.memory_free_.value()[memory_module_name].push_back(mem_state.free); + + if (memory_samples_.visible_memory_total_.has_value()) { + memory_samples_.memory_used_.value()[memory_module_name].push_back(memory_samples_.visible_memory_total_.value()[memory_module_name] - mem_state.free); + } } } - if (memory_samples_.pcie_link_speed_.has_value() || memory_samples_.pcie_link_width_.has_value() || memory_samples_.pcie_link_width_.has_value()) { + if (memory_samples_.pcie_link_speed_.has_value() || memory_samples_.num_pcie_lanes_.has_value() || memory_samples_.num_pcie_lanes_.has_value()) { // the current PCIe stats zes_pci_state_t pci_state{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesDevicePciGetState(device, &pci_state)); + HWS_LEVEL_ZERO_ERROR_CHECK(zesDevicePciGetState(device, &pci_state)) if (memory_samples_.pcie_link_speed_.has_value()) { - memory_samples_.pcie_link_speed_->push_back(pci_state.speed.maxBandwidth); + memory_samples_.pcie_link_speed_->push_back(static_cast(static_cast(pci_state.speed.maxBandwidth) / 1e6)); } - if (memory_samples_.pcie_link_width_.has_value()) { - memory_samples_.pcie_link_width_->push_back(pci_state.speed.width); + if (memory_samples_.num_pcie_lanes_.has_value()) { + memory_samples_.num_pcie_lanes_->push_back(pci_state.speed.width); } - if (memory_samples_.pcie_link_width_.has_value()) { + if (memory_samples_.pcie_link_generation_.has_value()) { memory_samples_.pcie_link_generation_->push_back(pci_state.speed.gen); } } } // retrieve temperature related samples - { + if (this->sample_category_enabled(sample_category::temperature)) { if 
(!psu_handles.empty()) { - if (temperature_samples_.temperature_psu_.has_value()) { + if (temperature_samples_.psu_temperature_.has_value()) { // NOTE: only the first PSU is used here zes_psu_state_t psu_state{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesPsuGetState(psu_handles.front(), &psu_state)); - temperature_samples_.temperature_psu_->push_back(psu_state.temperature); + HWS_LEVEL_ZERO_ERROR_CHECK(zesPsuGetState(psu_handles.front(), &psu_state)) + temperature_samples_.psu_temperature_->push_back(psu_state.temperature); } } for (zes_temp_handle_t handle : temperature_handles) { zes_temp_properties_t prop{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetProperties(handle, &prop)); - - const std::string sensor_name = detail::temperature_sensor_type_to_name(prop.type); - if (sensor_name.empty()) { - // unsupported sensor type - continue; - } - - if (temperature_samples_.temperature_.has_value() && temperature_samples_.temperature_.value().contains(sensor_name)) { - double temp{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp)); - temperature_samples_.temperature_.value()[sensor_name].push_back(temp); + HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetProperties(handle, &prop)) + + switch (prop.type) { + case ZES_TEMP_SENSORS_GLOBAL: + { + if (temperature_samples_.global_temperature_.has_value()) { + double temp{}; + HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp)) + temperature_samples_.global_temperature_->push_back(temp); + } + } + break; + case ZES_TEMP_SENSORS_GPU: + { + if (temperature_samples_.temperature_.has_value()) { + double temp{}; + HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp)) + temperature_samples_.temperature_->push_back(temp); + } + } + break; + case ZES_TEMP_SENSORS_MEMORY: + { + if (temperature_samples_.memory_temperature_.has_value()) { + double temp{}; + HWS_LEVEL_ZERO_ERROR_CHECK(zesTemperatureGetState(handle, &temp)) + temperature_samples_.memory_temperature_->push_back(temp); + } + } + break; + 
default: + break; } } } @@ -527,25 +687,29 @@ std::string gpu_intel_hardware_sampler::device_identification() const { // get the level zero handle from the device ze_device_handle_t device = device_.get_impl().device; ze_device_properties_t prop{}; - HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGetProperties(device, &prop)); - return std::format("gpu_intel_device_{}", prop.deviceId); + HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGetProperties(device, &prop)) + return fmt::format("gpu_intel_device_{}", prop.deviceId); } -std::string gpu_intel_hardware_sampler::generate_yaml_string() const { +std::string gpu_intel_hardware_sampler::samples_only_as_yaml_string() const { // check whether it's safe to generate the YAML entry if (this->is_sampling()) { throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; } - return std::format("{}\n" - "{}\n" - "{}\n" - "{}\n" + return fmt::format("{}{}" + "{}{}" + "{}{}" + "{}{}" "{}", general_samples_.generate_yaml_string(), + general_samples_.has_samples() ? "\n" : "", clock_samples_.generate_yaml_string(), + clock_samples_.has_samples() ? "\n" : "", power_samples_.generate_yaml_string(), + power_samples_.has_samples() ? "\n" : "", memory_samples_.generate_yaml_string(), + memory_samples_.has_samples() ? 
"\n" : "", temperature_samples_.generate_yaml_string()); } @@ -554,7 +718,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa out.setstate(std::ios_base::failbit); return out; } else { - return out << std::format("sampling interval: {}\n" + return out << fmt::format("sampling interval: {}\n" "time points: [{}]\n\n" "general samples:\n{}\n\n" "clock samples:\n{}\n\n" @@ -562,7 +726,7 @@ std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sa "memory samples:\n{}\n\n" "temperature samples:\n{}", sampler.sampling_interval(), - detail::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), + fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), sampler.general_samples(), sampler.clock_samples(), sampler.power_samples(), diff --git a/src/hws/gpu_intel/level_zero_samples.cpp b/src/hws/gpu_intel/level_zero_samples.cpp new file mode 100644 index 0000000..e296cab --- /dev/null +++ b/src/hws/gpu_intel/level_zero_samples.cpp @@ -0,0 +1,628 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "hws/gpu_intel/level_zero_samples.hpp" + +#include "hws/utility.hpp" // hws::detail::{value_or_default, remove_cvref_t} + +#include // std::ostream +#include // std::string +#include // std::string_view +#include // std::remove_cvref_t, std::false_type, std::true_type +#include // std::vector + +namespace hws { + +namespace { + +template +void append_map_values(std::string &str, const std::string_view entry_name, const MapType &map) { + if (map.has_value()) { + for (const auto &[key, value] : map.value()) { + if constexpr (detail::is_vector_v>) { + str += fmt::format("{}_{}: [{}]\n", entry_name, key, fmt::join(value, ", ")); + } else { + str += fmt::format("{}_{}: {}\n", entry_name, key, value); + } + } + } +} + +} // namespace + +//*************************************************************************************************************************************// +// general samples // +//*************************************************************************************************************************************// + +bool level_zero_general_samples::has_samples() const { + return this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value() || this->flags_.has_value() || this->standby_mode_.has_value() + || this->num_threads_per_eu_.has_value() || this->eu_simd_width_.has_value(); +} + +std::string level_zero_general_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "general:\n" }; + + // device byte order + if (this->byte_order_.has_value()) { + str += fmt::format(" byte_order:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->byte_order_.value()); + } + // the vendor specific ID + if (this->vendor_id_.has_value()) { + str += fmt::format(" vendor_id:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->vendor_id_.value()); + } + // device name + if (this->name_.has_value()) { + str += 
fmt::format(" name:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->name_.value()); + } + // GPU specific flags + if (this->flags_.has_value()) { + str += fmt::format(" flags:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(detail::quote(this->flags_.value()), ", ")); + } + // the standby mode + if (this->standby_mode_.has_value()) { + str += fmt::format(" standby_mode:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->standby_mode_.value()); + } + // the number of threads per EU unit + if (this->num_threads_per_eu_.has_value()) { + str += fmt::format(" num_threads_per_eu:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_threads_per_eu_.value()); + } + // the EU SIMD width + if (this->eu_simd_width_.has_value()) { + str += fmt::format(" eu_simd_width:\n" + " unit: \"int\"\n" + " values: {}\n", + this->eu_simd_width_.value()); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const level_zero_general_samples &samples) { + return out << fmt::format("byte_order [string]: {}\n" + "vendor_id [string]: {}\n" + "name [string]: {}\n" + "flags [string]: [{}]\n" + "standby_mode [string]: {}\n" + "num_threads_per_eu [int]: {}\n" + "eu_simd_width [int]: {}", + detail::value_or_default(samples.get_byte_order()), + detail::value_or_default(samples.get_vendor_id()), + detail::value_or_default(samples.get_name()), + fmt::join(detail::value_or_default(samples.get_flags()), ", "), + detail::value_or_default(samples.get_standby_mode()), + detail::value_or_default(samples.get_num_threads_per_eu()), + detail::value_or_default(samples.get_eu_simd_width())); +} + +//*************************************************************************************************************************************// +// clock samples // +//*************************************************************************************************************************************// + +bool level_zero_clock_samples::has_samples() const { + return 
this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() || this->memory_clock_frequency_min_.has_value() + || this->memory_clock_frequency_max_.has_value() || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() + || this->clock_frequency_.has_value() || this->memory_clock_frequency_.has_value() || this->throttle_reason_.has_value() + || this->throttle_reason_string_.has_value() || this->memory_throttle_reason_.has_value() || this->memory_throttle_reason_string_.has_value() + || this->frequency_limit_tdp_.has_value() || this->memory_frequency_limit_tdp_.has_value(); +} + +std::string level_zero_clock_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "clock:\n" }; + + // minimum GPU core clock + if (this->clock_frequency_min_.has_value()) { + str += fmt::format(" clock_frequency_min:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->clock_frequency_min_.value()); + } + // maximum GPU core clock (YAML key matches the member name and the operator<< label) + if (this->clock_frequency_max_.has_value()) { + str += fmt::format(" clock_frequency_max:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->clock_frequency_max_.value()); + } + // minimum memory clock + if (this->memory_clock_frequency_min_.has_value()) { + str += fmt::format(" memory_clock_frequency_min:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->memory_clock_frequency_min_.value()); + } + // maximum memory clock + if (this->memory_clock_frequency_max_.has_value()) { + str += fmt::format(" memory_clock_frequency_max:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->memory_clock_frequency_max_.value()); + } + // all possible GPU core clock frequencies + if (this->available_clock_frequencies_.has_value()) { + str += fmt::format(" available_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->available_clock_frequencies_.value(), ", ")); + } + // all
possible memory clock frequencies + if (this->available_memory_clock_frequencies_.has_value()) { + str += fmt::format(" available_memory_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->available_memory_clock_frequencies_.value(), ", ")); + } + + // the current GPU core clock frequency + if (this->clock_frequency_.has_value()) { + str += fmt::format(" clock_frequency:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->clock_frequency_.value(), ", ")); + } + // the current memory clock frequency + if (this->memory_clock_frequency_.has_value()) { + str += fmt::format(" memory_clock_frequency:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->memory_clock_frequency_.value(), ", ")); + } + // the current GPU core throttle reason as bitmask + if (this->throttle_reason_.has_value()) { + str += fmt::format(" throttle_reason:\n" + " unit: \"bitmask\"\n" + " values: [{}]\n", + fmt::join(this->throttle_reason_.value(), ", ")); + } + // the current GPU core throttle reason as string + if (this->throttle_reason_string_.has_value()) { + str += fmt::format(" throttle_reason_string:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(this->throttle_reason_string_.value(), ", ")); + } + // the current memory throttle reason as bitmask + if (this->memory_throttle_reason_.has_value()) { + str += fmt::format(" memory_throttle_reason:\n" + " unit: \"bitmask\"\n" + " values: [{}]\n", + fmt::join(this->memory_throttle_reason_.value(), ", ")); + } + // the current memory throttle reason as string + if (this->memory_throttle_reason_string_.has_value()) { + str += fmt::format(" memory_throttle_reason_string:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(this->memory_throttle_reason_string_.value(), ", ")); + } + // the maximum GPU core frequency based on the current TDP limit + if (this->frequency_limit_tdp_.has_value()) { + str += fmt::format(" frequency_limit_tdp:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", 
+ fmt::join(this->frequency_limit_tdp_.value(), ", ")); + } + // the maximum memory frequency based on the current TDP limit + if (this->memory_frequency_limit_tdp_.has_value()) { + str += fmt::format(" memory_frequency_limit_tdp:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->memory_frequency_limit_tdp_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const level_zero_clock_samples &samples) { + return out << fmt::format("clock_frequency_min [MHz]: {}\n" + "clock_frequency_max [MHz]: {}\n" + "memory_clock_frequency_min [MHz]: {}\n" + "memory_clock_frequency_max [MHz]: {}\n" + "available_clock_frequencies [MHz]: [{}]\n" + "available_memory_clock_frequencies [MHz]: [{}]\n" + "clock_frequency [MHz]: [{}]\n" + "memory_clock_frequency [MHz]: [{}]\n" + "throttle_reason [bitmask]: [{}]\n" + "throttle_reason_string [string]: [{}]\n" + "memory_throttle_reason [bitmask]: [{}]\n" + "memory_throttle_reason_string [string]: [{}]\n" + "frequency_limit_tdp [MHz]: [{}]\n" + "memory_frequency_limit_tdp [MHz]: [{}]", + detail::value_or_default(samples.get_clock_frequency_min()), + detail::value_or_default(samples.get_clock_frequency_max()), + detail::value_or_default(samples.get_memory_clock_frequency_min()), + detail::value_or_default(samples.get_memory_clock_frequency_max()), + fmt::join(detail::value_or_default(samples.get_available_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "), + fmt::join(detail::value_or_default(samples.get_throttle_reason_string()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_throttle_reason()), ", "), + 
fmt::join(detail::value_or_default(samples.get_memory_throttle_reason_string()), ", "), + fmt::join(detail::value_or_default(samples.get_frequency_limit_tdp()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_frequency_limit_tdp()), ", ")); +} + +//*************************************************************************************************************************************// +// power samples // +//*************************************************************************************************************************************// + +bool level_zero_power_samples::has_samples() const { + return this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value() || this->power_management_mode_.has_value() + || this->power_usage_.has_value() || this->power_total_energy_consumption_.has_value(); +} + +std::string level_zero_power_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "power:\n" }; + + // power enforced limit + if (this->power_enforced_limit_.has_value()) { + str += fmt::format(" power_enforced_limit:\n" + " unit: \"W\"\n" + " values: {}\n", + this->power_enforced_limit_.value()); + } + // power measurement type (string value -> quoted like all other string entries) + if (this->power_measurement_type_.has_value()) { + str += fmt::format(" power_measurement_type:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->power_measurement_type_.value()); + } + // the power management mode + if (this->power_management_mode_.has_value()) { + str += fmt::format(" power_management_mode:\n" + " unit: \"bool\"\n" + " values: {}\n", + this->power_management_mode_.value()); + } + + // the current power draw + if (this->power_usage_.has_value()) { + str += fmt::format(" power_usage:\n" + " unit: \"W\"\n" + " values: [{}]\n", + fmt::join(this->power_usage_.value(), ", ")); + } + // the total consumed energy + if (this->power_total_energy_consumption_.has_value()) { + str +=
fmt::format(" power_total_energy_consumption:\n" + " unit: \"J\"\n" + " values: [{}]\n", + fmt::join(this->power_total_energy_consumption_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const level_zero_power_samples &samples) { + return out << fmt::format("power_enforced_limit [W]: {}\n" + "power_measurement_type [string]: {}\n" + "power_management_mode [bool]: {}\n" + "power_usage [W]: [{}]\n" + "power_total_energy_consumption [J]: [{}]", + detail::value_or_default(samples.get_power_enforced_limit()), + detail::value_or_default(samples.get_power_measurement_type()), + detail::value_or_default(samples.get_power_management_mode()), + fmt::join(detail::value_or_default(samples.get_power_usage()), ", "), + fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), ", ")); +} + +//*************************************************************************************************************************************// +// memory samples // +//*************************************************************************************************************************************// + +bool level_zero_memory_samples::has_samples() const { + return this->memory_total_.has_value() || this->visible_memory_total_.has_value() || this->memory_location_.has_value() + || this->num_pcie_lanes_max_.has_value() || this->pcie_link_generation_max_.has_value() || this->pcie_link_speed_max_.has_value() + || this->memory_bus_width_.has_value() || this->memory_num_channels_.has_value() || this->memory_free_.has_value() + || this->memory_used_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_generation_.has_value() + || this->pcie_link_speed_.has_value(); +} + +std::string level_zero_memory_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "memory:\n" }; + + // the total memory + if 
(this->memory_total_.has_value()) { + for (const auto &[key, value] : this->memory_total_.value()) { + str += fmt::format(" {}_memory_total:\n" + " unit: \"B\"\n" + " values: {}\n", + key, + value); + } + } + // the total allocatable memory + if (this->visible_memory_total_.has_value()) { + for (const auto &[key, value] : this->visible_memory_total_.value()) { + str += fmt::format(" {}_visible_memory_total:\n" + " unit: \"B\"\n" + " values: {}\n", + key, + value); + } + } + // the memory location (system or device) + if (this->memory_location_.has_value()) { + for (const auto &[key, value] : this->memory_location_.value()) { + str += fmt::format(" {}_memory_location:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + key, + value); + } + } + // the pcie link width + if (this->num_pcie_lanes_max_.has_value()) { + str += fmt::format(" num_pcie_lanes_max:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_pcie_lanes_max_.value()); + } + // the pcie generation + if (this->pcie_link_generation_max_.has_value()) { + str += fmt::format(" pcie_link_generation_max:\n" + " unit: \"int\"\n" + " values: {}\n", + this->pcie_link_generation_max_.value()); + } + // the pcie max bandwidth + if (this->pcie_link_speed_max_.has_value()) { + str += fmt::format(" pcie_link_speed_max:\n" + " unit: \"MBPS\"\n" + " values: {}\n", + this->pcie_link_speed_max_.value()); + } + // the memory bus width + if (this->memory_bus_width_.has_value()) { + for (const auto &[key, value] : this->memory_bus_width_.value()) { + str += fmt::format(" {}_memory_bus_width:\n" + " unit: \"Bit\"\n" + " values: {}\n", + key, + value); + } + } + // the number of memory channels + if (this->memory_num_channels_.has_value()) { + for (const auto &[key, value] : this->memory_num_channels_.value()) { + str += fmt::format(" {}_memory_num_channels:\n" + " unit: \"int\"\n" + " values: {}\n", + key, + value); + } + } + + // the currently free memory + if (this->memory_free_.has_value()) { + for (const auto &[key, 
value] : this->memory_free_.value()) { + str += fmt::format(" {}_memory_free:\n" + " unit: \"string\"\n" + " values: [{}]\n", + key, + fmt::join(value, ", ")); + } + } + // the currently used memory + if (this->memory_used_.has_value()) { + for (const auto &[key, value] : this->memory_used_.value()) { + str += fmt::format(" {}_memory_used:\n" + " unit: \"string\"\n" + " values: [{}]\n", + key, + fmt::join(value, ", ")); + } + } + // PCIe link width + if (this->num_pcie_lanes_.has_value()) { + str += fmt::format(" num_pcie_lanes:\n" + " unit: \"int\"\n" + " values: [{}]\n", + fmt::join(this->num_pcie_lanes_.value(), ", ")); + } + // PCIe link generation + if (this->pcie_link_generation_.has_value()) { + str += fmt::format(" pcie_link_generation:\n" + " unit: \"int\"\n" + " values: [{}]\n", + fmt::join(this->pcie_link_generation_.value(), ", ")); + } + // PCIe link speed + if (this->pcie_link_speed_.has_value()) { + str += fmt::format(" pcie_link_speed:\n" + " unit: \"MBPS\"\n" + " values: [{}]\n", + fmt::join(this->pcie_link_speed_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const level_zero_memory_samples &samples) { + std::string str{}; + + append_map_values(str, "memory_total [B]", samples.get_memory_total()); + append_map_values(str, "visible_memory_total [B]", samples.get_visible_memory_total()); + append_map_values(str, "memory_location [string]", samples.get_memory_location()); + str += fmt::format("num_pcie_lanes_max [int]: {}\n" + "pcie_link_generation_max [int]: {}\n" + "pcie_link_speed_max [MBPS]: {}\n", + detail::value_or_default(samples.get_num_pcie_lanes_max()), + detail::value_or_default(samples.get_pcie_link_generation_max()), + detail::value_or_default(samples.get_pcie_link_speed_max())); + append_map_values(str, "memory_bus_width [Bit]", samples.get_memory_bus_width()); + append_map_values(str, "memory_num_channels [int]", samples.get_memory_num_channels()); + + append_map_values(str, "memory_free 
[string]", samples.get_memory_free()); + append_map_values(str, "memory_used [string]", samples.get_memory_used()); + str += fmt::format("num_pcie_lanes [int]: [{}]\n" + "pcie_link_generation [int]: [{}]\n" + "pcie_link_speed [MBPS]: [{}]", + fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", ")); + + return out << str; +} + +//*************************************************************************************************************************************// +// temperature samples // +//*************************************************************************************************************************************// + +bool level_zero_temperature_samples::has_samples() const { + return this->num_fans_.has_value() || this->fan_speed_max_.has_value() || this->temperature_max_.has_value() || this->memory_temperature_max_.has_value() + || this->global_temperature_max_.has_value() || this->fan_speed_percentage_.has_value() || this->temperature_.has_value() + || this->memory_temperature_.has_value() || this->global_temperature_.has_value() || this->psu_temperature_.has_value(); +} + +std::string level_zero_temperature_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "temperature:\n" }; + + // the number of fans + if (this->num_fans_.has_value()) { + str += fmt::format(" num_fans:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_fans_.value()); + } + // the maximum fan speed in RPM + if (this->fan_speed_max_.has_value()) { + str += fmt::format(" fan_speed_max:\n" + " unit: \"RPM\"\n" + " values: {}\n", + this->fan_speed_max_.value()); + } + // the maximum GPU temperature + if (this->temperature_max_.has_value()) { + str += fmt::format(" temperature_max:\n" + " unit: 
\"°C\"\n" + " values: {}\n", + this->temperature_max_.value()); + } + // the maximum memory temperature + if (this->memory_temperature_max_.has_value()) { + str += fmt::format(" memory_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->memory_temperature_max_.value()); + } + // the maximum global temperature + if (this->global_temperature_max_.has_value()) { + str += fmt::format(" global_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->global_temperature_max_.value()); + } + + // the current fan speed in percent + if (this->fan_speed_percentage_.has_value()) { + str += fmt::format(" fan_speed_percentage:\n" + " unit: \"percentage\"\n" + " values: [{}]\n", + fmt::join(this->fan_speed_percentage_.value(), ", ")); + } + // the current GPU temperature + if (this->temperature_.has_value()) { + str += fmt::format(" temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->temperature_.value(), ", ")); + } + // the current memory temperature + if (this->memory_temperature_.has_value()) { + str += fmt::format(" memory_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->memory_temperature_.value(), ", ")); + } + // the current global temperature + if (this->global_temperature_.has_value()) { + str += fmt::format(" global_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->global_temperature_.value(), ", ")); + } + // the current PSU temperature + if (this->psu_temperature_.has_value()) { + str += fmt::format(" psu_temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->psu_temperature_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const level_zero_temperature_samples &samples) { + return out << fmt::format("num_fans [int]: {}\n" + "fan_speed_max [RPM]: {}\n" + "temperature_max [°C]: {}\n" + "memory_temperature_max [°C]: {}\n" + "global_temperature_max [°C]: {}\n" + "fan_speed_percentage [%]: [{}]\n" + "temperature [°C]: 
[{}]\n" + "memory_temperature [°C]: [{}]\n" + "global_temperature [°C]: [{}]\n" + "psu_temperature [°C]: [{}]", + detail::value_or_default(samples.get_num_fans()), + detail::value_or_default(samples.get_fan_speed_max()), + detail::value_or_default(samples.get_temperature_max()), + detail::value_or_default(samples.get_memory_temperature_max()), + detail::value_or_default(samples.get_global_temperature_max()), + fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_global_temperature()), ", "), + fmt::join(detail::value_or_default(samples.get_psu_temperature()), ", ")); +} + +} // namespace hws diff --git a/src/hardware_sampling/gpu_intel/utility.cpp b/src/hws/gpu_intel/utility.cpp similarity index 81% rename from src/hardware_sampling/gpu_intel/utility.cpp rename to src/hws/gpu_intel/utility.cpp index a0cefe9..5a29eee 100644 --- a/src/hardware_sampling/gpu_intel/utility.cpp +++ b/src/hws/gpu_intel/utility.cpp @@ -5,16 +5,68 @@ * See the LICENSE.md file in the project root for full license information. 
*/ -#include "hardware_sampling/gpu_intel/utility.hpp" +#include "hws/gpu_intel/utility.hpp" +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include "level_zero/ze_api.h" // Level Zero runtime functions #include "level_zero/zes_api.h" // Level Zero runtime functions #include // std::string #include // std::string_view +#include // std::vector namespace hws::detail { +std::vector property_flags_to_vector(const ze_device_property_flags_t flags) { + std::vector string_flags{}; + + if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) != 0) { + string_flags.emplace_back("integrated_gpu"); + } + if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) != 0) { + string_flags.emplace_back("sub-device"); + } + if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_ECC) != 0) { + string_flags.emplace_back("ecc"); + } + if ((flags & ze_device_property_flag_t::ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING) != 0) { + string_flags.emplace_back("on-demand_page-faulting"); + } + + return string_flags; +} + +std::string throttle_reason_to_string(const zes_freq_throttle_reason_flags_t reasons) { + if (reasons == 0) { + return "None"; + } else { + std::vector string_reasons{}; + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_AVE_PWR_CAP) != 0) { + string_reasons.emplace_back("average_power"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_BURST_PWR_CAP) != 0) { + string_reasons.emplace_back("burst_power"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_CURRENT_LIMIT) != 0) { + string_reasons.emplace_back("current_limit"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_THERMAL_LIMIT) != 0) { + string_reasons.emplace_back("thermal_limit"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_PSU_ALERT) != 0) { + 
string_reasons.emplace_back("psu_assertion"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_SW_RANGE) != 0) { + string_reasons.emplace_back("software_frequency_range"); + } + if ((reasons & zes_freq_throttle_reason_flag_t::ZES_FREQ_THROTTLE_REASON_FLAG_HW_RANGE) != 0) { + string_reasons.emplace_back("hardware_frequency_range"); + } + return fmt::format("{}", fmt::join(string_reasons, "|")); + } +} + std::string_view to_result_string(const ze_result_t errc) { switch (errc) { case ZE_RESULT_SUCCESS: @@ -175,17 +227,4 @@ std::string memory_location_to_name(const zes_mem_loc_t mem_loc) { } } -std::string temperature_sensor_type_to_name(const zes_temp_sensors_t sensor_type) { - switch (sensor_type) { - case ZES_TEMP_SENSORS_GLOBAL: - return "global"; - case ZES_TEMP_SENSORS_GPU: - return "gpu"; - case ZES_TEMP_SENSORS_MEMORY: - return "memory"; - default: - return ""; - } -} - } // namespace hws::detail diff --git a/src/hws/gpu_nvidia/hardware_sampler.cpp b/src/hws/gpu_nvidia/hardware_sampler.cpp new file mode 100644 index 0000000..9c2a927 --- /dev/null +++ b/src/hws/gpu_nvidia/hardware_sampler.cpp @@ -0,0 +1,595 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. 
+ */ + +#include "hws/gpu_nvidia/hardware_sampler.hpp" + +#include "hws/gpu_nvidia/nvml_device_handle_impl.hpp" // hws::detail::nvml_device_handle implementation +#include "hws/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} +#include "hws/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK +#include "hws/hardware_sampler.hpp" // hws::hardware_sampler +#include "hws/sample_category.hpp" // hws::sample_category +#include "hws/utility.hpp" // hws::detail::time_points_to_epoch + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "nvml.h" // NVML runtime functions + +#include // std::min_element, std::sort, std::transform +#include // std::chrono::{steady_clock, duration_cast, milliseconds} +#include // std::size_t +#include // std::exception, std::terminate +#include // std::ios_base +#include // std::cerr, std::endl +#include // std::iota +#include // std::optional +#include // std::ostream +#include // std::runtime_error +#include // std::string +#include // std::this_thread +#include // std::vector + +namespace hws { + +gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const sample_category category) : + gpu_nvidia_hardware_sampler{ 0, HWS_SAMPLING_INTERVAL, category } { } + +gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const sample_category category) : + gpu_nvidia_hardware_sampler{ device_id, HWS_SAMPLING_INTERVAL, category } { } + +gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + gpu_nvidia_hardware_sampler{ 0, sampling_interval, category } { } + +gpu_nvidia_hardware_sampler::gpu_nvidia_hardware_sampler(const std::size_t device_id, const std::chrono::milliseconds sampling_interval, const sample_category category) : + hardware_sampler{ sampling_interval, category } { + // make sure that 
nvmlInit is only called once for all instances + if (instances_++ == 0) { + HWS_NVML_ERROR_CHECK(nvmlInit()) + // notify that initialization has been finished + init_finished_ = true; + } else { + // wait until init has been finished! + while (!init_finished_) { } + } + + // initialize samples -> can't be done beforehand since the device handle can only be initialized after a call to nvmlInit + device_ = detail::nvml_device_handle{ device_id }; +} + +gpu_nvidia_hardware_sampler::~gpu_nvidia_hardware_sampler() { + try { + // if this hardware sampler is still sampling, stop it + if (this->has_sampling_started() && !this->has_sampling_stopped()) { + this->stop_sampling(); + } + + // the last instance must shut down the NVML runtime + // make sure that nvmlShutdown is only called once + if (--instances_ == 0) { + HWS_NVML_ERROR_CHECK(nvmlShutdown()) + // reset init_finished flag + init_finished_ = false; + } + } catch (const std::exception &e) { + std::cerr << e.what() << std::endl; + std::terminate(); + } +} + +void gpu_nvidia_hardware_sampler::sampling_loop() { + // get the nvml handle from the device + nvmlDevice_t device = device_.get_impl().device; + + // + // add samples where we only have to retrieve the value once + // + + this->add_time_point(std::chrono::steady_clock::now()); + + double initial_total_power_consumption{}; // initial total power consumption in J + + // retrieve initial general information + if (this->sample_category_enabled(sample_category::general)) { + // fixed information -> only retrieved once + nvmlDeviceArchitecture_t device_arch{}; + if (nvmlDeviceGetArchitecture(device, &device_arch) == NVML_SUCCESS) { + switch (device_arch) { +#if defined(NVML_DEVICE_ARCH_KEPLER) + case NVML_DEVICE_ARCH_KEPLER: + general_samples_.architecture_ = "Kepler"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_MAXWELL) + case NVML_DEVICE_ARCH_MAXWELL: + general_samples_.architecture_ = "Maxwell"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_PASCAL) + case 
NVML_DEVICE_ARCH_PASCAL: + general_samples_.architecture_ = "Pascal"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_VOLTA) + case NVML_DEVICE_ARCH_VOLTA: + general_samples_.architecture_ = "Volta"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_TURING) + case NVML_DEVICE_ARCH_TURING: + general_samples_.architecture_ = "Turing"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_AMPERE) + case NVML_DEVICE_ARCH_AMPERE: + general_samples_.architecture_ = "Ampere"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_ADA) + case NVML_DEVICE_ARCH_ADA: + general_samples_.architecture_ = "Ada"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_HOPPER) + case NVML_DEVICE_ARCH_HOPPER: + general_samples_.architecture_ = "Hopper"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_BLACKWELL) + case NVML_DEVICE_ARCH_BLACKWELL: + general_samples_.architecture_ = "Blackwell"; + break; +#endif +#if defined(NVML_DEVICE_ARCH_T23X) + case NVML_DEVICE_ARCH_T23X: + general_samples_.architecture_ = "Orin"; + break; +#endif + default: + break; + } + } + + // the byte order is given by the NVIDIA CUDA guide + general_samples_.byte_order_ = "Little Endian"; + + // the vendor ID is fixed for NVIDIA GPUs (must not overwrite the byte order set above) + general_samples_.vendor_id_ = "NVIDIA"; + + std::string name(NVML_DEVICE_NAME_V2_BUFFER_SIZE, '\0'); + if (nvmlDeviceGetName(device, name.data(), name.size()) == NVML_SUCCESS) { + general_samples_.name_ = name.substr(0, name.find_first_of('\0')); + } + + nvmlEnableState_t mode{}; + if (nvmlDeviceGetPersistenceMode(device, &mode) == NVML_SUCCESS) { + general_samples_.persistence_mode_ = mode == NVML_FEATURE_ENABLED; + } + + decltype(general_samples_.num_cores_)::value_type num_cores{}; + if (nvmlDeviceGetNumGpuCores(device, &num_cores) == NVML_SUCCESS) { + general_samples_.num_cores_ = num_cores; + } + + // queried samples -> retrieved every iteration if available + nvmlPstates_t pstate{}; + if (nvmlDeviceGetPerformanceState(device, &pstate) == NVML_SUCCESS) { + general_samples_.performance_level_ =
decltype(general_samples_.performance_level_)::value_type{ static_cast(pstate) }; + } + + nvmlUtilization_t util{}; + if (nvmlDeviceGetUtilizationRates(device, &util) == NVML_SUCCESS) { + general_samples_.compute_utilization_ = decltype(general_samples_.compute_utilization_)::value_type{ util.gpu }; + general_samples_.memory_utilization_ = decltype(general_samples_.memory_utilization_)::value_type{ util.memory }; + } + } + + // retrieve initial clock related information + if (this->sample_category_enabled(sample_category::clock)) { + // fixed information -> only retrieved once + unsigned int adaptive_clock_status{}; + if (nvmlDeviceGetAdaptiveClockInfoStatus(device, &adaptive_clock_status) == NVML_SUCCESS) { + clock_samples_.auto_boosted_clock_enabled_ = adaptive_clock_status == NVML_ADAPTIVE_CLOCKING_INFO_STATUS_ENABLED; + } + + unsigned int clock_graph_max{}; + if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph_max) == NVML_SUCCESS) { + clock_samples_.clock_frequency_max_ = static_cast(clock_graph_max); + } + + unsigned int clock_sm_max{}; + if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_SM, &clock_sm_max) == NVML_SUCCESS) { + clock_samples_.sm_clock_frequency_max_ = static_cast(clock_sm_max); + } + + unsigned int clock_mem_max{}; + if (nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_MEM, &clock_mem_max) == NVML_SUCCESS) { + clock_samples_.memory_clock_frequency_max_ = static_cast(clock_mem_max); + } + + { + unsigned int clock_count{ 128 }; + std::vector supported_clocks(clock_count); + if (nvmlDeviceGetSupportedMemoryClocks(device, &clock_count, supported_clocks.data()) == NVML_SUCCESS) { + supported_clocks.resize(clock_count); + clock_samples_.memory_clock_frequency_min_ = static_cast(*std::min_element(supported_clocks.cbegin(), supported_clocks.cend())); + + decltype(clock_samples_.available_memory_clock_frequencies_)::value_type available_memory_clock_frequencies(supported_clocks.size()); + // convert unsigned int values to double values + 
std::transform(supported_clocks.cbegin(), supported_clocks.cend(), available_memory_clock_frequencies.begin(), [](const unsigned int c) { return static_cast(c); }); + // we want to report all supported memory clocks in ascending order + std::sort(available_memory_clock_frequencies.begin(), available_memory_clock_frequencies.end()); + clock_samples_.available_memory_clock_frequencies_ = available_memory_clock_frequencies; + } + } + + { + unsigned int clock_count{ 128 }; + std::vector supported_clocks(clock_count); + if (clock_samples_.memory_clock_frequency_min_.has_value() && nvmlDeviceGetSupportedGraphicsClocks(device, static_cast(clock_samples_.memory_clock_frequency_min_.value()), &clock_count, supported_clocks.data()) == NVML_SUCCESS) { + clock_samples_.clock_frequency_min_ = static_cast(*std::min_element(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count)); + } + + if (clock_samples_.available_memory_clock_frequencies_.has_value()) { + for (const auto value : clock_samples_.available_memory_clock_frequencies_.value()) { + if (nvmlDeviceGetSupportedGraphicsClocks(device, static_cast(value), &clock_count, supported_clocks.data()) == NVML_SUCCESS) { + decltype(clock_samples_.available_clock_frequencies_)::value_type::mapped_type available_clock_frequencies(clock_count); + // convert unsigned int values to double values + std::transform(supported_clocks.cbegin(), supported_clocks.cbegin() + clock_count, available_clock_frequencies.begin(), [](const unsigned int c) { return static_cast(c); }); + // we want to report all supported memory clocks in ascending order + std::sort(available_clock_frequencies.begin(), available_clock_frequencies.end()); + // if no map exists, default construct an empty map + if (!clock_samples_.available_clock_frequencies_.has_value()) { + clock_samples_.available_clock_frequencies_ = decltype(clock_samples_)::map_type{}; + } + clock_samples_.available_clock_frequencies_->emplace(value, available_clock_frequencies); + } + } 
+ } + } + + // queried samples -> retrieved every iteration if available + unsigned int clock_graph{}; + if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &clock_graph) == NVML_SUCCESS) { + clock_samples_.clock_frequency_ = decltype(clock_samples_.clock_frequency_)::value_type{ static_cast(clock_graph) }; + } + + unsigned int clock_sm{}; + if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &clock_sm) == NVML_SUCCESS) { + clock_samples_.sm_clock_frequency_ = decltype(clock_samples_.sm_clock_frequency_)::value_type{ static_cast(clock_sm) }; + } + + unsigned int clock_mem{}; + if (nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &clock_mem) == NVML_SUCCESS) { + clock_samples_.memory_clock_frequency_ = decltype(clock_samples_.memory_clock_frequency_)::value_type{ static_cast(clock_mem) }; + } + + decltype(clock_samples_.throttle_reason_)::value_type::value_type clock_throttle_reason{}; + if (nvmlDeviceGetCurrentClocksEventReasons(device, &clock_throttle_reason) == NVML_SUCCESS) { + clock_samples_.throttle_reason_ = decltype(clock_samples_.throttle_reason_)::value_type{ clock_throttle_reason }; + clock_samples_.throttle_reason_string_ = decltype(clock_samples_.throttle_reason_string_)::value_type{ detail::throttle_event_reason_to_string(clock_throttle_reason) }; + } + + nvmlEnableState_t mode{}; + nvmlEnableState_t default_mode{}; + if (nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode) == NVML_SUCCESS) { + clock_samples_.auto_boosted_clock_ = decltype(clock_samples_.auto_boosted_clock_)::value_type{ mode == NVML_FEATURE_ENABLED }; + } + } + + // retrieve initial power related information + if (this->sample_category_enabled(sample_category::power)) { + // fixed information -> only retrieved once + nvmlEnableState_t mode{}; + if (nvmlDeviceGetPowerManagementMode(device, &mode) == NVML_SUCCESS) { + power_samples_.power_management_mode_ = mode == NVML_FEATURE_ENABLED; + } + + unsigned int power_management_limit{}; + if 
(nvmlDeviceGetPowerManagementLimit(device, &power_management_limit) == NVML_SUCCESS) { + power_samples_.power_management_limit_ = static_cast(power_management_limit) / 1000.0; + } + + unsigned int power_enforced_limit{}; + if (nvmlDeviceGetEnforcedPowerLimit(device, &power_enforced_limit) == NVML_SUCCESS) { + power_samples_.power_enforced_limit_ = static_cast(power_enforced_limit) / 1000.0; + } + + if (general_samples_.architecture_.has_value()) { + // based on https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1gf754f109beca3a4a8c8c1cd650d7d66c + if (general_samples_.architecture_ == "Kepler" || general_samples_.architecture_ == "Maxwell" || general_samples_.architecture_ == "Pascal" || general_samples_.architecture_ == "Volta" || general_samples_.architecture_ == "Turing") { + power_samples_.power_measurement_type_ = "current/instant"; + } else if (general_samples_.architecture_ == "Ampere" || general_samples_.architecture_ == "Ada" || general_samples_.architecture_ == "Hopper" || general_samples_.architecture_ == "Blackwell" || general_samples_.architecture_ == "Orin") { + if (general_samples_.name_.has_value() && general_samples_.name_.value().find("A100") != std::string::npos) { + // GA100 also has instant power draw measurements + power_samples_.power_measurement_type_ = "current/instant"; + } else { + power_samples_.power_measurement_type_ = "average"; + } + } else { + power_samples_.power_measurement_type_ = "invalid/undetected"; + } + } + + decltype(power_samples_.available_power_profiles_)::value_type power_states(17, 32); // 17 power states, value 32 = unknown + std::iota(power_states.begin(), power_states.end() - 1, decltype(power_samples_.available_power_profiles_)::value_type::value_type{ 0 }); + power_samples_.available_power_profiles_ = power_states; + + // queried samples -> retrieved every iteration if available + unsigned int power_usage{}; + if (nvmlDeviceGetPowerUsage(device, &power_usage) == 
NVML_SUCCESS) { + power_samples_.power_usage_ = decltype(power_samples_.power_usage_)::value_type{ static_cast(power_usage) / 1000.0 }; + } + + unsigned long long power_total_energy_consumption{}; + if (nvmlDeviceGetTotalEnergyConsumption(device, &power_total_energy_consumption) == NVML_SUCCESS) { + initial_total_power_consumption = static_cast(power_total_energy_consumption) / 1000.0; + power_samples_.power_total_energy_consumption_ = decltype(power_samples_.power_total_energy_consumption_)::value_type{ 0.0 }; + } + + nvmlPstates_t pstate{}; + if (nvmlDeviceGetPowerState(device, &pstate) == NVML_SUCCESS) { + power_samples_.power_profile_ = decltype(power_samples_.power_profile_)::value_type{ static_cast(pstate) }; + } + } + + // retrieve initial memory related information + if (this->sample_category_enabled(sample_category::memory)) { + // fixed information -> only retrieved once + nvmlMemory_t memory_info{}; + if (nvmlDeviceGetMemoryInfo(device, &memory_info) == NVML_SUCCESS) { + memory_samples_.memory_total_ = memory_info.total; + // queried samples -> retrieved every iteration if available + memory_samples_.memory_free_ = decltype(memory_samples_.memory_free_)::value_type{ memory_info.free }; + memory_samples_.memory_used_ = decltype(memory_samples_.memory_used_)::value_type{ memory_info.used }; + } + + decltype(memory_samples_.memory_bus_width_)::value_type memory_bus_width{}; + if (nvmlDeviceGetMemoryBusWidth(device, &memory_bus_width) == NVML_SUCCESS) { + memory_samples_.memory_bus_width_ = memory_bus_width; + } + + decltype(memory_samples_.num_pcie_lanes_max_)::value_type num_pcie_lanes_max{}; + if (nvmlDeviceGetMaxPcieLinkWidth(device, &num_pcie_lanes_max) == NVML_SUCCESS) { + memory_samples_.num_pcie_lanes_max_ = num_pcie_lanes_max; + } + + decltype(memory_samples_.pcie_link_generation_max_)::value_type pcie_link_generation_max{}; + if (nvmlDeviceGetMaxPcieLinkGeneration(device, &pcie_link_generation_max) == NVML_SUCCESS) { + 
memory_samples_.pcie_link_generation_max_ = pcie_link_generation_max; + } + + decltype(memory_samples_.pcie_link_speed_max_)::value_type pcie_link_speed_max{}; + if (nvmlDeviceGetPcieLinkMaxSpeed(device, &pcie_link_speed_max) == NVML_SUCCESS) { + memory_samples_.pcie_link_speed_max_ = pcie_link_speed_max; + } + + // queried samples -> retrieved every iteration if available + decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type num_pcie_lanes{}; + if (nvmlDeviceGetCurrPcieLinkWidth(device, &num_pcie_lanes) == NVML_SUCCESS) { + memory_samples_.num_pcie_lanes_ = decltype(memory_samples_.num_pcie_lanes_)::value_type{ num_pcie_lanes }; + } + + decltype(memory_samples_.pcie_link_generation_)::value_type::value_type pcie_link_generation{}; + if (nvmlDeviceGetCurrPcieLinkGeneration(device, &pcie_link_generation) == NVML_SUCCESS) { + memory_samples_.pcie_link_generation_ = decltype(memory_samples_.pcie_link_generation_)::value_type{ pcie_link_generation }; + } + } + + // retrieve initial temperature related information + if (this->sample_category_enabled(sample_category::temperature)) { + // fixed information -> only retrieved once + decltype(temperature_samples_.num_fans_)::value_type num_fans{}; + if (nvmlDeviceGetNumFans(device, &num_fans) == NVML_SUCCESS) { + temperature_samples_.num_fans_ = num_fans; + } + + if (temperature_samples_.num_fans_.has_value() && temperature_samples_.num_fans_.value() > 0) { + decltype(temperature_samples_.fan_speed_min_)::value_type min_fan_speed{}; + decltype(temperature_samples_.fan_speed_max_)::value_type max_fan_speed{}; + if (nvmlDeviceGetMinMaxFanSpeed(device, &min_fan_speed, &max_fan_speed) == NVML_SUCCESS) { + temperature_samples_.fan_speed_min_ = min_fan_speed; + temperature_samples_.fan_speed_max_ = max_fan_speed; + } + } + + unsigned int temperature_max{}; + if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_GPU_MAX, &temperature_max) == NVML_SUCCESS) { + temperature_samples_.temperature_max_ = 
static_cast(temperature_max); + } + + unsigned int memory_temperature_max{}; + if (nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_MEM_MAX, &memory_temperature_max) == NVML_SUCCESS) { + temperature_samples_.memory_temperature_max_ = static_cast(memory_temperature_max); + } + + // queried samples -> retrieved every iteration if available + unsigned int fan_speed_percentage{}; + if (nvmlDeviceGetFanSpeed(device, &fan_speed_percentage) == NVML_SUCCESS) { + temperature_samples_.fan_speed_percentage_ = decltype(temperature_samples_.fan_speed_percentage_)::value_type{ static_cast(fan_speed_percentage) }; + } + + unsigned int temperature{}; + if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature) == NVML_SUCCESS) { + temperature_samples_.temperature_ = decltype(temperature_samples_.temperature_)::value_type{ static_cast(temperature) }; + } + } + + // + // loop until stop_sampling() is called + // + + while (!this->has_sampling_stopped()) { + // only sample values if the sampler currently isn't paused + if (this->is_sampling()) { + // add current time point + this->add_time_point(std::chrono::steady_clock::now()); + + // retrieve general samples + if (this->sample_category_enabled(sample_category::general)) { + if (general_samples_.performance_level_.has_value()) { + nvmlPstates_t pstate{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetPerformanceState(device, &pstate)) + general_samples_.performance_level_->push_back(static_cast(pstate)); + } + + if (general_samples_.compute_utilization_.has_value() && general_samples_.memory_utilization_.has_value()) { + nvmlUtilization_t util{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetUtilizationRates(device, &util)) + general_samples_.compute_utilization_->push_back(util.gpu); + general_samples_.memory_utilization_->push_back(util.memory); + } + } + + // retrieve clock related samples + if (this->sample_category_enabled(sample_category::clock)) { + if (clock_samples_.clock_frequency_.has_value()) { + unsigned 
int value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &value)) + clock_samples_.clock_frequency_->push_back(static_cast(value)); + } + + if (clock_samples_.sm_clock_frequency_.has_value()) { + unsigned int value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &value)) + clock_samples_.sm_clock_frequency_->push_back(static_cast(value)); + } + + if (clock_samples_.memory_clock_frequency_.has_value()) { + unsigned int value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &value)) + clock_samples_.memory_clock_frequency_->push_back(static_cast(value)); + } + + if (clock_samples_.throttle_reason_string_.has_value()) { + decltype(clock_samples_.throttle_reason_)::value_type::value_type value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrentClocksEventReasons(device, &value)) + clock_samples_.throttle_reason_->push_back(value); + clock_samples_.throttle_reason_string_->push_back(detail::throttle_event_reason_to_string(value)); + } + + if (clock_samples_.auto_boosted_clock_.has_value()) { + nvmlEnableState_t mode{}; + nvmlEnableState_t default_mode{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetAutoBoostedClocksEnabled(device, &mode, &default_mode)) + clock_samples_.auto_boosted_clock_->push_back(mode == NVML_FEATURE_ENABLED); + } + } + + // retrieve power related information + if (this->sample_category_enabled(sample_category::power)) { + if (power_samples_.power_profile_.has_value()) { + nvmlPstates_t pstate{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerState(device, &pstate)) + power_samples_.power_profile_->push_back(static_cast(pstate)); + } + + if (power_samples_.power_usage_.has_value()) { + unsigned int value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetPowerUsage(device, &value)) + power_samples_.power_usage_->push_back(static_cast(value) / 1000.0); + } + + if (power_samples_.power_total_energy_consumption_.has_value()) { + unsigned long long value{}; + 
HWS_NVML_ERROR_CHECK(nvmlDeviceGetTotalEnergyConsumption(device, &value)) + power_samples_.power_total_energy_consumption_->push_back((static_cast(value) / 1000.0) - initial_total_power_consumption); + } + } + + // retrieve memory related information + if (this->sample_category_enabled(sample_category::memory)) { + if (memory_samples_.memory_free_.has_value() && memory_samples_.memory_used_.has_value()) { + nvmlMemory_t memory_info{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetMemoryInfo(device, &memory_info)) + memory_samples_.memory_free_->push_back(memory_info.free); + memory_samples_.memory_used_->push_back(memory_info.used); + } + + if (memory_samples_.num_pcie_lanes_.has_value()) { + decltype(memory_samples_.num_pcie_lanes_)::value_type::value_type value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkWidth(device, &value)) + memory_samples_.num_pcie_lanes_->push_back(value); + } + + if (memory_samples_.pcie_link_generation_.has_value()) { + decltype(memory_samples_.pcie_link_generation_)::value_type::value_type value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetCurrPcieLinkGeneration(device, &value)) + memory_samples_.pcie_link_generation_->push_back(value); + } + } + + // retrieve temperature related information + if (this->sample_category_enabled(sample_category::temperature)) { + if (temperature_samples_.fan_speed_percentage_.has_value()) { + unsigned int value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetFanSpeed(device, &value)) + temperature_samples_.fan_speed_percentage_->push_back(static_cast(value)); + } + + if (temperature_samples_.temperature_.has_value()) { + unsigned int value{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &value)) + temperature_samples_.temperature_->push_back(static_cast(value)); + } + } + } + + // wait for the sampling interval to pass to retrieve the next sample + std::this_thread::sleep_for(this->sampling_interval()); + } +} + +std::string gpu_nvidia_hardware_sampler::device_identification() const { + 
nvmlPciInfo_st pcie_info{}; + HWS_NVML_ERROR_CHECK(nvmlDeviceGetPciInfo_v3(device_.get_impl().device, &pcie_info)) + return fmt::format("gpu_nvidia_device_{}_{}", pcie_info.device, pcie_info.bus); +} + +std::string gpu_nvidia_hardware_sampler::samples_only_as_yaml_string() const { + // check whether it's safe to generate the YAML entry + if (this->is_sampling()) { + throw std::runtime_error{ "Can't create the final YAML entry if the hardware sampler is still running!" }; + } + + return fmt::format("{}{}" + "{}{}" + "{}{}" + "{}{}" + "{}", + general_samples_.generate_yaml_string(), + general_samples_.has_samples() ? "\n" : "", + clock_samples_.generate_yaml_string(), + clock_samples_.has_samples() ? "\n" : "", + power_samples_.generate_yaml_string(), + power_samples_.has_samples() ? "\n" : "", + memory_samples_.generate_yaml_string(), + memory_samples_.has_samples() ? "\n" : "", + temperature_samples_.generate_yaml_string()); +} + +std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &sampler) { + if (sampler.is_sampling()) { + out.setstate(std::ios_base::failbit); + return out; + } else { + return out << fmt::format("sampling interval: {}\n" + "time points: [{}]\n\n" + "general samples:\n{}\n\n" + "clock samples:\n{}\n\n" + "power samples:\n{}\n\n" + "memory samples:\n{}\n\n" + "temperature samples:\n{}", + sampler.sampling_interval(), + fmt::join(detail::time_points_to_epoch(sampler.sampling_time_points()), ", "), + sampler.general_samples(), + sampler.clock_samples(), + sampler.power_samples(), + sampler.memory_samples(), + sampler.temperature_samples()); + } +} + +} // namespace hws diff --git a/src/hws/gpu_nvidia/nvml_samples.cpp b/src/hws/gpu_nvidia/nvml_samples.cpp new file mode 100644 index 0000000..3ce65c3 --- /dev/null +++ b/src/hws/gpu_nvidia/nvml_samples.cpp @@ -0,0 +1,587 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. 
+ * See the LICENSE.md file in the project root for full license information. + */ + +#include "hws/gpu_nvidia/nvml_samples.hpp" + +#include "hws/utility.hpp" // hws::detail::{value_or_default, map_entry_to_string, quote} + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join + +#include // std::ostream +#include // std::string + +namespace hws { + +//*************************************************************************************************************************************// +// general samples // +//*************************************************************************************************************************************// + +bool nvml_general_samples::has_samples() const { + return this->architecture_.has_value() || this->byte_order_.has_value() || this->vendor_id_.has_value() || this->name_.has_value() + || this->persistence_mode_.has_value() || this->num_cores_.has_value() || this->compute_utilization_.has_value() + || this->memory_utilization_.has_value() || this->performance_level_.has_value(); +} + +std::string nvml_general_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "general:\n" }; + + // device architecture + if (this->architecture_.has_value()) { + str += fmt::format(" architecture:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->architecture_.value()); + } + // device byte order + if (this->byte_order_.has_value()) { + str += fmt::format(" byte_order:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->byte_order_.value()); + } + // the vendor specific ID + if (this->vendor_id_.has_value()) { + str += fmt::format(" vendor_id:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->vendor_id_.value()); + } + // device name + if (this->name_.has_value()) { + str += fmt::format(" name:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->name_.value()); + } + // 
persistence mode enabled + if (this->persistence_mode_.has_value()) { + str += fmt::format(" persistence_mode:\n" + " unit: \"bool\"\n" + " values: {}\n", + this->persistence_mode_.value()); + } + // number of cores + if (this->num_cores_.has_value()) { + str += fmt::format(" num_cores:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_cores_.value()); + } + + // device compute utilization + if (this->compute_utilization_.has_value()) { + str += fmt::format(" compute_utilization:\n" + " unit: \"percentage\"\n" + " values: [{}]\n", + fmt::join(this->compute_utilization_.value(), ", ")); + } + + // device memory utilization + if (this->memory_utilization_.has_value()) { + str += fmt::format(" memory_utilization:\n" + " unit: \"percentage\"\n" + " values: [{}]\n", + fmt::join(this->memory_utilization_.value(), ", ")); + } + // performance state + if (this->performance_level_.has_value()) { + str += fmt::format(" performance_level:\n" + " unit: \"0 - maximum performance; 15 - minimum performance; 32 - unknown\"\n" + " values: [{}]\n", + fmt::join(this->performance_level_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const nvml_general_samples &samples) { + return out << fmt::format("architecture [string]: {}\n" + "byte_order [string]: {}\n" + "num_cores [int]: {}\n" + "vendor_id [string]: {}\n" + "name [string]: {}\n" + "persistence_mode [bool]: {}\n" + "compute_utilization [%]: [{}]\n" + "memory_utilization [%]: [{}]\n" + "performance_level [int]: [{}]", + detail::value_or_default(samples.get_architecture()), + detail::value_or_default(samples.get_byte_order()), + detail::value_or_default(samples.get_num_cores()), + detail::value_or_default(samples.get_vendor_id()), + detail::value_or_default(samples.get_name()), + detail::value_or_default(samples.get_persistence_mode()), + fmt::join(detail::value_or_default(samples.get_compute_utilization()), ", "), + fmt::join(detail::value_or_default(samples.get_memory_utilization()), 
", "), + fmt::join(detail::value_or_default(samples.get_performance_level()), ", ")); +} + +//*************************************************************************************************************************************// +// clock samples // +//*************************************************************************************************************************************// + +bool nvml_clock_samples::has_samples() const { + return this->auto_boosted_clock_enabled_.has_value() || this->clock_frequency_min_.has_value() || this->clock_frequency_max_.has_value() + || this->memory_clock_frequency_min_.has_value() || this->memory_clock_frequency_max_.has_value() || this->sm_clock_frequency_max_.has_value() + || this->available_clock_frequencies_.has_value() || this->available_memory_clock_frequencies_.has_value() || this->clock_frequency_.has_value() + || this->memory_clock_frequency_.has_value() || this->sm_clock_frequency_.has_value() || this->throttle_reason_.has_value() + || this->throttle_reason_string_.has_value() || this->auto_boosted_clock_.has_value(); +} + +std::string nvml_clock_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "clock:\n" }; + + // adaptive clock status + if (this->auto_boosted_clock_enabled_.has_value()) { + str += fmt::format(" auto_boosted_clock_enabled:\n" + " unit: \"bool\"\n" + " values: {}\n", + this->auto_boosted_clock_enabled_.value()); + } + // minimum graph clock + if (this->clock_frequency_min_.has_value()) { + str += fmt::format(" clock_frequency_min:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->clock_frequency_min_.value()); + } + // maximum graph clock + if (this->clock_frequency_max_.has_value()) { + str += fmt::format(" clock_frequency_max:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->clock_frequency_max_.value()); + } + // minimum memory clock + if 
(this->memory_clock_frequency_min_.has_value()) { + str += fmt::format(" memory_clock_frequency_min:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->memory_clock_frequency_min_.value()); + } + // maximum memory clock + if (this->memory_clock_frequency_max_.has_value()) { + str += fmt::format(" memory_clock_frequency_max:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->memory_clock_frequency_max_.value()); + } + // maximum SM clock + if (this->sm_clock_frequency_max_.has_value()) { + str += fmt::format(" sm_clock_frequency_max:\n" + " unit: \"MHz\"\n" + " values: {}\n", + this->sm_clock_frequency_max_.value()); + } + // the available clock frequencies + if (this->available_clock_frequencies_.has_value()) { + str += fmt::format(" available_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values:\n"); + for (const auto &[key, value] : this->available_clock_frequencies_.value()) { + str += fmt::format(" memory_clock_frequency_{}: [{}]\n", key, fmt::join(value, ", ")); + } + } + // the available memory clock frequencies + if (this->available_memory_clock_frequencies_.has_value()) { + str += fmt::format(" available_memory_clock_frequencies:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->available_memory_clock_frequencies_.value(), ", ")); + } + + // graph clock + if (this->clock_frequency_.has_value()) { + str += fmt::format(" clock_frequency:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->clock_frequency_.value(), ", ")); + } + // memory clock + if (this->memory_clock_frequency_.has_value()) { + str += fmt::format(" memory_clock_frequency:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->memory_clock_frequency_.value(), ", ")); + } + // SM clock + if (this->sm_clock_frequency_.has_value()) { + str += fmt::format(" sm_clock_frequency:\n" + " unit: \"MHz\"\n" + " values: [{}]\n", + fmt::join(this->sm_clock_frequency_.value(), ", ")); + } + // clock throttle reason as bitmask + if (this->throttle_reason_.has_value()) { + 
str += fmt::format(" throttle_reason:\n" + " unit: \"bitmask\"\n" + " values: [{}]\n", + fmt::join(detail::quote(this->throttle_reason_.value()), ", ")); + } + // clock throttle reason as string + if (this->throttle_reason_string_.has_value()) { + str += fmt::format(" throttle_reason_string:\n" + " unit: \"string\"\n" + " values: [{}]\n", + fmt::join(detail::quote(this->throttle_reason_string_.value()), ", ")); + } + // clock is auto-boosted + if (this->auto_boosted_clock_.has_value()) { + str += fmt::format(" auto_boosted_clock:\n" + " unit: \"bool\"\n" + " values: [{}]\n", + fmt::join(this->auto_boosted_clock_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const nvml_clock_samples &samples) { + return out << fmt::format("auto_boosted_clock_enabled [bool]: {}\n" + "clock_frequency_min [MHz]: {}\n" + "clock_frequency_max [MHz]: {}\n" + "memory_clock_frequency_min [MHz]: {}\n" + "memory_clock_frequency_max [MHz]: {}\n" + "sm_clock_frequency_max [MHz]: {}\n" + "available_clock_frequencies [MHz]: [{}]\n" + "available_memory_clock_frequencies [MHz]: [{}]\n" + "clock_frequency [MHz]: [{}]\n" + "memory_clock_frequency [MHz]: [{}]\n" + "sm_clock_frequency [MHz]: [{}]\n" + "throttle_reason [bitmask]: [{}]\n" + "throttle_reason_string [string]: [{}]\n" + "auto_boosted_clock [bool]: [{}]", + detail::value_or_default(samples.get_auto_boosted_clock_enabled()), + detail::value_or_default(samples.get_clock_frequency_min()), + detail::value_or_default(samples.get_clock_frequency_max()), + detail::value_or_default(samples.get_memory_clock_frequency_min()), + detail::value_or_default(samples.get_memory_clock_frequency_max()), + detail::value_or_default(samples.get_sm_clock_frequency_max()), + detail::map_entry_to_string(samples.get_available_clock_frequencies()), + fmt::join(detail::value_or_default(samples.get_available_memory_clock_frequencies()), ", "), + fmt::join(detail::value_or_default(samples.get_clock_frequency()), ", "), + 
fmt::join(detail::value_or_default(samples.get_memory_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_sm_clock_frequency()), ", "), + fmt::join(detail::value_or_default(samples.get_throttle_reason()), ", "), + fmt::join(detail::value_or_default(samples.get_throttle_reason_string()), ", "), + fmt::join(detail::value_or_default(samples.get_auto_boosted_clock()), ", ")); +} + +//*************************************************************************************************************************************// +// power samples // +//*************************************************************************************************************************************// + +bool nvml_power_samples::has_samples() const { + return this->power_management_limit_.has_value() || this->power_enforced_limit_.has_value() || this->power_measurement_type_.has_value() + || this->power_management_mode_.has_value() || this->available_power_profiles_.has_value() || this->power_usage_.has_value() + || this->power_total_energy_consumption_.has_value() || this->power_profile_.has_value(); +} + +std::string nvml_power_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "power:\n" }; + + // power management limit + if (this->power_management_limit_.has_value()) { + str += fmt::format(" power_management_limit:\n" + " unit: \"W\"\n" + " values: {}\n", + this->power_management_limit_.value()); + } + // power enforced limit + if (this->power_enforced_limit_.has_value()) { + str += fmt::format(" power_enforced_limit:\n" + " unit: \"W\"\n" + " values: {}\n", + this->power_enforced_limit_.value()); + } + // power measurement type + if (this->power_measurement_type_.has_value()) { + str += fmt::format(" power_measurement_type:\n" + " unit: \"string\"\n" + " values: \"{}\"\n", + this->power_measurement_type_.value()); + } + // the power management mode + if 
(this->power_management_mode_.has_value()) { + str += fmt::format(" power_management_mode:\n" + " unit: \"bool\"\n" + " values: {}\n", + this->power_management_mode_.value()); + } + // available power levels + if (this->available_power_profiles_.has_value()) { + str += fmt::format(" available_power_profiles:\n" + " unit: \"int\"\n" + " values: [{}]\n", + fmt::join(this->available_power_profiles_.value(), ", ")); + } + + // current power usage + if (this->power_usage_.has_value()) { + str += fmt::format(" power_usage:\n" + " unit: \"W\"\n" + " values: [{}]\n", + fmt::join(this->power_usage_.value(), ", ")); + } + // total energy consumed + if (this->power_total_energy_consumption_.has_value()) { + str += fmt::format(" power_total_energy_consumed:\n" + " unit: \"J\"\n" + " values: [{}]\n", + fmt::join(this->power_total_energy_consumption_.value(), ", ")); + } + // power state + if (this->power_profile_.has_value()) { + str += fmt::format(" power_profile:\n" + " unit: \"int\"\n" + " values: [{}]\n", + fmt::join(this->power_profile_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const nvml_power_samples &samples) { + return out << fmt::format("power_management_limit [W]: {}\n" + "power_enforced_limit [W]: {}\n" + "power_measurement_type [string]: {}\n" + "power_management_mode [bool]: {}\n" + "available_power_profiles [int]: [{}]\n" + "power_usage [W]: [{}]\n" + "power_total_energy_consumption [J]: [{}]" + "power_profile [int]: [{}]\n", + detail::value_or_default(samples.get_power_management_limit()), + detail::value_or_default(samples.get_power_enforced_limit()), + detail::value_or_default(samples.get_power_measurement_type()), + detail::value_or_default(samples.get_power_management_mode()), + fmt::join(detail::value_or_default(samples.get_available_power_profiles()), ", "), + fmt::join(detail::value_or_default(samples.get_power_usage()), ", "), + fmt::join(detail::value_or_default(samples.get_power_total_energy_consumption()), 
", "), + fmt::join(detail::value_or_default(samples.get_power_profile()), ", ")); +} + +//*************************************************************************************************************************************// +// memory samples // +//*************************************************************************************************************************************// + +bool nvml_memory_samples::has_samples() const { + return this->memory_total_.has_value() || this->pcie_link_speed_max_.has_value() || this->pcie_link_generation_max_.has_value() + || this->num_pcie_lanes_max_.has_value() || this->memory_bus_width_.has_value() || this->memory_used_.has_value() + || this->memory_free_.has_value() || this->num_pcie_lanes_.has_value() || this->pcie_link_generation_.has_value() + || this->pcie_link_speed_.has_value(); +} + +std::string nvml_memory_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "memory:\n" }; + + // total memory size + if (this->memory_total_.has_value()) { + str += fmt::format(" memory_total:\n" + " unit: \"B\"\n" + " values: {}\n", + this->memory_total_.value()); + } + // maximum PCIe link speed + if (this->pcie_link_speed_max_.has_value()) { + str += fmt::format(" pcie_link_speed_max:\n" + " unit: \"MBPS\"\n" + " values: {}\n", + this->pcie_link_speed_max_.value()); + } + // maximum PCIe link generation + if (this->pcie_link_generation_max_.has_value()) { + str += fmt::format(" pcie_link_generation_max:\n" + " unit: \"int\"\n" + " values: {}\n", + this->pcie_link_generation_max_.value()); + } + // maximum number of available PCIe lanes + if (this->num_pcie_lanes_max_.has_value()) { + str += fmt::format(" num_pcie_lanes_max:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_pcie_lanes_max_.value()); + } + // memory bus width + if (this->memory_bus_width_.has_value()) { + str += fmt::format(" memory_bus_width:\n" + " 
unit: \"Bit\"\n" + " values: {}\n", + this->memory_bus_width_.value()); + } + + // used memory size + if (this->memory_used_.has_value()) { + str += fmt::format(" memory_used:\n" + " unit: \"B\"\n" + " values: [{}]\n", + fmt::join(this->memory_used_.value(), ", ")); + } + // free memory size + if (this->memory_free_.has_value()) { + str += fmt::format(" memory_free:\n" + " unit: \"B\"\n" + " values: [{}]\n", + fmt::join(this->memory_free_.value(), ", ")); + } + // PCIe link width + if (this->num_pcie_lanes_.has_value()) { + str += fmt::format(" num_pcie_lanes:\n" + " unit: \"int\"\n" + " values: [{}]\n", + fmt::join(this->num_pcie_lanes_.value(), ", ")); + } + // PCIe link generation + if (this->pcie_link_generation_.has_value()) { + str += fmt::format(" pcie_link_generation:\n" + " unit: \"int\"\n" + " values: [{}]\n", + fmt::join(this->pcie_link_generation_.value(), ", ")); + } + // PCIe link speed + if (this->pcie_link_speed_.has_value()) { + str += fmt::format(" pcie_link_speed:\n" + " unit: \"MBPS\"\n" + " values: [{}]\n", + fmt::join(this->pcie_link_speed_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const nvml_memory_samples &samples) { + return out << fmt::format("memory_total [B]: {}\n" + "pcie_link_speed_max [MBPS]: {}\n" + "pcie_link_generation_max [int]: {}\n" + "num_pcie_lanes_max [int]: {}\n" + "memory_bus_width [Bit]: {}\n" + "memory_used [B]: [{}]\n" + "memory_free [B]: [{}]\n" + "num_pcie_lanes [int]: [{}]\n" + "pcie_link_generation [int]: [{}]\n" + "pcie_link_speed [MBPS]: [{}]", + detail::value_or_default(samples.get_memory_total()), + detail::value_or_default(samples.get_pcie_link_speed_max()), + detail::value_or_default(samples.get_pcie_link_generation_max()), + detail::value_or_default(samples.get_num_pcie_lanes_max()), + detail::value_or_default(samples.get_memory_bus_width()), + fmt::join(detail::value_or_default(samples.get_memory_used()), ", "), + 
fmt::join(detail::value_or_default(samples.get_memory_free()), ", "), + fmt::join(detail::value_or_default(samples.get_num_pcie_lanes()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_generation()), ", "), + fmt::join(detail::value_or_default(samples.get_pcie_link_speed()), ", ")); +} + +//*************************************************************************************************************************************// +// temperature samples // +//*************************************************************************************************************************************// + +bool nvml_temperature_samples::has_samples() const { + return this->num_fans_.has_value() || this->fan_speed_min_.has_value() || this->fan_speed_max_.has_value() || this->temperature_max_.has_value() + || this->memory_temperature_max_.has_value() || this->fan_speed_percentage_.has_value() || this->temperature_.has_value(); +} + +std::string nvml_temperature_samples::generate_yaml_string() const { + // if no samples are available, return an empty string + if (!this->has_samples()) { + return ""; + } + + std::string str{ "temperature:\n" }; + + // number of fans + if (this->num_fans_.has_value()) { + str += fmt::format(" num_fans:\n" + " unit: \"int\"\n" + " values: {}\n", + this->num_fans_.value()); + } + // min fan speed + if (this->fan_speed_min_.has_value()) { + str += fmt::format(" fan_speed_min:\n" + " unit: \"percentage\"\n" + " values: {}\n", + this->fan_speed_min_.value()); + } + // max fan speed + if (this->fan_speed_max_.has_value()) { + str += fmt::format(" fan_speed_max:\n" + " unit: \"percentage\"\n" + " values: {}\n", + this->fan_speed_max_.value()); + } + // temperature threshold GPU max + if (this->temperature_max_.has_value()) { + str += fmt::format(" temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->temperature_max_.value()); + } + // temperature threshold memory max + if (this->memory_temperature_max_.has_value()) { + str += 
fmt::format(" memory_temperature_max:\n" + " unit: \"°C\"\n" + " values: {}\n", + this->memory_temperature_max_.value()); + } + + // fan speed + if (this->fan_speed_percentage_.has_value()) { + str += fmt::format(" fan_speed_percentage:\n" + " unit: \"percentage\"\n" + " values: [{}]\n", + fmt::join(this->fan_speed_percentage_.value(), ", ")); + } + // temperature GPU + if (this->temperature_.has_value()) { + str += fmt::format(" temperature:\n" + " unit: \"°C\"\n" + " values: [{}]\n", + fmt::join(this->temperature_.value(), ", ")); + } + + return str; +} + +std::ostream &operator<<(std::ostream &out, const nvml_temperature_samples &samples) { + return out << fmt::format("num_fans [int]: {}\n" + "fan_speed_min [%]: {}\n" + "fan_speed_max [%]: {}\n" + "temperature_max [°C]: {}\n" + "memory_temperature_max [°C]: {}\n" + "fan_speed_percentage [%]: [{}]\n" + "temperature [°C]: [{}]", + detail::value_or_default(samples.get_num_fans()), + detail::value_or_default(samples.get_fan_speed_min()), + detail::value_or_default(samples.get_fan_speed_max()), + detail::value_or_default(samples.get_temperature_max()), + detail::value_or_default(samples.get_memory_temperature_max()), + fmt::join(detail::value_or_default(samples.get_fan_speed_percentage()), ", "), + fmt::join(detail::value_or_default(samples.get_temperature()), ", ")); +} + +} // namespace hws diff --git a/src/hws/gpu_nvidia/utility.cpp b/src/hws/gpu_nvidia/utility.cpp new file mode 100644 index 0000000..7c1b9f7 --- /dev/null +++ b/src/hws/gpu_nvidia/utility.cpp @@ -0,0 +1,55 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information.
+ */ + +#include "hws/gpu_nvidia/utility.hpp" + +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join +#include "nvml.h" // NVML runtime functions + +#include // std::string +#include // std::vector + +namespace hws::detail { + +std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) { + if (clocks_event_reasons == 0ull) { + return "None"; + } else { + std::vector reasons{}; + if ((clocks_event_reasons & nvmlClocksEventReasonApplicationsClocksSetting) != 0ull) { + reasons.emplace_back("ApplicationsClocksSetting"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonDisplayClockSetting) != 0ull) { + reasons.emplace_back("DisplayClockSetting"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonGpuIdle) != 0ull) { + reasons.emplace_back("GpuIdle"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonSwPowerCap) != 0ull) { + reasons.emplace_back("SwPowerCap"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonSwThermalSlowdown) != 0ull) { + reasons.emplace_back("SwThermalSlowdown"); + } + if ((clocks_event_reasons & nvmlClocksEventReasonSyncBoost) != 0ull) { + reasons.emplace_back("SyncBoost"); + } + if ((clocks_event_reasons & nvmlClocksThrottleReasonHwPowerBrakeSlowdown) != 0ull) { + reasons.emplace_back("HwPowerBrakeSlowdown"); + } + if ((clocks_event_reasons & nvmlClocksThrottleReasonHwSlowdown) != 0ull) { + reasons.emplace_back("HwSlowdown"); + } + if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) { + reasons.emplace_back("HwThermalSlowdown"); + } + return fmt::format("{}", fmt::join(reasons, "|")); + } +} + +} // namespace hws::detail diff --git a/src/hardware_sampling/hardware_sampler.cpp b/src/hws/hardware_sampler.cpp similarity index 58% rename from src/hardware_sampling/hardware_sampler.cpp rename to src/hws/hardware_sampler.cpp index c6554cd..abd907d 100644 --- a/src/hardware_sampling/hardware_sampler.cpp +++ b/src/hws/hardware_sampler.cpp @@ -5,15 +5,19 
@@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/hardware_sampler.hpp" +#include "hws/hardware_sampler.hpp" -#include "hardware_sampling/event.hpp" // hws::event -#include "hardware_sampling/utility.hpp" // hws::detail::{durations_from_reference_time, join} +#include "hws/event.hpp" // hws::event +#include "hws/utility.hpp" // hws::detail::durations_from_reference_time +#include "hws/version.hpp" // hws::version::version + +#include "fmt/chrono.h" // direct formatting of std::chrono types +#include "fmt/format.h" // fmt::format +#include "fmt/ranges.h" // fmt::join #include // std::chrono::{system_clock, steady_clock, duration_cast, milliseconds} #include // std::size_t #include // std::exception -#include // std::format #include // std::ofstream #include // std::cerr, std::endl #include // std::runtime_error, std::out_of_range @@ -22,8 +26,13 @@ namespace hws { -hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval) : - sampling_interval_{ sampling_interval } { } +hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : + sampling_interval_{ sampling_interval }, + sample_category_{ category } { + if (sampling_interval <= std::chrono::milliseconds{ 0 }) { + throw std::invalid_argument{ "The sampling interval must be larger than 0ms!"
}; + } +} hardware_sampler::~hardware_sampler() = default; @@ -109,13 +118,13 @@ void hardware_sampler::add_event(decltype(event::name) name) { event hardware_sampler::get_event(const std::size_t idx) const { if (idx >= this->num_events()) { - throw std::out_of_range{ std::format("The index {} is out-of-range for the number of events {}!", idx, this->num_events()) }; + throw std::out_of_range{ fmt::format("The index {} is out-of-range for the number of events {}!", idx, this->num_events()) }; } return events_[idx]; } -void hardware_sampler::dump_yaml(const char *filename) { +void hardware_sampler::dump_yaml(const char *filename) const { if (!this->has_sampling_stopped()) { throw std::runtime_error{ "Can dump samples to the YAML file only after the sampling has been stopped!" }; } @@ -123,42 +132,68 @@ void hardware_sampler::dump_yaml(const char *filename) { std::ofstream file{ filename, std::ios_base::app }; // begin a new YAML document (only with "---" multiple YAML documents in a single file are allowed) - file << "---\n\n"; + file << "---\n\n" + << this->as_yaml_string(); +} - // set the device identification - file << std::format("device_identification: {}\n\n", this->device_identification()); +void hardware_sampler::dump_yaml(const std::string &filename) const { + this->dump_yaml(filename.c_str()); +} + +void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const { + this->dump_yaml(filename.string().c_str()); +} - // output the start date time of this hardware sampling - file << std::format("start_time: \"{:%Y-%m-%d %X}\"\n\n", std::chrono::current_zone()->to_local(start_date_time_)); +std::string hardware_sampler::as_yaml_string() const { + if (!this->has_sampling_stopped()) { + throw std::runtime_error{ "Can return samples as string only after the sampling has been stopped!" 
}; + } - // output the event information + // generate the event information std::vector event_time_points{}; std::vector event_names{}; for (const auto &[time_point, name] : events_) { event_time_points.push_back(time_point); - event_names.push_back(name); + event_names.push_back(fmt::format("\"{}\"", name)); } - file << std::format("events:\n" - " time_points: [{}]\n" - " names: [{}]\n\n", - detail::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "), - detail::join(event_names, ", ")); - - // output the sampling information - file << std::format("sampling_interval: {}\n" - "time_points: [{}]\n" - "{}\n\n", - this->sampling_interval(), - detail::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), - this->generate_yaml_string()); -} - -void hardware_sampler::dump_yaml(const std::string &filename) { - this->dump_yaml(filename.c_str()); + + return fmt::format("device_identification: \"{}\"\n" + "\n" + "version: \"{}\"\n" + "\n" + "start_time: \"{:%Y-%m-%d %X}\"\n" + "\n" + "events:\n" + " time_points:\n" + " unit: \"s\"\n" + " values: [{}]\n" + " names: [{}]\n" + "\n" + "sampling_interval:\n" + " unit: \"ms\"\n" + " values: {}\n" + "\n" + "time_points:\n" + " unit: \"s\"\n" + " values: [{}]\n" + "\n" + "{}\n", + this->device_identification(), + version::version, + start_date_time_, + fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "), + fmt::join(event_names, ", "), + this->sampling_interval().count(), + fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), + this->samples_only_as_yaml_string()); } -void hardware_sampler::dump_yaml(const std::filesystem::path &filename) { - this->dump_yaml(filename.string().c_str()); +void hardware_sampler::add_time_point(const std::chrono::steady_clock::time_point time_point) { + 
time_points_.push_back(time_point); +} + +bool hardware_sampler::sample_category_enabled(const sample_category category) const noexcept { + return static_cast(this->sample_category_ & category) != 0; } } // namespace hws diff --git a/src/hws/system_hardware_sampler.cpp b/src/hws/system_hardware_sampler.cpp new file mode 100644 index 0000000..1c08762 --- /dev/null +++ b/src/hws/system_hardware_sampler.cpp @@ -0,0 +1,212 @@ +/** + * @author Marcel Breyer + * @copyright 2024-today All Rights Reserved + * @license This file is released under the MIT license. + * See the LICENSE.md file in the project root for full license information. + */ + +#include "hws/system_hardware_sampler.hpp" + +#include "hws/event.hpp" // hws::event +#include "hws/sample_category.hpp" // hws::sample_category + +#if defined(HWS_FOR_CPUS_ENABLED) + #include "hws/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler +#endif +#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) + #include "hws/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler + #include "hws/gpu_nvidia/utility.hpp" // HWS_CUDA_ERROR_CHECK + + #include "cuda_runtime.h" // cudaGetDeviceCount +#endif +#if defined(HWS_FOR_AMD_GPUS_ENABLED) + #include "hws/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler + #include "hws/gpu_amd/utility.hpp" // HWS_HIP_ERROR_CHECK + + #include "hip/hip_runtime.h" // hipGetDeviceCount +#endif +#if defined(HWS_FOR_INTEL_GPUS_ENABLED) + #include "hws/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler + #include "hws/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK +#endif + +#include "fmt/format.h" // fmt::format + +#include // std::for_each, std::all_of +#include // std::chrono::milliseconds +#include // std::size_t +#include // std::uint32_t +#include // std::unique_ptr, std::make_unique +#include // std::accumulate +#include // std::out_of_range +#include // std::vector + +namespace hws { + +system_hardware_sampler::system_hardware_sampler(const sample_category 
category) : + system_hardware_sampler{ HWS_SAMPLING_INTERVAL, category } { } + +system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds sampling_interval, sample_category category) { + // create the hardware samplers based on the available hardware +#if defined(HWS_FOR_CPUS_ENABLED) + { + samplers_.push_back(std::make_unique(sampling_interval, category)); + } +#endif +#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) + { + int device_count{}; + HWS_CUDA_ERROR_CHECK(cudaGetDeviceCount(&device_count)); + for (int device = 0; device < device_count; ++device) { + samplers_.push_back(std::make_unique(static_cast(device), sampling_interval, category)); + } + } +#endif +#if defined(HWS_FOR_AMD_GPUS_ENABLED) + { + int device_count{}; + HWS_HIP_ERROR_CHECK(hipGetDeviceCount(&device_count)); + for (int device = 0; device < device_count; ++device) { + samplers_.push_back(std::make_unique(static_cast(device), sampling_interval, category)); + } + } +#endif +#if defined(HWS_FOR_INTEL_GPUS_ENABLED) + { + // init level zero driver + HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY)) + + // discover the number of drivers + std::uint32_t driver_count{ 0 }; + HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr)) + + // check if only the single GPU driver has been found + if (driver_count > 1) { + throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) }; + } + + // get the GPU driver + ze_driver_handle_t driver{}; + HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver)) + + // get all GPUs for the current driver + std::uint32_t device_count{ 0 }; + HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr)) + for (std::uint32_t device = 0; device < device_count; ++device) { + samplers_.push_back(std::make_unique(static_cast(device), sampling_interval, category)); + } + } +#endif +} + +void system_hardware_sampler::start_sampling() { + std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { 
ptr->start_sampling(); }); +} + +void system_hardware_sampler::stop_sampling() { + std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->stop_sampling(); }); +} + +void system_hardware_sampler::pause_sampling() { + std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->pause_sampling(); }); +} + +void system_hardware_sampler::resume_sampling() { + std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->resume_sampling(); }); +} + +bool system_hardware_sampler::has_sampling_started() const noexcept { + return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->has_sampling_started(); }); +} + +bool system_hardware_sampler::is_sampling() const noexcept { + return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->is_sampling(); }); +} + +bool system_hardware_sampler::has_sampling_stopped() const noexcept { + return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->has_sampling_stopped(); }); +} + +void system_hardware_sampler::add_event(event e) { + std::for_each(samplers_.begin(), samplers_.end(), [&e](auto &ptr) { ptr->add_event(e); }); +} + +void system_hardware_sampler::add_event(decltype(event::time_point) time_point, decltype(event::name) name) { + std::for_each(samplers_.begin(), samplers_.end(), [&time_point, &name](auto &ptr) { ptr->add_event(time_point, name); }); +} + +void system_hardware_sampler::add_event(decltype(event::name) name) { + std::for_each(samplers_.begin(), samplers_.end(), [&name](auto &ptr) { ptr->add_event(name); }); +} + +std::vector system_hardware_sampler::num_events() const { + std::vector num_events_per_sampler(this->num_samplers()); + std::transform(samplers_.cbegin(), samplers_.cend(), num_events_per_sampler.begin(), [](const auto &ptr) { return ptr->num_events(); }); + return num_events_per_sampler; +} + +std::vector> system_hardware_sampler::get_events() const { + std::vector> 
events_per_sampler(this->num_samplers()); + std::transform(samplers_.cbegin(), samplers_.cend(), events_per_sampler.begin(), [](const auto &ptr) { return ptr->get_events(); }); + return events_per_sampler; +} + +std::vector> system_hardware_sampler::sampling_time_points() const { + std::vector> sampling_time_points_per_sampler(this->num_samplers()); + std::transform(samplers_.cbegin(), samplers_.cend(), sampling_time_points_per_sampler.begin(), [](const auto &ptr) { return ptr->sampling_time_points(); }); + return sampling_time_points_per_sampler; +} + +std::vector system_hardware_sampler::sampling_interval() const { + std::vector sampling_interval_per_sampler(this->num_samplers()); + std::transform(samplers_.cbegin(), samplers_.cend(), sampling_interval_per_sampler.begin(), [](const auto &ptr) { return ptr->sampling_interval(); }); + return sampling_interval_per_sampler; +} + +std::size_t system_hardware_sampler::num_samplers() const noexcept { + return samplers_.size(); +} + +std::vector> &system_hardware_sampler::samplers() noexcept { + return samplers_; +} + +const std::vector> &system_hardware_sampler::samplers() const noexcept { + return samplers_; +} + +std::unique_ptr &system_hardware_sampler::sampler(const std::size_t idx) { + if (idx >= samplers_.size()) { + throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) }; + } + return samplers_[idx]; +} + +const std::unique_ptr &system_hardware_sampler::sampler(const std::size_t idx) const { + if (idx >= samplers_.size()) { + throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) }; + } + return samplers_[idx]; +} + +void system_hardware_sampler::dump_yaml(const char *filename) const { + std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); +} + +void system_hardware_sampler::dump_yaml(const std::string &filename) const { + std::for_each(samplers_.cbegin(), 
samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); +} + +void system_hardware_sampler::dump_yaml(const std::filesystem::path &filename) const { + std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); +} + +std::string system_hardware_sampler::as_yaml_string() const { + return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string &str, const auto &ptr) { return str + ptr->as_yaml_string(); }); +} + +std::string system_hardware_sampler::samples_only_as_yaml_string() const { + return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string &str, const auto &ptr) { return str + ptr->samples_only_as_yaml_string(); }); +} + +} // namespace hws diff --git a/src/hardware_sampling/utility.cpp b/src/hws/utility.cpp similarity index 91% rename from src/hardware_sampling/utility.cpp rename to src/hws/utility.cpp index 58e604c..6651763 100644 --- a/src/hardware_sampling/utility.cpp +++ b/src/hws/utility.cpp @@ -5,7 +5,7 @@ * See the LICENSE.md file in the project root for full license information. */ -#include "hardware_sampling/utility.hpp" +#include "hws/utility.hpp" #include // std::min, std::transform #include // std::tolower @@ -15,6 +15,10 @@ namespace hws::detail { +bool starts_with(const std::string_view sv, const std::string_view start) noexcept { + return sv.substr(0, start.size()) == start; +} + std::string_view trim(std::string_view str) noexcept { // trim right {