Skip to content

Commit

Permalink
Introduced benchmarks for HIP.
Browse files Browse the repository at this point in the history
Simply copying the current CUDA benchmark code, with
all its imperfections.
  • Loading branch information
krasznaa committed Nov 28, 2024
1 parent 7b60f23 commit 288f9fc
Show file tree
Hide file tree
Showing 5 changed files with 391 additions and 0 deletions.
3 changes: 3 additions & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ add_subdirectory(core)
# Add the benchmarks of every device backend that is enabled in this build.
# The benchmarks of backend <name> live in the "<name>" subdirectory, and are
# guarded by the VECMEM_BUILD_<NAME>_LIBRARY variable.
foreach( _vecmem_backend IN ITEMS cuda hip sycl )
   string( TOUPPER "${_vecmem_backend}" _vecmem_backend_uc )
   if( VECMEM_BUILD_${_vecmem_backend_uc}_LIBRARY )
      add_subdirectory( ${_vecmem_backend} )
   endif()
endforeach()
# Do not leave the helper variable behind in this directory's scope.
unset( _vecmem_backend_uc )
29 changes: 29 additions & 0 deletions benchmarks/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# VecMem project, part of the ACTS project (R&D line)
#
# (c) 2024 CERN for the benefit of the ACTS project
#
# Mozilla Public License Version 2.0

# Pull in the project's C++ and HIP compiler option setup.
include( vecmem-compiler-options-cpp )
include( vecmem-compiler-options-hip )

# Declare the HIP benchmark executable, built from all benchmark sources of
# this directory.
add_executable( vecmem_benchmark_hip
   "benchmark_hip.cpp" "benchmark_copy.cpp" "benchmark_edm_copy.cpp" )

# Link it against the VecMem libraries under test, the helper code shared by
# all benchmarks, and Google Benchmark (including its main() implementation).
target_link_libraries( vecmem_benchmark_hip
   PRIVATE
      vecmem::core vecmem::hip
      vecmem_benchmark_common
      benchmark::benchmark benchmark::benchmark_main )

# Group the target with the other VecMem benchmarks in IDE project views.
set_property( TARGET vecmem_benchmark_hip
   PROPERTY FOLDER "vecmem/benchmarks" )
166 changes: 166 additions & 0 deletions benchmarks/hip/benchmark_copy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/*
* VecMem project, part of the ACTS project (R&D line)
*
* (c) 2022-2024 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

// VecMem include(s).
#include <vecmem/memory/hip/device_memory_resource.hpp>
#include <vecmem/memory/host_memory_resource.hpp>
#include <vecmem/utils/hip/copy.hpp>

// Common benchmark include(s).
#include "../common/make_jagged_sizes.hpp"
#include "../common/make_jagged_vector.hpp"

// Google benchmark include(s).
#include <benchmark/benchmark.h>

// System include(s).
#include <numeric>
#include <vector>

namespace vecmem::hip::benchmark {

/// The (host) memory resource to use in the benchmark(s).
static vecmem::host_memory_resource host_mr;
/// The (device) memory resource to use in the benchmark(s).
static device_memory_resource device_mr;
/// The copy object to use in the benchmark(s).
static copy hip_copy;

/// Function benchmarking "unknown" host-to-device jagged vector copies
void jaggedVectorUnknownHtoDCopy(::benchmark::State& state) {

// Generate the sizes of the jagged vector/buffer for the test.
const std::vector<std::size_t> sizes =
vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));

// Set custom "counters" for the benchmark.
const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
static_cast<std::size_t>(0u)) *
sizeof(int);
state.counters["Bytes"] = static_cast<double>(bytes);
state.counters["Rate"] =
::benchmark::Counter(static_cast<double>(bytes),
::benchmark::Counter::kIsIterationInvariantRate,
::benchmark::Counter::kIs1024);

// Create the "source vector".
jagged_vector<int> source =
vecmem::benchmark::make_jagged_vector(sizes, host_mr);
const data::jagged_vector_data<int> source_data = get_data(source);
// Create the "destination buffer".
data::jagged_vector_buffer<int> dest(sizes, device_mr, &host_mr);
hip_copy.setup(dest)->wait();

// Perform the copy benchmark.
for (auto _ : state) {
hip_copy(source_data, dest)->wait();
}
}
// Set up the benchmark.
BENCHMARK(jaggedVectorUnknownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}});

/// Function benchmarking "known" host-to-device jagged vector copies
void jaggedVectorKnownHtoDCopy(::benchmark::State& state) {

// Generate the sizes of the jagged vector/buffer for the test.
const std::vector<std::size_t> sizes =
vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));

// Set custom "counters" for the benchmark.
const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
static_cast<std::size_t>(0u)) *
sizeof(int);
state.counters["Bytes"] = static_cast<double>(bytes);
state.counters["Rate"] =
::benchmark::Counter(static_cast<double>(bytes),
::benchmark::Counter::kIsIterationInvariantRate,
::benchmark::Counter::kIs1024);

// Create the "source vector".
jagged_vector<int> source =
vecmem::benchmark::make_jagged_vector(sizes, host_mr);
const data::jagged_vector_data<int> source_data = get_data(source);
// Create the "destination buffer".
data::jagged_vector_buffer<int> dest(sizes, device_mr, &host_mr);
hip_copy.setup(dest)->wait();

// Perform the copy benchmark.
for (auto _ : state) {
hip_copy(source_data, dest, copy::type::host_to_device)->wait();
}
}
// Set up the benchmark.
BENCHMARK(jaggedVectorKnownHtoDCopy)->Ranges({{10, 100000}, {50, 5000}});

/// Function benchmarking "unknown" device-to-host jagged vector copies
void jaggedVectorUnknownDtoHCopy(::benchmark::State& state) {

// Generate the sizes of the jagged vector/buffer for the test.
const std::vector<std::size_t> sizes =
vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));

// Set custom "counters" for the benchmark.
const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
static_cast<std::size_t>(0u)) *
sizeof(int);
state.counters["Bytes"] = static_cast<double>(bytes);
state.counters["Rate"] =
::benchmark::Counter(static_cast<double>(bytes),
::benchmark::Counter::kIsIterationInvariantRate,
::benchmark::Counter::kIs1024);

// Create the "source buffer".
data::jagged_vector_buffer<int> source(sizes, device_mr, &host_mr);
hip_copy.setup(source)->wait();
// Create the "destination vector".
jagged_vector<int> dest =
vecmem::benchmark::make_jagged_vector(sizes, host_mr);
data::jagged_vector_data<int> dest_data = get_data(dest);

// Perform the copy benchmark.
for (auto _ : state) {
hip_copy(source, dest_data)->wait();
}
}
// Set up the benchmark.
BENCHMARK(jaggedVectorUnknownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}});

/// Function benchmarking "known" device-to-host jagged vector copies
void jaggedVectorKnownDtoHCopy(::benchmark::State& state) {

// Generate the sizes of the jagged vector/buffer for the test.
const std::vector<std::size_t> sizes =
vecmem::benchmark::make_jagged_sizes(state.range(0), state.range(1));

// Set custom "counters" for the benchmark.
const std::size_t bytes = std::accumulate(sizes.begin(), sizes.end(),
static_cast<std::size_t>(0u)) *
sizeof(int);
state.counters["Bytes"] = static_cast<double>(bytes);
state.counters["Rate"] =
::benchmark::Counter(static_cast<double>(bytes),
::benchmark::Counter::kIsIterationInvariantRate,
::benchmark::Counter::kIs1024);

// Create the "source buffer".
data::jagged_vector_buffer<int> source(sizes, device_mr, &host_mr);
hip_copy.setup(source)->wait();
// Create the "destination vector".
jagged_vector<int> dest =
vecmem::benchmark::make_jagged_vector(sizes, host_mr);
data::jagged_vector_data<int> dest_data = get_data(dest);

// Perform the copy benchmark.
for (auto _ : state) {
hip_copy(source, dest_data, copy::type::device_to_host)->wait();
}
}
// Set up the benchmark.
BENCHMARK(jaggedVectorKnownDtoHCopy)->Ranges({{10, 100000}, {50, 5000}});

} // namespace vecmem::hip::benchmark
81 changes: 81 additions & 0 deletions benchmarks/hip/benchmark_edm_copy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* VecMem project, part of the ACTS project (R&D line)
*
* (c) 2024 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

// VecMem include(s).
#include <vecmem/memory/hip/device_memory_resource.hpp>
#include <vecmem/memory/hip/host_memory_resource.hpp>
#include <vecmem/memory/host_memory_resource.hpp>
#include <vecmem/utils/copy.hpp>
#include <vecmem/utils/hip/copy.hpp>

// Common benchmark include(s).
#include "../common/simple_aos_copy_benchmarks.hpp"
#include "../common/simple_soa_copy_benchmarks.hpp"

// Google benchmark include(s).
#include <benchmark/benchmark.h>

// Registration of the "simple" SoA/AoS EDM copy benchmarks for HIP. The
// benchmark functions themselves come from the common benchmark headers
// included by this file; this namespace only provides the HIP specific
// memory resources / copy objects, and registers the benchmarks with them.
namespace vecmem::benchmark {

/// Non-pinned host memory resource to use in the benchmark(s).
static host_memory_resource host_mr;
/// Pinned host memory resource to use in the benchmark(s).
static hip::host_memory_resource hip_host_mr;

/// Device memory resource to use in the benchmark(s).
static hip::device_memory_resource device_mr;

/// The host copy object to use in the benchmark(s).
static copy host_copy;
/// The synchronous device copy object to use in the benchmark(s).
static hip::copy device_copy;

//
// Helper macro(s) for setting up all the different benchmarks.
//

// Apply the common configuration to a benchmark registration expression, by
// appending "->Range(1UL, 1UL << 26)" to it. So every benchmark is run with
// a single argument, swept between 1 and 2^26.
// NOTE(review): presumably that argument is the number of elements in the
// copied container; confirm against the common benchmark headers.
#define CONFIGURE_BENCHMARK(BM) BM->Range(1UL, 1UL << 26)

// Register the full set of EDM copy benchmarks for one configuration.
//
//   TITLE       Identifier used as the benchmark "test case name". For the
//               host-to-device benchmarks "_fixed_buffer" or
//               "_resizable_buffer" is token-pasted onto it.
//   HOST_MR     Host memory resource handed to every benchmark function
//   DEVICE_MR   Device memory resource handed to every benchmark function
//   HOST_COPY   Host copy object, only handed to the "optimal" SoA benchmarks
//   DEVICE_COPY Device copy object handed to every benchmark function
//
// Nine benchmarks are registered per invocation: the SoA "direct" and
// "optimal" host-to-device copies and the AoS host-to-device copy, each with
// a fixed size and a resizable buffer type; plus the SoA "direct", SoA
// "optimal" and AoS device-to-host copies.
//
// The last registration is intentionally left without a trailing semicolon,
// that is supplied at the place where the macro is used. Do not put comments
// inside of the macro body, they would interfere with the line continuations.
#define EDM_COPY_BENCHMARKS(TITLE, HOST_MR, DEVICE_MR, HOST_COPY, DEVICE_COPY) \
CONFIGURE_BENCHMARK(BENCHMARK_CAPTURE( \
simple_soa_direct_h2d_copy_benchmark, TITLE##_fixed_buffer, HOST_MR, \
DEVICE_MR, DEVICE_COPY, data::buffer_type::fixed_size)); \
CONFIGURE_BENCHMARK(BENCHMARK_CAPTURE( \
simple_soa_direct_h2d_copy_benchmark, TITLE##_resizable_buffer, \
HOST_MR, DEVICE_MR, DEVICE_COPY, data::buffer_type::resizable)); \
CONFIGURE_BENCHMARK(BENCHMARK_CAPTURE( \
simple_soa_optimal_h2d_copy_benchmark, TITLE##_fixed_buffer, HOST_MR, \
DEVICE_MR, HOST_COPY, DEVICE_COPY, data::buffer_type::fixed_size)); \
CONFIGURE_BENCHMARK(BENCHMARK_CAPTURE( \
simple_soa_optimal_h2d_copy_benchmark, TITLE##_resizable_buffer, \
HOST_MR, DEVICE_MR, HOST_COPY, DEVICE_COPY, \
data::buffer_type::resizable)); \
CONFIGURE_BENCHMARK( \
BENCHMARK_CAPTURE(simple_soa_direct_d2h_copy_benchmark, TITLE, \
HOST_MR, DEVICE_MR, DEVICE_COPY)); \
CONFIGURE_BENCHMARK( \
BENCHMARK_CAPTURE(simple_soa_optimal_d2h_copy_benchmark, TITLE, \
HOST_MR, DEVICE_MR, HOST_COPY, DEVICE_COPY)); \
CONFIGURE_BENCHMARK(BENCHMARK_CAPTURE( \
simple_aos_h2d_copy_benchmark, TITLE##_fixed_buffer, HOST_MR, \
DEVICE_MR, DEVICE_COPY, data::buffer_type::fixed_size)); \
CONFIGURE_BENCHMARK(BENCHMARK_CAPTURE( \
simple_aos_h2d_copy_benchmark, TITLE##_resizable_buffer, HOST_MR, \
DEVICE_MR, DEVICE_COPY, data::buffer_type::resizable)); \
CONFIGURE_BENCHMARK(BENCHMARK_CAPTURE(simple_aos_d2h_copy_benchmark, \
TITLE, HOST_MR, DEVICE_MR, \
DEVICE_COPY))

//
// Set up all the different benchmarks.
//

// Benchmarks using the non-pinned ("pageable") host memory resource.
EDM_COPY_BENCHMARKS(hip_pageable_sync, host_mr, device_mr, host_copy,
device_copy);
// Benchmarks using the pinned HIP host memory resource.
EDM_COPY_BENCHMARKS(hip_pinned_sync, hip_host_mr, device_mr, host_copy,
device_copy);

} // namespace vecmem::benchmark
Loading

0 comments on commit 288f9fc

Please sign in to comment.