From 0f367454e83902677d023a876c8dea19399032cb Mon Sep 17 00:00:00 2001
From: akolliasAMD
Date: Tue, 3 Dec 2024 15:44:40 -0500
Subject: [PATCH] cherry picked from commit 02ce785c3780bd707971830913904418a7539bda

Local deviation from the cherry-picked commit: list(JOIN ...) is replaced by
string(REPLACE ";" " " ...) because list(JOIN) requires CMake >= 3.12 while
CMakeLists.txt declares cmake_minimum_required(VERSION 3.5).

---
 CMakeLists.txt          | 50 ++++++++++++++++++++++++++++++++++++++++-
 src/include/Kernels.hpp |  2 +-
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e4ac517..123f31d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,55 @@ else()
 endif()
 cmake_minimum_required(VERSION 3.5)
 
-project(TransferBench VERSION 1.51.0 LANGUAGES CXX)
+project(TransferBench VERSION 1.52.0 LANGUAGES CXX)
+
+# Default GPU architectures to build
+#==================================================================================================
+set(DEFAULT_GPUS
+  gfx906
+  gfx908
+  gfx90a
+  gfx942
+  gfx1030
+  gfx1100
+  gfx1101
+  gfx1102
+  gfx1200
+  gfx1201)
+
+# Build only for local GPU architecture
+if (BUILD_LOCAL_GPU_TARGET_ONLY)
+  message(STATUS "Building only for local GPU target")
+  if (COMMAND rocm_local_targets)
+    rocm_local_targets(DEFAULT_GPUS)
+  else()
+    message(WARNING "Unable to determine local GPU targets. Falling back to default GPUs.")
+  endif()
+endif()
+
+# Determine which GPU architectures to build for
+set(AMDGPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if AMDGPU_TARGETS is not defined.")
+
+# Check if clang compiler can offload to AMDGPU_TARGETS
+if (COMMAND rocm_check_target_ids)
+  message(STATUS "Checking for ROCm support for GPU targets: " "${AMDGPU_TARGETS}")
+  rocm_check_target_ids(SUPPORTED_GPUS TARGETS ${AMDGPU_TARGETS})
+else()
+  message(WARNING "Unable to check for supported GPU targets. Falling back to default GPUs.")
+  set(SUPPORTED_GPUS ${DEFAULT_GPUS})
+endif()
+
+set(GPU_TARGETS "${SUPPORTED_GPUS}" CACHE STRING "GPU targets to compile for.")
+message(STATUS "Compiling for ${GPU_TARGETS}")
+
+foreach(target ${GPU_TARGETS})
+  list(APPEND static_link_flags --offload-arch=${target})
+endforeach()
+# list(JOIN) only exists in CMake >= 3.12, but this file declares
+# cmake_minimum_required(VERSION 3.5); string(REPLACE) builds the same string.
+string(REPLACE ";" " " flags_str "${static_link_flags}")
+set( CMAKE_CXX_FLAGS "${flags_str} ${CMAKE_CXX_FLAGS}")
+
 set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -L${ROCM_PATH}/lib")
 include_directories(${ROCM_PATH}/include)
 link_libraries(numa hsa-runtime64 pthread)
diff --git a/src/include/Kernels.hpp b/src/include/Kernels.hpp
index 249b792..56fbcb4 100644
--- a/src/include/Kernels.hpp
+++ b/src/include/Kernels.hpp
@@ -62,7 +62,7 @@ struct SubExecParam
 };
 
 // Macro for collecting HW_REG_HW_ID
-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__)
+#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__)
 #define GetHwId(hwId) \
   hwId = 0
 #elif defined(__NVCC__)