Skip to content

Commit

Permalink
cherry picked from commit 02ce785
Browse files Browse the repository at this point in the history
  • Loading branch information
akolliasAMD committed Dec 3, 2024
1 parent f6cc992 commit 0f36745
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 2 deletions.
48 changes: 47 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,53 @@ else()
endif()
# Oldest CMake release this file declares itself compatible with.
cmake_minimum_required(VERSION 3.5)

# NOTE(review): two project() declarations appear back to back and differ only in
# VERSION (1.51.0 vs 1.52.0). This looks like the removed/added pair of a rendered
# diff -- confirm that only one of them exists in the real CMakeLists.txt.
project(TransferBench VERSION 1.51.0 LANGUAGES CXX)
project(TransferBench VERSION 1.52.0 LANGUAGES CXX)

# GPU architectures compiled for by default
#==================================================================================================
set(DEFAULT_GPUS
  gfx906 gfx908 gfx90a gfx942
  gfx1030
  gfx1100 gfx1101 gfx1102
  gfx1200 gfx1201
)

# When BUILD_LOCAL_GPU_TARGET_ONLY is set, try to restrict the build to the GPU(s) of this machine
if (BUILD_LOCAL_GPU_TARGET_ONLY)
  message(STATUS "Building only for local GPU target")
  if (NOT COMMAND rocm_local_targets)
    # The helper is not available: DEFAULT_GPUS is left untouched
    message(WARNING "Unable to determine local GPU targets. Falling back to default GPUs.")
  else()
    # rocm_local_targets is defined outside this file; presumably it stores the
    # detected architectures into DEFAULT_GPUS -- TODO confirm
    rocm_local_targets(DEFAULT_GPUS)
  endif()
endif()

# Determine which GPU architectures to build for.
# AMDGPU_TARGETS is a cache entry: a value already present in the cache (for example one passed
# with -DAMDGPU_TARGETS=...) is kept, otherwise the entry is seeded from DEFAULT_GPUS.
set(AMDGPU_TARGETS "${DEFAULT_GPUS}" CACHE STRING "Target default GPUs if AMDGPU_TARGETS is not defined.")

# Check if clang compiler can offload to AMDGPU_TARGETS
if (COMMAND rocm_check_target_ids)
  message(STATUS "Checking for ROCm support for GPU targets: " "${AMDGPU_TARGETS}")
  rocm_check_target_ids(SUPPORTED_GPUS TARGETS ${AMDGPU_TARGETS})
else()
  # The checker is unavailable, so the requested list cannot be validated. Keep honoring
  # AMDGPU_TARGETS instead of silently replacing a user-provided list with DEFAULT_GPUS.
  # When the user did not override AMDGPU_TARGETS it was seeded from DEFAULT_GPUS above,
  # so the default case is unchanged.
  message(WARNING "Unable to check for supported GPU targets. Using AMDGPU_TARGETS without validation.")
  set(SUPPORTED_GPUS ${AMDGPU_TARGETS})
endif()

# Final list of architectures to compile for. This is a cache entry, so a GPU_TARGETS value
# already in the cache takes precedence over SUPPORTED_GPUS.
set(GPU_TARGETS "${SUPPORTED_GPUS}" CACHE STRING "GPU targets to compile for.")
message(STATUS "Compiling for ${GPU_TARGETS}")

# Emit one --offload-arch=<target> flag per selected architecture
foreach(target ${GPU_TARGETS})
  list(APPEND static_link_flags --offload-arch=${target})
endforeach()

# Turn the ';'-separated list into a space-separated flag string.
# list(JOIN) is only available from CMake 3.12, while this file declares 3.5 as its minimum;
# string(REPLACE) produces the same result on every supported version.
string(REPLACE ";" " " flags_str "${static_link_flags}")

# Offload flags go in front of whatever flags were already set
set(CMAKE_CXX_FLAGS "${flags_str} ${CMAKE_CXX_FLAGS}")

# Append the optimization level and the ROCm library search path to the existing C++ flags
string(APPEND CMAKE_CXX_FLAGS " -O3 -L${ROCM_PATH}/lib")

# Directory-scoped settings: they apply to targets created after this point.
# ROCM_PATH is defined outside the visible part of this file.
include_directories(${ROCM_PATH}/include)
link_libraries(numa hsa-runtime64 pthread)
Expand Down
2 changes: 1 addition & 1 deletion src/include/Kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ struct SubExecParam
};

// Macro for collecting HW_REG_HW_ID
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__)
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__)
#define GetHwId(hwId) \
hwId = 0
#elif defined(__NVCC__)
Expand Down

0 comments on commit 0f36745

Please sign in to comment.