Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ucx build #938

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 8 additions & 103 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
STRING "Choose the type of build." FORCE)
endif()

# set std 11
set (CMAKE_CXX_STANDARD 11)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lockshaw Have we reached an agreement that FlexFlow will use C++17 moving forward?


# do not disable assertions even if in release mode
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -UNDEBUG")

Expand All @@ -43,106 +46,15 @@ option(FF_USE_PREBUILT_NCCL "Enable use of NCCL pre-compiled library, if availab
option(FF_USE_PREBUILT_LEGION "Enable use of Legion pre-compiled library, if available" ON)
option(FF_USE_ALL_PREBUILT_LIBRARIES "Enable use of all pre-compiled libraries, if available" OFF)

# option for using Python
set(FF_GASNET_CONDUITS aries udp mpi ibv ucx)
# option for using network
set(FF_GASNET_CONDUITS aries udp mpi ibv)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we remove ucx as an option?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is preferable to use the Realm UCX module over the GASNet UCX conduit, so I don't think it is necessary to provide the ucx option.

set(FF_GASNET_CONDUIT "mpi" CACHE STRING "Select GASNet conduit ${FF_GASNET_CONDUITS}")
set_property(CACHE FF_GASNET_CONDUIT PROPERTY STRINGS ${FF_GASNET_CONDUITS})
set(FF_LEGION_NETWORKS "" CACHE STRING "Network backend(s) to use")

if ((FF_LEGION_NETWORKS STREQUAL "gasnet" AND FF_GASNET_CONDUIT STREQUAL "ucx") OR FF_LEGION_NETWORKS STREQUAL "ucx")
if("${FF_UCX_URL}" STREQUAL "")
set(UCX_URL "https://github.com/openucx/ucx/releases/download/v1.14.0-rc1/ucx-1.14.0.tar.gz")
else()
set(UCX_URL "${FF_UCX_URL}")
endif()

set(UCX_DIR ${CMAKE_CURRENT_BINARY_DIR}/ucx)
get_filename_component(UCX_COMPRESSED_FILE_NAME "${UCX_URL}" NAME)
# message(STATUS "UCX_URL: ${UCX_URL}")
# message(STATUS "UCX_COMPRESSED_FILE_NAME: ${UCX_COMPRESSED_FILE_NAME}")
set(UCX_COMPRESSED_FILE_PATH "${CMAKE_CURRENT_BINARY_DIR}/${UCX_COMPRESSED_FILE_NAME}")
set(UCX_BUILD_NEEDED OFF)
set(UCX_CONFIG_FILE ${UCX_DIR}/config.txt)
set(UCX_BUILD_OUTPUT ${UCX_DIR}/build.log)

if(EXISTS ${UCX_CONFIG_FILE})
file(READ ${UCX_CONFIG_FILE} PREV_UCX_CONFIG)
# message(STATUS "PREV_UCX_CONFIG: ${PREV_UCX_CONFIG}")
if("${UCX_URL}" STREQUAL "${PREV_UCX_CONFIG}")
# configs match - no build needed
set(UCX_BUILD_NEEDED OFF)
else()
message(STATUS "UCX configuration has changed - rebuilding...")
set(UCX_BUILD_NEEDED ON)
endif()
else()
message(STATUS "Configuring and building UCX...")
set(UCX_BUILD_NEEDED ON)
endif()

if(UCX_BUILD_NEEDED)
if(NOT EXISTS "${UCX_COMPRESSED_FILE_PATH}")
message(STATUS "Downloading openucx/ucx from: ${UCX_URL}")
file(
DOWNLOAD
"${UCX_URL}" "${UCX_COMPRESSED_FILE_PATH}"
SHOW_PROGRESS
STATUS status
LOG log
)

list(GET status 0 status_code)
list(GET status 1 status_string)

if(status_code EQUAL 0)
message(STATUS "Downloading... done")
else()
message(FATAL_ERROR "error: downloading '${UCX_URL}' failed
status_code: ${status_code}
status_string: ${status_string}
log:
--- LOG BEGIN ---
${log}
--- LOG END ---"
)
endif()
else()
message(STATUS "${UCX_COMPRESSED_FILE_NAME} already exists")
endif()

execute_process(COMMAND mkdir -p ${UCX_DIR})
execute_process(COMMAND tar xzf ${UCX_COMPRESSED_FILE_PATH} -C ${UCX_DIR} --strip-components 1)
message(STATUS "Building UCX...")
execute_process(
COMMAND sh -c "cd ${UCX_DIR} && ${UCX_DIR}/contrib/configure-release --prefix=${UCX_DIR}/install --enable-mt && make -j8 && make install"
RESULT_VARIABLE UCX_BUILD_STATUS
OUTPUT_FILE ${UCX_BUILD_OUTPUT}
ERROR_FILE ${UCX_BUILD_OUTPUT}
)

if(UCX_BUILD_STATUS)
message(FATAL_ERROR "UCX build result = ${UCX_BUILD_STATUS} - see ${UCX_BUILD_OUTPUT} for more details")
endif()

# Currently, we use default build configurations for UCX and therefore only save URL as configuration settings
file(WRITE ${UCX_CONFIG_FILE} "${UCX_URL}")
endif()

if (FF_LEGION_NETWORKS STREQUAL "gasnet" AND FF_GASNET_CONDUIT STREQUAL "ucx")
set(ENV{UCX_HOME} "${UCX_DIR}/install")
install(DIRECTORY ${UCX_DIR}/install/bin/ DESTINATION bin)
install(DIRECTORY ${UCX_DIR}/install/include/ DESTINATION include)
install(DIRECTORY ${UCX_DIR}/install/lib/ DESTINATION lib)
install(DIRECTORY ${UCX_DIR}/install/share/ DESTINATION share)
endif()

if (FF_LEGION_NETWORKS STREQUAL "ucx")
set(ucx_DIR ${UCX_DIR}/cmake)
set(ENV{Legion_NETWORKS} "ucx")
message(STATUS "Legion_NETWORKS: $ENV{Legion_NETWORKS}")
endif()
else()
message(STATUS "FF_GASNET_CONDUIT: ${FF_GASNET_CONDUIT}")
message(STATUS "FF_LEGION_NETWORKS: ${FF_LEGION_NETWORKS}")
if (FF_LEGION_NETWORKS STREQUAL "gasnet")
message(STATUS "FF_GASNET_CONDUIT: ${FF_GASNET_CONDUIT}")
endif()

set(FF_GPU_BACKENDS cuda hip_cuda hip_rocm intel)
Expand Down Expand Up @@ -179,13 +91,6 @@ set(CC_FLAGS $ENV{CC_FLAGS})
set(NVCC_FLAGS $ENV{NVCC_FLAGS})
set(LD_FLAGS $ENV{LD_FLAGS})

# Set global FLAGS
list(APPEND CC_FLAGS
-std=c++11)

list(APPEND NVCC_FLAGS
-std=c++11)

add_compile_options(${CC_FLAGS})
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS})
link_libraries(${LD_FLAGS})
Expand Down
4 changes: 4 additions & 0 deletions cmake/legion.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ else()
set(Legion_EMBED_GASNet_VERSION "GASNet-2022.3.0" CACHE STRING "GASNet version")
set(Legion_NETWORKS "gasnetex" CACHE STRING "GASNet conduit")
set(GASNet_CONDUIT ${FF_GASNET_CONDUIT})
elseif("${FF_LEGION_NETWORKS}" STREQUAL "ucx")
set(ucx_ROOT ${UCX_PATH}/lib/cmake)
message(STATUS "Find ucx: ${UCX_PATH}")
set(Legion_NETWORKS "ucx" CACHE STRING "Enable UCX")
endif()
message(STATUS "GASNET ROOT: $ENV{GASNet_ROOT_DIR}")
set(Legion_MAX_DIM ${FF_MAX_DIM} CACHE STRING "Maximum number of dimensions")
Expand Down
3 changes: 2 additions & 1 deletion cmake/nccl.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,9 @@ else()
message( STATUS "NCCL include : ${NCCL_INCLUDE_DIRS}" )
message( STATUS "NCCL libraries : ${NCCL_LIBRARIES}" )
add_library(nccl SHARED IMPORTED)

# Build NCCL from source
else()
# Build NCCL from source
message(STATUS "Building NCCL from source")
list(TRANSFORM CUDA_GENCODE PREPEND "NVCC_GENCODE=" OUTPUT_VARIABLE NCCL_BUILD_NVCC_GENCODE)

Expand Down
7 changes: 6 additions & 1 deletion config/config.inc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ if [ -n "$CUDNN_DIR" ]; then
SET_CUDNN="-DCUDNN_PATH=${CUDNN_DIR}"
fi

# set ucx dir
if [ -n "$UCX_DIR" ]; then
SET_UCX="-DUCX_PATH=${UCX_DIR}"
fi

# enable Python
if [ "$FF_USE_PYTHON" = "ON" ]; then
SET_PYTHON="-DFF_USE_PYTHON=ON"
Expand Down Expand Up @@ -188,7 +193,7 @@ if [ -n "$FF_GPU_BACKEND" ]; then
fi
fi

CMAKE_FLAGS="-DCUDA_USE_STATIC_CUDA_RUNTIME=OFF -DLegion_HIJACK_CUDART=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_PYTHON} ${SET_NCCL} ${SET_NCCL_DIR} ${SET_LEGION_NETWORKS} ${SET_EXAMPLES} ${SET_USE_PREBUILT_LEGION} ${SET_USE_PREBUILT_NCCL} ${SET_USE_ALL_PREBUILT_LIBRARIES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND}"
CMAKE_FLAGS="-DCUDA_USE_STATIC_CUDA_RUNTIME=OFF -DLegion_HIJACK_CUDART=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_UCX} ${SET_PYTHON} ${SET_NCCL} ${SET_NCCL_DIR} ${SET_LEGION_NETWORKS} ${SET_EXAMPLES} ${SET_USE_PREBUILT_LEGION} ${SET_USE_PREBUILT_NCCL} ${SET_USE_ALL_PREBUILT_LIBRARIES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND}"

function run_cmake() {
SRC_LOCATION=${SRC_LOCATION:=`dirname $0`/../}
Expand Down
9 changes: 7 additions & 2 deletions config/config.linux
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS:-}
# select GASNET conduit
FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT:-ibv}

# set UCX URL
FF_UCX_URL=${FF_UCX_URL:-""}
# set UCX dir if Legion networks is set to ucx
UCX_DIR=${UCX_DIR:-""}

# build C++ examples
FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES:-OFF}
Expand All @@ -52,6 +52,11 @@ FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS:-OFF}
# use precompiled NCCL and Legion libraries, where available
FF_USE_PREBUILT_NCCL=${FF_USE_PREBUILT_NCCL:-OFF}
FF_USE_PREBUILT_LEGION=${FF_USE_PREBUILT_LEGION:-OFF}

# if FF_USE_PREBUILT_NCCL is not used, you can set NCCL_DIR to point to an external NCCL library;
# otherwise, NCCL will be built from source
NCCL_DIR=${NCCL_DIR:-""}

# use the flag below to use both the NCCL and Legion pre-built libraries.
# when the flag below is set to ON, the two flags above are ignored.
FF_USE_ALL_PREBUILT_LIBRARIES=${FF_USE_ALL_PREBUILT_LIBRARIES:-OFF}
Expand Down