Skip to content

Commit

Permalink
Merge branch 'develop' into nab_atlas_gpu_field
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrdar committed Oct 14, 2024
2 parents 402b7d3 + 57e95e8 commit 68f9d33
Show file tree
Hide file tree
Showing 39 changed files with 6,325 additions and 5,848 deletions.
52 changes: 52 additions & 0 deletions arch/cambridge/dawn/intel/2024.0.0/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# Source me to get the correct configure/build/run environment

# Store tracing and disable (module is *way* too verbose)
# tracing_ keeps only the "x" flag out of the current shell options ($-):
# it is "x" when `set -x` was active and empty otherwise. It is read again
# at the end of this file to restore the caller's setting.
# stderr of the group is discarded.
{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null

# Load a single environment module and announce it on stdout.
# Shell tracing is switched off earlier in this file, so the echoed
# "+ module load <name>" line stands in for the missing trace output.
#   $1  name of the module to load (e.g. "cmake/3.27.9")
module_load() {
echo "+ module load $1"
# Quote the argument so it reaches `module` as exactly one word
# (no word splitting, no pathname expansion).
module load "$1"
}
# Unload a single environment module and announce it on stdout.
# Counterpart of module_load; prints "+ module unload <name>" first.
#   $1  name of the module to unload
module_unload() {
echo "+ module unload $1"
# Quote the argument so it reaches `module` as exactly one word
# (no word splitting, no pathname expansion).
module unload "$1"
}

# Unload all modules to be certain
module purge

# Load modules
# Every load goes through module_load so that it is echoed: tracing is
# disabled at this point, and a bare `module load` would leave no record
# of which modules were requested.
module_load rhel8/slurm
module_load rhel8/global
module_load dot
module_load dawn-env/2023-12-22
module_load intel-oneapi-compilers/2024.0.0
module_load intel-oneapi-mpi/2021.11.0
module_load boost/1.83.0
module_load cmake/3.27.9
module_load hdf5/1.14.3

# Turn tracing on so that each export below is echoed when this file is sourced
set -x

# below should be the default these days
export EnableImplicitScaling=0
# card 0, tile 0
export ZE_AFFINITY_MASK=0.0
# NOTE(review): presumably restricts device selection to Level Zero GPUs - confirm
export ONEAPI_DEVICE_SELECTOR=level_zero:gpu
# 256 registers per thread, fewer threads
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
# this option affects the overhead of SYCL offload calls, in this case 0 seems to help
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0

# Restore tracing to stored setting
# (tracing_ is non-empty only if tracing was active when this file was sourced)
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

# Relative path to the toolchain file.
# NOTE(review): presumably resolved by the build driver against this arch directory - confirm
export ECBUILD_TOOLCHAIN="./toolchain.cmake"
23 changes: 23 additions & 0 deletions arch/cambridge/dawn/intel/2024.0.0/toolchain.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

####################################################################
# COMPILER FLAGS
####################################################################

# OpenMP flags for Fortran and C. These are cache entries without FORCE,
# so a value already present in the cache is left untouched.
set(OpenMP_Fortran_FLAGS "-fopenmp" CACHE STRING "")
set(OpenMP_C_FLAGS "-fopenmp" CACHE STRING "")

# Add the C++17 standard and -O3 after whatever C++ flags are already set.
string(APPEND CMAKE_CXX_FLAGS " -std=c++17 -O3")

####################################################################
# LINK FLAGS
####################################################################

# All link types pass --eh-frame-hdr to the linker; modules and executables
# also pass -Map,loadmap, and executables additionally pass --as-needed.
set(ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr ")
set(ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap")
set(ECBUILD_EXE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap -Wl,--as-needed")
52 changes: 52 additions & 0 deletions arch/cambridge/dawn/intel/2024.1.0/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# Source me to get the correct configure/build/run environment

# Store tracing and disable (module is *way* too verbose)
# tracing_ keeps only the "x" flag out of the current shell options ($-):
# it is "x" when `set -x` was active and empty otherwise. It is read again
# at the end of this file to restore the caller's setting.
# stderr of the group is discarded.
{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null

# Load a single environment module and announce it on stdout.
# Shell tracing is switched off earlier in this file, so the echoed
# "+ module load <name>" line stands in for the missing trace output.
#   $1  name of the module to load (e.g. "cmake/3.27.9")
module_load() {
echo "+ module load $1"
# Quote the argument so it reaches `module` as exactly one word
# (no word splitting, no pathname expansion).
module load "$1"
}
# Unload a single environment module and announce it on stdout.
# Counterpart of module_load; prints "+ module unload <name>" first.
#   $1  name of the module to unload
module_unload() {
echo "+ module unload $1"
# Quote the argument so it reaches `module` as exactly one word
# (no word splitting, no pathname expansion).
module unload "$1"
}

# Unload all modules to be certain
module purge

# Load modules
# Every load goes through module_load so that it is echoed: tracing is
# disabled at this point, and a bare `module load` would leave no record
# of which modules were requested.
module_load rhel8/slurm
module_load rhel8/global
module_load dot
module_load dawn-env/2024-04-15
module_load intel-oneapi-compilers/2024.1.0
module_load intel-oneapi-mpi/2021.12.0
module_load boost/1.84.0
module_load cmake/3.27.9
module_load hdf5/1.14.3

# Turn tracing on so that each export below is echoed when this file is sourced
set -x

# below should be the default these days
export EnableImplicitScaling=0
# card 0, tile 0
export ZE_AFFINITY_MASK=0.0
# NOTE(review): presumably restricts device selection to Level Zero GPUs - confirm
export ONEAPI_DEVICE_SELECTOR=level_zero:gpu
# 256 registers per thread, fewer threads
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
# this option affects the overhead of SYCL offload calls, in this case 0 seems to help
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0

# Restore tracing to stored setting
# (tracing_ is non-empty only if tracing was active when this file was sourced)
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

# Relative path to the toolchain file.
# NOTE(review): presumably resolved by the build driver against this arch directory - confirm
export ECBUILD_TOOLCHAIN="./toolchain.cmake"
23 changes: 23 additions & 0 deletions arch/cambridge/dawn/intel/2024.1.0/toolchain.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

####################################################################
# COMPILER FLAGS
####################################################################

# OpenMP flags for Fortran and C. These are cache entries without FORCE,
# so a value already present in the cache is left untouched.
set(OpenMP_Fortran_FLAGS "-fopenmp" CACHE STRING "")
set(OpenMP_C_FLAGS "-fopenmp" CACHE STRING "")

# Add the C++17 standard and -O3 after whatever C++ flags are already set.
string(APPEND CMAKE_CXX_FLAGS " -std=c++17 -O3")

####################################################################
# LINK FLAGS
####################################################################

# All link types pass --eh-frame-hdr to the linker; modules and executables
# also pass -Map,loadmap, and executables additionally pass --as-needed.
set(ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr ")
set(ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap")
set(ECBUILD_EXE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap -Wl,--as-needed")
9 changes: 9 additions & 0 deletions bundle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ name : cloudsc-bundle
version : 1.0.0-develop
cmake : >
CMAKE_LINK_DEPENDS_NO_SHARED=ON
CMAKE_EXPORT_COMPILE_COMMANDS=ON
BUILD_serialbox=OFF
BUILD_field_api=OFF
BUILD_eckit=OFF
Expand Down Expand Up @@ -121,6 +122,14 @@ options :
cmake: >
ENABLE_SYCL=ON
- sycl-sub-group-size :
help : SYCL subgroup size
cmake : SYCL_SUB_GROUP_SIZE={{value}}

- cloudsc-cpp-math :
# The help text is quoted: an unquoted [C|STD|SYCL] is parsed by YAML as a
# flow sequence (a one-element list), not as a string.
help : "[C|STD|SYCL]"
cmake : CLOUDSC_CPP_MATH={{value}}

- with-mpi :
help : Enable MPI-parallel kernel
cmake : ENABLE_MPI=ON
Expand Down
10 changes: 5 additions & 5 deletions src/cloudsc_cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ if( HAVE_CLOUDSC_C_CUDA )
SOURCES
cloudsc/yoecldp_c.h
cloudsc/load_state.h
cloudsc/dtype.h
cloudsc/load_state.cu
cloudsc/cloudsc_c.h
cloudsc/cloudsc_c.cu
Expand Down Expand Up @@ -53,7 +54,7 @@ if( HAVE_CLOUDSC_C_CUDA )
target_compile_options(dwarf-cloudsc-c-cuda-lib PRIVATE $<$<COMPILE_LANGUAGE:CUDA>>)
else()
target_compile_options(dwarf-cloudsc-c-cuda-lib PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>)
--ptxas-options=-O3 -use_fast_math -maxrregcount=128 -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>)
endif()

set_target_properties( dwarf-cloudsc-c-cuda-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
Expand Down Expand Up @@ -111,7 +112,7 @@ if( HAVE_CLOUDSC_C_CUDA )
target_compile_options(dwarf-cloudsc-c-cuda-hoist-lib PRIVATE $<$<COMPILE_LANGUAGE:CUDA>>)
else()
target_compile_options(dwarf-cloudsc-c-cuda-hoist-lib PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>)
--ptxas-options=-O3 -use_fast_math -maxrregcount=128 -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>)
endif()

set_target_properties( dwarf-cloudsc-c-cuda-hoist-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
Expand Down Expand Up @@ -169,7 +170,7 @@ if( HAVE_CLOUDSC_C_CUDA )
target_compile_options(dwarf-cloudsc-c-cuda-k-caching-lib PRIVATE $<$<COMPILE_LANGUAGE:CUDA>>)
else()
target_compile_options(dwarf-cloudsc-c-cuda-k-caching-lib PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
-gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>)
--ptxas-options=-O3 -use_fast_math -maxrregcount=128 -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>)
endif()
set_target_properties( dwarf-cloudsc-c-cuda-k-caching-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

Expand All @@ -190,7 +191,6 @@ if( HAVE_CLOUDSC_C_CUDA )
)
###


# Create symlink for the input data
if( HAVE_SERIALBOX )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
Expand All @@ -205,5 +205,5 @@ if( HAVE_CLOUDSC_C_CUDA )
endif()

else()
ecbuild_info( "Serialbox and/or CUDA not found, disabling CUDA prototype(s)" )
ecbuild_info( "CUDA not found, disabling CUDA prototype(s)" )
endif()
Loading

0 comments on commit 68f9d33

Please sign in to comment.