Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated Field API variants of the clouds dwarf (CPU and GPU) #96

Merged
merged 20 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
3dc7920
FIELD state types for cloudsc skeleton
wertysas Oct 1, 2024
4f0ab23
Added Field API view functionality to field state module and Field ap…
wertysas Oct 1, 2024
50ba3b5
Added aux, flux and state types using field API for storage as in the…
wertysas Oct 1, 2024
8e0591c
Fortran CPU version modified to use Field API as in the IFS
wertysas Oct 1, 2024
32828de
Updated bundle and CMake, added with-field api option
wertysas Oct 1, 2024
99f4da3
moved aux, state and flux types into their own modules
wertysas Oct 1, 2024
45a31c0
CMake and bundle updates
wertysas Oct 1, 2024
6796466
Updated authors list
wertysas Oct 4, 2024
91f01f2
Restoring modified comments
wertysas Oct 4, 2024
97eb718
Cmake and bundle updates after PR comments
wertysas Oct 17, 2024
0b52301
Updated github CI builds
wertysas Oct 17, 2024
611b385
Updated Field API version in bundle and disables CUDA if --with-field…
wertysas Oct 17, 2024
77d19d2
Updated call signature of GPU field variant and switched field api pi…
wertysas Oct 21, 2024
a8cc3a0
README updated and bug fixes in field gpu driver and CMake
wertysas Oct 22, 2024
b5a13c8
Switched F-API fields to mapped by default and changed option with-ma…
wertysas Oct 24, 2024
a3bc0eb
added missing ENABLE before mapped feature in bundle and moved back I…
wertysas Oct 24, 2024
d37f273
Updated workflows and tests to handle without-mapped-fields option, a…
wertysas Oct 24, 2024
125741e
Passing flag to CMake to prevent F-API from breaking GNU workflows
wertysas Oct 24, 2024
e455384
replaced gnu gpu tests with cpu tests, added F-API DEV_ALLOC_SIZE env…
wertysas Oct 31, 2024
128382c
CMake clean
wertysas Nov 4, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/scripts/verify-targets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ exit_code=0
#

targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-fortran dwarf-cloudsc-c)
# Expect the CPU Field API variant whenever the build flags contain "--with-field".
# This is a substring match, so longer spellings such as "--with-field-api" also match.
if [[ "$build_flags" == *"--with-field"* ]]
then
targets+=(dwarf-cloudsc-fortran-field)
fi

if [[ "$build_flags" == *"--with-gpu"* ]]
then
Expand All @@ -18,6 +22,10 @@ then
then
targets+=(dwarf-cloudsc-gpu-claw)
fi
# Expect the GPU SCC Field API variant when the build flags contain "--with-field"
# (substring match on the flag string).
if [[ "$build_flags" == *"--with-field"* ]]
then
targets+=(dwarf-cloudsc-gpu-scc-field)
fi
if [[ "$build_flags" == *"--with-cuda"* ]]
then
targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching)
Expand Down
43 changes: 43 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ jobs:
- '--with-gpu --with-loki --with-atlas' # Enable Loki, Atlas, and GPU variants
- '--with-gpu --with-loki --with-atlas --with-mpi' # Enable Loki, Atlas, and GPU variants with MPI
- '--single-precision --with-gpu --with-loki --with-atlas --with-mpi' # Enable Loki, and GPU variants with MPI in a single-precision build
- '--with-gpu --with-loki --with-field --cmake="FIELD_API_ENABLE_ACC=OFF"' # Enable Loki, Field, and GPU variants
- '--with-gpu --with-loki --with-field --with-mpi --cmake="FIELD_API_ENABLE_ACC=OFF"' # Enable Loki, Field, and GPU variants with MPI
- '--single-precision --with-gpu --with-loki --with-field --with-mpi --cmake="FIELD_API_ENABLE_ACC=OFF"' # Enable Loki, Field and GPU variants with MPI in a single-precision build

pyiface_flag: [''] # Enable the pyiface variant

Expand Down Expand Up @@ -70,6 +73,26 @@ jobs:
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-atlas'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: ''
build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-field'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-field'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: ''
build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-field --without-mapped-fields'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-field --without-mapped-fields'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
Comment on lines +76 to +95
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[No action required!] I think we can skip the 21.9 builds. That compiler version is fairly outdated now and the 23.5 tests are much more meaningful. But that can be dealt with in a subsequent CI-cleanup that is on the horizon.


- arch: nvhpc/23.5
nvhpc_version: 23.5
Expand All @@ -91,6 +114,26 @@ jobs:
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-atlas'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: ''
build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-field'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-field'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: ''
build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-field --without-mapped-fields'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-field --without-mapped-fields'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
Expand Down
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- P. Bechtold (ECMWF)
- S. Brdar (ECMWF)
- W. Deconinck (ECMWF)
- J. Ericsson (ECMWF)
- R. Forbes (ECMWF)
- C. Jakob (ECMWF)
- J. Hague (ECMWF)
Expand Down
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,20 @@ endif()
ecbuild_add_option( FEATURE FIELD_API
DESCRIPTION "Use field_api to manage GPU data offload and copyback"
REQUIRED_PACKAGES "field_api"
CONDITION HAVE_CUDA
DEFAULT ON )

ecbuild_find_package( NAME loki )
ecbuild_find_package( NAME atlas )

ecbuild_add_option( FEATURE FIELD_API_DISABLE_MAPPED_MEMORY
DESCRIPTION "Use ACC mapped memory by default in Field API objects"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That description should probably read something like "Disable the use of ACC mapped memory in Field API objects"

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thank you for spotting that, I missed to update the description when changing this option. I have changed it now.

CONDITION HAVE_FIELD_API AND field_api_HAVE_ACC AND field_api_HAVE_CUDA
DEFAULT OFF )
# When the FIELD_API_DISABLE_MAPPED_MEMORY feature is enabled, add a definition of the
# same name to CLOUDSC_DEFINITIONS so that targets built with that list can see it.
if( HAVE_FIELD_API_DISABLE_MAPPED_MEMORY )
list(APPEND CLOUDSC_DEFINITIONS FIELD_API_DISABLE_MAPPED_MEMORY)
endif()


# Add option for single-precision builds
ecbuild_add_option( FEATURE SINGLE_PRECISION
DESCRIPTION "Build CLOUDSC in single precision" DEFAULT OFF
Expand Down
23 changes: 16 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ Balthasar Reuter ([email protected])
prototype that validates runs against platform and language-agnostic
off-line reference data via HDF5 or the Serialbox package. The kernel code
also is slightly cleaner than the original version.
- **dwarf-cloudsc-fortran-field**: A Fortran version of CLOUDSC that uses Field API
for the data structures. The intent of this version is to show how
Field API is used in newer versions of the IFS.
- **dwarf-cloudsc-c**: Standalone C version of the kernel that has
been generated by ECMWF tools. This relies exclusively on the Serialbox
validation mechanism.
Expand Down Expand Up @@ -81,13 +84,18 @@ Balthasar Reuter ([email protected])
- **dwarf-cloudsc-gpu-scc-field**: GPU-enabled and optimized version of
CLOUDSC that uses the SCC loop layout, and uses [FIELD API](https://github.com/ecmwf-ifs/field_api) (a Fortran library purpose-built for IFS data-structures that facilitates the
creation and management of field objects in scientific code) to perform device offload
and copyback. The intent is to demonstrate the explicit use of pinned host memory to speed-up
data transfers, as provided by the shipped prototype implementation, and
investigate the effect of different data storage allocation layouts.
and copyback.
The Field API variant supports modern features of FIELD API such as *field gangs*, which group
multiple fields and allocate them in one larger field in order to reduce allocations and
data transfers. Field gang support can be enabled at runtime by setting the environment
variable `CLOUDSC_PACKED_STORAGE=ON`. If CUDA is available, the Field API variant also supports
allocating fields in pinned memory. This is enabled by setting the
environment variable `CLOUDSC_FIELD_API_PINNED=ON` and will speed up data transfers between host and device.
To enable this variant, a suitable CUDA installation is required and the
`--with-cuda` flag needs to be passed at the build stage. This variant lets the CUDA runtime
manage temporary arrays and needs a large `NV_ACC_CUDA_HEAPSIZE`
(eg. `NV_ACC_CUDA_HEAPSIZE=8GB` for 160K columns.)
manage temporary arrays and needs a large `NV_ACC_CUDA_HEAPSIZE` (e.g. `NV_ACC_CUDA_HEAPSIZE=8GB` for 160K columns).
It is possible to disable Field API registering fields in the OpenACC data map, by passing the
`--without-mapped-fields` flag at build stage.
- **cloudsc-pyiface.py**: a combination of the cloudsc/cloudsc-driver routines
of cloudsc-fortran with the uppermost `dwarf` program replaced with a
corresponding Python script capable of HDF5 data load and
Expand Down Expand Up @@ -320,8 +328,9 @@ transfer overheads will dominate timings, and that most supported GPU
variants aim to optimise compute kernel timings only. However, a
dedicated variant `dwarf-cloudsc-gpu-scc-field` has been added to
explore host-side memory pinning, which improves data transfer times,
and alternative data layout strategies. By default, this will allocate
each array variable individually in pinned memory. A runtime flag
and alternative data layout strategies. By default, pinned memory is turned off
but can be turned on by setting the environment variable `CLOUDSC_FIELD_API_PINNED=ON`.
This will allocate each array variable individually in pinned memory. A runtime flag
`CLOUDSC_PACKED_STORAGE=ON` can be used to enable "packed" storage,
where multiple arrays are stored in a single base allocation, eg.

Expand Down
20 changes: 16 additions & 4 deletions bundle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ cmake : >
CMAKE_LINK_DEPENDS_NO_SHARED=ON
CMAKE_EXPORT_COMPILE_COMMANDS=ON
BUILD_serialbox=OFF
BUILD_field_api=OFF
BUILD_eckit=OFF
BUILD_fckit=OFF
BUILD_atlas=OFF
BUILD_field_api=OFF
ENABLE_OMP=ON
ENABLE_CUDA=OFF
ENABLE_SINGLE_PRECISION=OFF

projects :
Expand Down Expand Up @@ -52,10 +53,11 @@ projects :

- field_api :
git : https://github.com/ecmwf-ifs/field_api.git
version : v0.3.0
version : v0.3.3
require : ecbuild
cmake : >
UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module
FIELD_API_ENABLE_ACC=OFF

- fckit :
git : https://github.com/ecmwf/fckit
Expand All @@ -79,7 +81,6 @@ projects :
require : ecbuild serialbox loki field_api

options :

- toolchain :
help : Specify compiler options via supplied toolchain file
cmake : CMAKE_TOOLCHAIN_FILE={{value}}
Expand All @@ -92,9 +93,21 @@ options :
ENABLE_DOUBLE_PRECISION=OFF
FIELD_API_DEFINITIONS=SINGLE

- with-field-api :
help : Enable Field API variants to be built
cmake : >
BUILD_field_api=ON
ENABLE_CLOUDSC_FORTRAN_FIELD=ON

- without-mapped-fields :
help : Disables automatic registering of Field API fields in ACC map.
cmake : >
ENABLE_FIELD_API_DISABLE_MAPPED_MEMORY=ON

- with-gpu :
help : Enable GPU kernels
cmake : >
FIELD_API_ENABLE_ACC=ON
ENABLE_CLOUDSC_GPU_SCC=ON
ENABLE_CLOUDSC_GPU_SCC_HOIST=ON
ENABLE_CLOUDSC_GPU_SCC_K_CACHING=ON
Expand All @@ -106,7 +119,6 @@ options :
ENABLE_CUDA=ON
ENABLE_CLOUDSC_GPU_SCC_CUF=ON
ENABLE_CLOUDSC_GPU_SCC_CUF_K_CACHING=ON
BUILD_field_api=ON

- with-hip :
help: Enable GPU kernel variant based on HIP
Expand Down
69 changes: 69 additions & 0 deletions src/cloudsc_fortran/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ ecbuild_add_option( FEATURE CLOUDSC_FORTRAN
CONDITION Serialbox_FOUND OR HDF5_FOUND
)

# Define the cloudsc CPU variant that uses Field API for its data structures.
# The feature defaults to ON but is only enabled when HAVE_FIELD_API is set and
# at least one of Serialbox or HDF5 was found.
# NOTE(review): the DESCRIPTION mentions only Serialbox although the CONDITION also
# accepts HDF5 - confirm whether the wording should be updated.
ecbuild_add_option( FEATURE CLOUDSC_FORTRAN_FIELD
DESCRIPTION "Build the field API Fortran version of CLOUDSC using Serialbox" DEFAULT ON
CONDITION HAVE_FIELD_API AND (Serialbox_FOUND OR HDF5_FOUND)
)

if( HAVE_CLOUDSC_FORTRAN )

# Define the binary build target for this variant
Expand Down Expand Up @@ -73,3 +79,66 @@ if( HAVE_CLOUDSC_FORTRAN )
CONDITION HAVE_OMP AND HAVE_MPI
)
endif()

# Build and register tests for the Field API flavour of the Fortran CPU variant.
# Everything below is skipped unless the CLOUDSC_FORTRAN_FIELD feature is enabled.
if( HAVE_CLOUDSC_FORTRAN_FIELD )

# Define the binary build target for this variant.
# It is compiled with the project-wide CLOUDSC_DEFINITIONS plus the extra
# CLOUDSC_FIELD definition.
# NOTE(review): CLOUDSC_FIELD presumably selects the Field API driver in the
# sources - confirm against dwarf_cloudsc.F90.
ecbuild_add_executable(
TARGET dwarf-cloudsc-fortran-field
SOURCES
dwarf_cloudsc.F90
cloudsc_driver_field_mod.F90
cloudsc.F90
LIBS
cloudsc-common-lib
DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_FIELD
)

# Create symlink for the input data.
# The links are created at configure time (execute_process) in the directory three
# levels above this binary dir, which is also the WORKING_DIRECTORY of the tests below.
if( HAVE_SERIALBOX )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data )
endif()

# With HDF5, link the input and reference files individually instead of the data directory.
if( HAVE_HDF5 )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 )
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 )
endif()

# Register four tests for the binary: serial, OpenMP, MPI, and MPI+OpenMP.
# In each test the first positional argument equals the OMP value.
# NOTE(review): the three ARGS are presumably <threads> <columns> <block size> -
# TODO confirm against the driver's argument parsing.

# Single-threaded run, always registered.
ecbuild_add_test(
TARGET dwarf-cloudsc-fortran-field-serial
COMMAND bin/dwarf-cloudsc-fortran-field
ARGS 1 100 16
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
OMP 1
)
# Four-thread OpenMP run, only when OpenMP is available.
ecbuild_add_test(
TARGET dwarf-cloudsc-fortran-field-omp
COMMAND bin/dwarf-cloudsc-fortran-field
ARGS 4 100 16
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
OMP 4
CONDITION HAVE_OMP
)
# Two MPI ranks with one thread each, only when MPI is available.
ecbuild_add_test(
TARGET dwarf-cloudsc-fortran-field-mpi
COMMAND bin/dwarf-cloudsc-fortran-field
ARGS 1 100 16
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
MPI 2
OMP 1
CONDITION HAVE_MPI
)
# Two MPI ranks with four threads each, only when both OpenMP and MPI are available.
ecbuild_add_test(
TARGET dwarf-cloudsc-fortran-field-mpi-omp
COMMAND bin/dwarf-cloudsc-fortran-field
ARGS 4 100 16
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
MPI 2
OMP 4
CONDITION HAVE_OMP AND HAVE_MPI
)
endif()

Loading
Loading