From ea3501c251e4de66802435687befcef4d551d119 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 17 Jan 2025 19:10:48 +0000 Subject: [PATCH 1/5] Fortran: Guard non-field imports to avoid aliasing --- src/cloudsc_fortran/dwarf_cloudsc.F90 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cloudsc_fortran/dwarf_cloudsc.F90 b/src/cloudsc_fortran/dwarf_cloudsc.F90 index c774c526..e376e39a 100644 --- a/src/cloudsc_fortran/dwarf_cloudsc.F90 +++ b/src/cloudsc_fortran/dwarf_cloudsc.F90 @@ -11,8 +11,6 @@ PROGRAM DWARF_CLOUDSC USE PARKIND1, ONLY: JPIM, JPIB USE CLOUDSC_MPI_MOD, ONLY: CLOUDSC_MPI_INIT, CLOUDSC_MPI_END, NUMPROC, IRANK -USE CLOUDSC_GLOBAL_STATE_MOD, ONLY: CLOUDSC_GLOBAL_STATE -USE CLOUDSC_DRIVER_MOD, ONLY: CLOUDSC_DRIVER USE EC_PMON_MOD, ONLY: EC_PMON USE YOECLDP , ONLY : YRECLDP @@ -26,6 +24,9 @@ PROGRAM DWARF_CLOUDSC #ifdef CLOUDSC_FIELD USE CLOUDSC_FIELD_STATE_MOD, ONLY: CLOUDSC_FIELD_STATE USE CLOUDSC_DRIVER_FIELD_MOD, ONLY: CLOUDSC_DRIVER_FIELD +#else +USE CLOUDSC_GLOBAL_STATE_MOD, ONLY: CLOUDSC_GLOBAL_STATE +USE CLOUDSC_DRIVER_MOD, ONLY: CLOUDSC_DRIVER #endif IMPLICIT NONE From d1cba0fd15be875ee29b638f1022558efe56b6a2 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 17 Jan 2025 19:11:22 +0000 Subject: [PATCH 2/5] Bundle: Update to latest FIELD API to avoid compiler warnings --- bundle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle.yml b/bundle.yml index d8dbf529..ee65ee91 100644 --- a/bundle.yml +++ b/bundle.yml @@ -53,7 +53,7 @@ projects : - field_api : git : https://github.com/ecmwf-ifs/field_api.git - version : v0.3.3 + version : v0.3.4 require : ecbuild cmake : > UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module From c0eafed610ee0249ec154ace31bbc73c4968f417 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 17 Jan 2025 19:11:44 +0000 Subject: [PATCH 3/5] Arch: Update CMake module version on HPC2020 to accommodate Loki --- arch/ecmwf/hpc2020/gnu/11.2.0/env.sh | 2 
+- arch/ecmwf/hpc2020/gnu/9.3.0/env.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ecmwf/hpc2020/gnu/11.2.0/env.sh b/arch/ecmwf/hpc2020/gnu/11.2.0/env.sh index 67caadee..96d86621 100644 --- a/arch/ecmwf/hpc2020/gnu/11.2.0/env.sh +++ b/arch/ecmwf/hpc2020/gnu/11.2.0/env.sh @@ -34,7 +34,7 @@ module_load gcc/11.2.0 module_load hpcx-openmpi/2.10.0 module_load boost/1.71.0 module_load hdf5/1.10.6 -module_load cmake/3.20.2 +module_load cmake/3.25.2 module_load python3/3.8.8-01 module_load java/11.0.6 diff --git a/arch/ecmwf/hpc2020/gnu/9.3.0/env.sh b/arch/ecmwf/hpc2020/gnu/9.3.0/env.sh index 788bfa1a..2d97e4c7 100644 --- a/arch/ecmwf/hpc2020/gnu/9.3.0/env.sh +++ b/arch/ecmwf/hpc2020/gnu/9.3.0/env.sh @@ -33,7 +33,7 @@ module_load gcc/9.3.0 module_load openmpi/4.1.1.1 module_load boost/1.71.0 module_load hdf5/1.10.6 -module_load cmake/3.20.2 +module_load cmake/3.25.2 module_load python3/3.8.8-01 module_load java/11.0.6 From c6deae30e5692b013884cda4c9c8805925be5573 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 17 Jan 2025 19:18:56 +0000 Subject: [PATCH 4/5] Fortran: Removing now obsolete EC_PMON power measurements --- .../cloudsc_driver_field_mod.F90 | 31 ++----------------- src/cloudsc_fortran/cloudsc_driver_mod.F90 | 31 ++----------------- src/cloudsc_fortran/dwarf_cloudsc.F90 | 10 ------ 3 files changed, 4 insertions(+), 68 deletions(-) diff --git a/src/cloudsc_fortran/cloudsc_driver_field_mod.F90 b/src/cloudsc_fortran/cloudsc_driver_field_mod.F90 index 09b1b1ca..232b5133 100644 --- a/src/cloudsc_fortran/cloudsc_driver_field_mod.F90 +++ b/src/cloudsc_fortran/cloudsc_driver_field_mod.F90 @@ -13,7 +13,6 @@ MODULE CLOUDSC_DRIVER_FIELD_MOD USE YOECLDP, ONLY : NCLV USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM - USE EC_PMON_MOD, ONLY: EC_PMON USE CLOUDSC_FIELD_STATE_MOD, ONLY: CLOUDSC_AUX_TYPE, CLOUDSC_FLUX_TYPE, CLOUDSC_STATE_TYPE IMPLICIT NONE @@ -44,16 +43,6 @@ SUBROUTINE 
CLOUDSC_DRIVER_FIELD( & TYPE(PERFORMANCE_TIMER) :: TIMER INTEGER(KIND=JPIM) :: TID ! thread id from 0 .. NUMOMP - 1 - INTEGER(KIND=JPIB) :: ENERGY, POWER, POWER_TOTAL, POWER_MAX, POWER_COUNT - LOGICAL :: LEC_PMON = .FALSE. - CHARACTER(LEN=1) :: CLEC_PMON - - CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON) - IF (CLEC_PMON == '1') LEC_PMON = .TRUE. - - POWER_MAX = 0_JPIB - POWER_TOTAL = 0_JPIB - POWER_COUNT = 0_JPIB NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) 1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0) @@ -64,14 +53,14 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( & ! Global timer for the parallel region CALL TIMER%START(NUMOMP) - !$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID,energy,power) & + !$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID) & !$omp& num_threads(NUMOMP) firstprivate(PAUX, FLUX, TENDENCY_TMP, TENDENCY_LOC) ! Local timer for each thread TID = GET_THREAD_NUM() CALL TIMER%THREAD_START(TID) - !$omp do schedule(runtime) reduction(+:power_total,power_count) reduction(max:power_max) + !$omp do schedule(runtime) DO JKGLO=1,NGPTOT,NPROMA IBL=(JKGLO-1)/NPROMA+1 ICEND=MIN(NPROMA,NGPTOT-JKGLO+1) @@ -115,16 +104,6 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( & & FLUX%PFPLSL, FLUX%PFPLSN, FLUX%PFHPSL, FLUX%PFHPSN, KFLDX, & & YDOMCST, YDOETHF, YDECLDP) - IF (LEC_PMON) THEN - ! Sample power consuption - IF (MOD(IBL, 100) == 0) THEN - CALL EC_PMON(ENERGY, POWER) - POWER_MAX = MAX(POWER_MAX, POWER) - POWER_TOTAL = POWER_TOTAL + POWER - POWER_COUNT = POWER_COUNT + 1 - END IF - END IF - ! 
Log number of columns processed by this thread CALL TIMER%THREAD_LOG(TID, IGPC=ICEND) ENDDO @@ -140,12 +119,6 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( & CALL TIMER%END() CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, NGPTOT) - - IF (LEC_PMON) THEN - print *, "Power usage (sampled):: max: ", POWER_MAX, "avg:", & - & (REAL(POWER_TOTAL, KIND=JPRD) / REAL(POWER_COUNT, KIND=JPRD)), & - & "count:", POWER_COUNT - END IF END SUBROUTINE CLOUDSC_DRIVER_FIELD diff --git a/src/cloudsc_fortran/cloudsc_driver_mod.F90 b/src/cloudsc_fortran/cloudsc_driver_mod.F90 index 52f33e31..f43136c4 100644 --- a/src/cloudsc_fortran/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran/cloudsc_driver_mod.F90 @@ -13,7 +13,6 @@ MODULE CLOUDSC_DRIVER_MOD USE YOECLDP, ONLY : NCLV USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM - USE EC_PMON_MOD, ONLY: EC_PMON IMPLICIT NONE @@ -102,21 +101,11 @@ SUBROUTINE CLOUDSC_DRIVER( & TYPE(PERFORMANCE_TIMER) :: TIMER INTEGER(KIND=JPIM) :: TID ! thread id from 0 .. NUMOMP - 1 - INTEGER(KIND=JPIB) :: ENERGY, POWER, POWER_TOTAL, POWER_MAX, POWER_COUNT - LOGICAL :: LEC_PMON = .FALSE. - CHARACTER(LEN=1) :: CLEC_PMON TYPE(TOMCST) :: YDOMCST TYPE(TOETHF) :: YDOETHF TYPE(TECLDP) :: YDECLDP - CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON) - IF (CLEC_PMON == '1') LEC_PMON = .TRUE. - - POWER_MAX = 0_JPIB - POWER_TOTAL = 0_JPIB - POWER_COUNT = 0_JPIB - NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) 1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0) if (irank == 0) then @@ -126,14 +115,14 @@ SUBROUTINE CLOUDSC_DRIVER( & ! Global timer for the parallel region CALL TIMER%START(NUMOMP) - !$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID,energy,power) & + !$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID) & !$omp& num_threads(NUMOMP) ! 
Local timer for each thread TID = GET_THREAD_NUM() CALL TIMER%THREAD_START(TID) - !$omp do schedule(runtime) reduction(+:power_total,power_count) reduction(max:power_max) + !$omp do schedule(runtime) DO JKGLO=1,NGPTOT,NPROMA IBL=(JKGLO-1)/NPROMA+1 ICEND=MIN(NPROMA,NGPTOT-JKGLO+1) @@ -169,16 +158,6 @@ SUBROUTINE CLOUDSC_DRIVER( & & KFLDX, & & YDOMCST, YDOETHF, YDECLDP) - IF (LEC_PMON) THEN - ! Sample power consuption - IF (MOD(IBL, 100) == 0) THEN - CALL EC_PMON(ENERGY, POWER) - POWER_MAX = MAX(POWER_MAX, POWER) - POWER_TOTAL = POWER_TOTAL + POWER - POWER_COUNT = POWER_COUNT + 1 - END IF - END IF - ! Log number of columns processed by this thread CALL TIMER%THREAD_LOG(TID, IGPC=ICEND) ENDDO @@ -194,12 +173,6 @@ SUBROUTINE CLOUDSC_DRIVER( & CALL TIMER%END() CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, NGPTOT) - - IF (LEC_PMON) THEN - print *, "Power usage (sampled):: max: ", POWER_MAX, "avg:", & - & (REAL(POWER_TOTAL, KIND=JPRD) / REAL(POWER_COUNT, KIND=JPRD)), & - & "count:", POWER_COUNT - END IF END SUBROUTINE CLOUDSC_DRIVER diff --git a/src/cloudsc_fortran/dwarf_cloudsc.F90 b/src/cloudsc_fortran/dwarf_cloudsc.F90 index e376e39a..45dc9a33 100644 --- a/src/cloudsc_fortran/dwarf_cloudsc.F90 +++ b/src/cloudsc_fortran/dwarf_cloudsc.F90 @@ -11,7 +11,6 @@ PROGRAM DWARF_CLOUDSC USE PARKIND1, ONLY: JPIM, JPIB USE CLOUDSC_MPI_MOD, ONLY: CLOUDSC_MPI_INIT, CLOUDSC_MPI_END, NUMPROC, IRANK -USE EC_PMON_MOD, ONLY: EC_PMON USE YOECLDP , ONLY : YRECLDP USE YOMCST , ONLY : YRCST @@ -41,21 +40,12 @@ PROGRAM DWARF_CLOUDSC INTEGER(KIND=JPIM) :: NPROMA = 32 ! NPROMA blocking factor (currently active) INTEGER(KIND=JPIM) :: NGPTOT ! 
Local number of grid points -INTEGER(KIND=JPIB) :: ENERGY, POWER -CHARACTER(LEN=1) :: CLEC_PMON - #ifdef CLOUDSC_FIELD TYPE(CLOUDSC_FIELD_STATE) :: GLOBAL_STATE #else TYPE(CLOUDSC_GLOBAL_STATE) :: GLOBAL_STATE #endif -CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON) -IF (CLEC_PMON == '1') THEN - CALL EC_PMON(ENERGY, POWER) - print *, "EC_PMON:: Initial (idle) power: ", POWER -END IF - IARGS = COMMAND_ARGUMENT_COUNT() ! Get the number of OpenMP threads to use for the benchmark From 6f52efae55d9d41b9c6ae45afad0f5aeedb83072 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 17 Jan 2025 12:48:21 +0000 Subject: [PATCH 5/5] Loki: Remove deprecated Loki options for CUDA builds --- src/cloudsc_loki/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index ccb5fa70..f42bca94 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -703,8 +703,6 @@ if( HAVE_CUDA ) ${COMMON_MODULE}/yoecldp.F90 INCLUDES ${COMMON_INCLUDE} DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD - REMOVE_OPENMP XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-cuf-parametrise OUTPUT @@ -758,8 +756,6 @@ if( HAVE_CUDA ) ${COMMON_MODULE}/yoecldp.F90 INCLUDES ${COMMON_INCLUDE} DEFINITIONS CLOUDSC_GPU_TIMING - DATA_OFFLOAD - REMOVE_OPENMP XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-cuf-hoist OUTPUT