Skip to content

Commit

Permalink
Merge pull request #111 from ecmwf-ifs/naml-spring-clean
Browse files: browse the repository at this point in the history
Loki: Small fixes and clean-up; EC_PMON removal
  • Loading branch information
mlange05 authored Jan 19, 2025
2 parents 62963b2 + 6f52efa commit b707227
Show file tree
Hide file tree
Showing 7 changed files with 10 additions and 77 deletions.
2 changes: 1 addition & 1 deletion arch/ecmwf/hpc2020/gnu/11.2.0/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ module_load gcc/11.2.0
module_load hpcx-openmpi/2.10.0
module_load boost/1.71.0
module_load hdf5/1.10.6
module_load cmake/3.20.2
module_load cmake/3.25.2
module_load python3/3.8.8-01
module_load java/11.0.6

Expand Down
2 changes: 1 addition & 1 deletion arch/ecmwf/hpc2020/gnu/9.3.0/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ module_load gcc/9.3.0
module_load openmpi/4.1.1.1
module_load boost/1.71.0
module_load hdf5/1.10.6
module_load cmake/3.20.2
module_load cmake/3.25.2
module_load python3/3.8.8-01
module_load java/11.0.6

Expand Down
2 changes: 1 addition & 1 deletion bundle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ projects :
- field_api :
git : https://github.com/ecmwf-ifs/field_api.git
version : v0.3.3
version : v0.3.4
require : ecbuild
cmake : >
UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module
Expand Down
31 changes: 2 additions & 29 deletions src/cloudsc_fortran/cloudsc_driver_field_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ MODULE CLOUDSC_DRIVER_FIELD_MOD
USE YOECLDP, ONLY : NCLV
USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK
USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM
USE EC_PMON_MOD, ONLY: EC_PMON
USE CLOUDSC_FIELD_STATE_MOD, ONLY: CLOUDSC_AUX_TYPE, CLOUDSC_FLUX_TYPE, CLOUDSC_STATE_TYPE

IMPLICIT NONE
Expand Down Expand Up @@ -44,16 +43,6 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( &

TYPE(PERFORMANCE_TIMER) :: TIMER
INTEGER(KIND=JPIM) :: TID ! thread id from 0 .. NUMOMP - 1
INTEGER(KIND=JPIB) :: ENERGY, POWER, POWER_TOTAL, POWER_MAX, POWER_COUNT
LOGICAL :: LEC_PMON = .FALSE.
CHARACTER(LEN=1) :: CLEC_PMON

CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON)
IF (CLEC_PMON == '1') LEC_PMON = .TRUE.

POWER_MAX = 0_JPIB
POWER_TOTAL = 0_JPIB
POWER_COUNT = 0_JPIB

NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1)
1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0)
Expand All @@ -64,14 +53,14 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( &
! Global timer for the parallel region
CALL TIMER%START(NUMOMP)

!$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID,energy,power) &
!$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID) &
!$omp& num_threads(NUMOMP) firstprivate(PAUX, FLUX, TENDENCY_TMP, TENDENCY_LOC)

! Local timer for each thread
TID = GET_THREAD_NUM()
CALL TIMER%THREAD_START(TID)

!$omp do schedule(runtime) reduction(+:power_total,power_count) reduction(max:power_max)
!$omp do schedule(runtime)
DO JKGLO=1,NGPTOT,NPROMA
IBL=(JKGLO-1)/NPROMA+1
ICEND=MIN(NPROMA,NGPTOT-JKGLO+1)
Expand Down Expand Up @@ -115,16 +104,6 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( &
& FLUX%PFPLSL, FLUX%PFPLSN, FLUX%PFHPSL, FLUX%PFHPSN, KFLDX, &
& YDOMCST, YDOETHF, YDECLDP)

IF (LEC_PMON) THEN
! Sample power consumption
IF (MOD(IBL, 100) == 0) THEN
CALL EC_PMON(ENERGY, POWER)
POWER_MAX = MAX(POWER_MAX, POWER)
POWER_TOTAL = POWER_TOTAL + POWER
POWER_COUNT = POWER_COUNT + 1
END IF
END IF

! Log number of columns processed by this thread
CALL TIMER%THREAD_LOG(TID, IGPC=ICEND)
ENDDO
Expand All @@ -140,12 +119,6 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( &
CALL TIMER%END()

CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, NGPTOT)

IF (LEC_PMON) THEN
print *, "Power usage (sampled):: max: ", POWER_MAX, "avg:", &
& (REAL(POWER_TOTAL, KIND=JPRD) / REAL(POWER_COUNT, KIND=JPRD)), &
& "count:", POWER_COUNT
END IF

END SUBROUTINE CLOUDSC_DRIVER_FIELD

Expand Down
31 changes: 2 additions & 29 deletions src/cloudsc_fortran/cloudsc_driver_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ MODULE CLOUDSC_DRIVER_MOD
USE YOECLDP, ONLY : NCLV
USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK
USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM
USE EC_PMON_MOD, ONLY: EC_PMON

IMPLICIT NONE

Expand Down Expand Up @@ -102,21 +101,11 @@ SUBROUTINE CLOUDSC_DRIVER( &

TYPE(PERFORMANCE_TIMER) :: TIMER
INTEGER(KIND=JPIM) :: TID ! thread id from 0 .. NUMOMP - 1
INTEGER(KIND=JPIB) :: ENERGY, POWER, POWER_TOTAL, POWER_MAX, POWER_COUNT
LOGICAL :: LEC_PMON = .FALSE.
CHARACTER(LEN=1) :: CLEC_PMON

TYPE(TOMCST) :: YDOMCST
TYPE(TOETHF) :: YDOETHF
TYPE(TECLDP) :: YDECLDP

CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON)
IF (CLEC_PMON == '1') LEC_PMON = .TRUE.

POWER_MAX = 0_JPIB
POWER_TOTAL = 0_JPIB
POWER_COUNT = 0_JPIB

NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1)
1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0)
if (irank == 0) then
Expand All @@ -126,14 +115,14 @@ SUBROUTINE CLOUDSC_DRIVER( &
! Global timer for the parallel region
CALL TIMER%START(NUMOMP)

!$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID,energy,power) &
!$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID) &
!$omp& num_threads(NUMOMP)

! Local timer for each thread
TID = GET_THREAD_NUM()
CALL TIMER%THREAD_START(TID)

!$omp do schedule(runtime) reduction(+:power_total,power_count) reduction(max:power_max)
!$omp do schedule(runtime)
DO JKGLO=1,NGPTOT,NPROMA
IBL=(JKGLO-1)/NPROMA+1
ICEND=MIN(NPROMA,NGPTOT-JKGLO+1)
Expand Down Expand Up @@ -169,16 +158,6 @@ SUBROUTINE CLOUDSC_DRIVER( &
& KFLDX, &
& YDOMCST, YDOETHF, YDECLDP)

IF (LEC_PMON) THEN
! Sample power consumption
IF (MOD(IBL, 100) == 0) THEN
CALL EC_PMON(ENERGY, POWER)
POWER_MAX = MAX(POWER_MAX, POWER)
POWER_TOTAL = POWER_TOTAL + POWER
POWER_COUNT = POWER_COUNT + 1
END IF
END IF

! Log number of columns processed by this thread
CALL TIMER%THREAD_LOG(TID, IGPC=ICEND)
ENDDO
Expand All @@ -194,12 +173,6 @@ SUBROUTINE CLOUDSC_DRIVER( &
CALL TIMER%END()

CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, NGPTOT)

IF (LEC_PMON) THEN
print *, "Power usage (sampled):: max: ", POWER_MAX, "avg:", &
& (REAL(POWER_TOTAL, KIND=JPRD) / REAL(POWER_COUNT, KIND=JPRD)), &
& "count:", POWER_COUNT
END IF

END SUBROUTINE CLOUDSC_DRIVER

Expand Down
15 changes: 3 additions & 12 deletions src/cloudsc_fortran/dwarf_cloudsc.F90
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@ PROGRAM DWARF_CLOUDSC

USE PARKIND1, ONLY: JPIM, JPIB
USE CLOUDSC_MPI_MOD, ONLY: CLOUDSC_MPI_INIT, CLOUDSC_MPI_END, NUMPROC, IRANK
USE CLOUDSC_GLOBAL_STATE_MOD, ONLY: CLOUDSC_GLOBAL_STATE
USE CLOUDSC_DRIVER_MOD, ONLY: CLOUDSC_DRIVER
USE EC_PMON_MOD, ONLY: EC_PMON

USE YOECLDP , ONLY : YRECLDP
USE YOMCST , ONLY : YRCST
Expand All @@ -26,6 +23,9 @@ PROGRAM DWARF_CLOUDSC
#ifdef CLOUDSC_FIELD
USE CLOUDSC_FIELD_STATE_MOD, ONLY: CLOUDSC_FIELD_STATE
USE CLOUDSC_DRIVER_FIELD_MOD, ONLY: CLOUDSC_DRIVER_FIELD
#else
USE CLOUDSC_GLOBAL_STATE_MOD, ONLY: CLOUDSC_GLOBAL_STATE
USE CLOUDSC_DRIVER_MOD, ONLY: CLOUDSC_DRIVER
#endif

IMPLICIT NONE
Expand All @@ -40,21 +40,12 @@ PROGRAM DWARF_CLOUDSC
INTEGER(KIND=JPIM) :: NPROMA = 32 ! NPROMA blocking factor (currently active)
INTEGER(KIND=JPIM) :: NGPTOT ! Local number of grid points

INTEGER(KIND=JPIB) :: ENERGY, POWER
CHARACTER(LEN=1) :: CLEC_PMON

#ifdef CLOUDSC_FIELD
TYPE(CLOUDSC_FIELD_STATE) :: GLOBAL_STATE
#else
TYPE(CLOUDSC_GLOBAL_STATE) :: GLOBAL_STATE
#endif

CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON)
IF (CLEC_PMON == '1') THEN
CALL EC_PMON(ENERGY, POWER)
print *, "EC_PMON:: Initial (idle) power: ", POWER
END IF

IARGS = COMMAND_ARGUMENT_COUNT()

! Get the number of OpenMP threads to use for the benchmark
Expand Down
4 changes: 0 additions & 4 deletions src/cloudsc_loki/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,6 @@ if( HAVE_CUDA )
${COMMON_MODULE}/yoecldp.F90
INCLUDES ${COMMON_INCLUDE}
DEFINITIONS CLOUDSC_GPU_TIMING
DATA_OFFLOAD
REMOVE_OPENMP
XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-cuf-parametrise
OUTPUT
Expand Down Expand Up @@ -758,8 +756,6 @@ if( HAVE_CUDA )
${COMMON_MODULE}/yoecldp.F90
INCLUDES ${COMMON_INCLUDE}
DEFINITIONS CLOUDSC_GPU_TIMING
DATA_OFFLOAD
REMOVE_OPENMP
XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-cuf-hoist
OUTPUT
Expand Down

0 comments on commit b707227

Please sign in to comment.