Skip to content

Commit

Permalink
Merge pull request #67 from samhatfield/samhatfield/DE-1127-resolution
Browse files Browse the repository at this point in the history
Add ability to inflate GPU buffers at runtime to accommodate postprocessing requirements
  • Loading branch information
samhatfield authored May 2, 2024
2 parents 7fb1d17 + c1c5f63 commit 3c41427
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 51 deletions.
17 changes: 14 additions & 3 deletions src/trans/gpu/external/setup_trans.F90
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,&
INTEGER(ACC_DEVICE_KIND) :: IDEVTYPE
#endif
INTEGER :: INUMDEVS, IUNIT, ISTAT, IDEV, MYGPU
INTEGER :: IF_PP_FACT

#include "user_clock.intfb.h"
! ------------------------------------------------------------------
Expand Down Expand Up @@ -475,9 +476,19 @@ SUBROUTINE SETUP_TRANS(KSMAX,KDGL,KDLON,KLOEN,LDSPLIT,PSTRET,&
!IF_OUT_LT = 5*NFLEV0+2
!IF_FS = 6*NFLEV0+3

! add additional post-processing requirements
!IF_PP = 2*NFLEV0
IF_PP = 0
! Reserve additional fields for post-processing, controlled by the environment variable
! ECTRANS_GPU_IF_PP_FACT. It sets how many extra fields, on top of the standard number required
! for a model spectral transform, post-processing might need.
! The value is an integer multiple of NFLEV0 (IF_PP = IF_PP_FACT*NFLEV0); if the string read
! from the environment is blank, IF_PP_FACT is 0 and no extra fields are added.
CALL EC_GETENV("ECTRANS_GPU_IF_PP_FACT", CENV)
IF (LEN_TRIM(CENV) > 0) THEN
WRITE(NOUT,'(A)') "Allocating additional device memory for post-processing"
WRITE(NOUT,'(2A)') "${ECTRANS_GPU_IF_PP_FACT}=", CENV
READ(CENV,*) IF_PP_FACT
ELSE
IF_PP_FACT = 0
ENDIF
IF_PP = IF_PP_FACT*NFLEV0

! u/v + scalars 3d + scalars 2d
IF_UV = NFLEV0
Expand Down
87 changes: 39 additions & 48 deletions src/trans/gpu/internal/uvtvd_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -73,49 +73,37 @@ SUBROUTINE UVTVD(KFIELD)
INTEGER(KIND=JPIM) :: KM, KMLOC

!REAL(KIND=JPRBT), INTENT(IN) :: PEPSNM(1:d%nump,0:R%NTMAX+2)
!REAL(KIND=JPRBT), INTENT(OUT) :: PVOR(:,:,:),PDIV(:,:,:)
!REAL(KIND=JPRBT), INTENT(INOUT) :: PU (:,:,:),PV (:,:,:)

! LOCAL INTEGER SCALARS
INTEGER(KIND=JPIM) :: II, IN, IR, J, JN, ITMAX
INTEGER(KIND=JPIM) :: IUS, IUE, IVS, IVE, IVORS, IVORE, IDIVS, IDIVE
INTEGER(KIND=JPIM) :: I_DIV_OFFSET

! LOCAL REAL SCALARS
REAL(KIND=JPRBT) :: ZKM
REAL(KIND=JPRBT) :: ZN(-1:R%NTMAX+3)
REAL(KIND=JPRBT), POINTER :: PU(:,:,:),PV(:,:,:),PVOR(:,:,:),PDIV(:,:,:)

IUS = 1
IUE = 2*KFIELD
IVS = 2*KFIELD+1
IVE = 4*KFIELD
IVORS = 1
IVORE = 2*KFIELD
IDIVS = 2*KFIELD+1
IDIVE = 4*KFIELD
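! ZOA1 holds U at first-dimension indices 1 to 2 * KFIELD and V at 2 * KFIELD + 1 to 4 * KFIELD.
! ZOA2 holds vorticity and divergence in the same two index ranges, so I_DIV_OFFSET is the shift
! from the vorticity/U range to the divergence/V range.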
I_DIV_OFFSET = 2 * KFIELD

! ------------------------------------------------------------------

!* 1. COMPUTE VORTICITY AND DIVERGENCE FROM U AND V.
! ----------------------------------------------

PU => ZOA1(IUS:IUE,:,:)
PV => ZOA1(IVS:IVE,:,:)
PVOR => ZOA2(IVORS:IVORE,:,:)
PDIV => ZOA2(IDIVS:IDIVE,:,:)

#ifdef ACCGPU
!$ACC DATA &
!$ACC& CREATE(ZN) &
!$ACC& PRESENT(D_MYMS,D_NUMP,R_NTMAX,F_RN,ZEPSNM,PU,PV,PVOR,PDIV)
!$ACC& CREATE(ZN) COPYIN(I_DIV_OFFSET) &
!$ACC& PRESENT(D_MYMS,D_NUMP,R_NTMAX,F_RN,ZEPSNM,ZOA1,ZOA2)
#endif
#ifdef OMPGPU
!WARNING: following line should be PRESENT,ALLOC but causes issues with AMD compiler!
!$OMP TARGET DATA&
!$OMP& MAP(ALLOC:ZN) &
!$OMP& MAP(TO:D_MYMS,D_NUMP,R_NTMAX) &
!$OMP& MAP(TO:F_RN) &
!$OMP& MAP(ALLOC:ZEPSNM,PU,PV,PVOR,PDIV)
!$OMP& MAP(ALLOC:ZEPSNM) &
!$OMP& SHARED(I_DIV_OFFSET)
#endif

#ifdef OMPGPU
Expand All @@ -133,33 +121,33 @@ SUBROUTINE UVTVD(KFIELD)
#ifdef OMPGPU
!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2)
!! PRIVATE(KM,IN) DEFAULT(NONE) &
!!$OMP& SHARED(D_NUMP,KFIELD,D_MYMS,PU,PV)
!!$OMP& SHARED(D_NUMP,KFIELD,D_MYMS)
#endif
#ifdef ACCGPU
!$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(KM,IN) DEFAULT(NONE) &
!$ACC& COPYIN(KFIELD) &
!$ACC& PRESENT(D_NUMP,D_MYMS,F_RN,PU,PV,R_NTMAX)
!$ACC& PRESENT(D_NUMP,D_MYMS,F_RN,R_NTMAX)
#endif
DO KMLOC=1,D_NUMP
DO J=1,2*KFIELD
KM = D_MYMS(KMLOC)
!IN = F%NLTN(KM-1)
IN=R_NTMAX+3-KM
PU(J,IN,KMLOC) = 0.0_JPRBT
PV(J,IN,KMLOC) = 0.0_JPRBT
ZOA1(J,IN,KMLOC) = 0.0_JPRBT
ZOA1(I_DIV_OFFSET+J,IN,KMLOC) = 0.0_JPRBT
ENDDO
ENDDO

!* 1.2 COMPUTE VORTICITY AND DIVERGENCE.

#ifdef OMPGPU
!$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM) DEFAULT(NONE) &
!$OMP& SHARED(D_NUMP,R_NTMAX,KFIELD,D_MYMS,PVOR,PV,PU,ZN,PDIV,ZEPSNM)
!$OMP& SHARED(D_NUMP,R_NTMAX,KFIELD,D_MYMS,ZN,ZEPSNM)
#endif
#ifdef ACCGPU
!$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(IR,II,IN,KM,ZKM) DEFAULT(NONE) &
!$ACC& COPYIN(KFIELD) &
!$ACC& PRESENT(D_NUMP,R_NTMAX,D_MYMS,PVOR,PV,PU,ZN,PDIV,ZEPSNM)
!$ACC& PRESENT(D_NUMP,R_NTMAX,D_MYMS,ZN,ZEPSNM)
#endif
DO KMLOC=1,D_NUMP
DO JN=0,R_NTMAX
Expand All @@ -170,28 +158,31 @@ SUBROUTINE UVTVD(KFIELD)
ZKM = REAL(KM,JPRBT)
IN = R_NTMAX+2-JN

IF(KM /= 0 .AND. JN.GE.KM) THEN
PVOR(IR,IN,KMLOC) = -ZKM*PV(II,IN,KMLOC)-&
&ZN(JN)*ZEPSNM(KMLOC,JN+1)*PU(IR,IN-1,KMLOC)+&
&ZN(JN+1)*ZEPSNM(KMLOC,JN)*PU(IR,IN+1,KMLOC)
PVOR(II,IN,KMLOC) = +ZKM*PV(IR,IN,KMLOC)-&
&ZN(JN)*ZEPSNM(KMLOC,JN+1)*PU(II,IN-1,KMLOC)+&
&ZN(JN+1)*ZEPSNM(KMLOC,JN)*PU(II,IN+1,KMLOC)
PDIV(IR,IN,KMLOC) = -ZKM*PU(II,IN,KMLOC)+&
&ZN(JN)*ZEPSNM(KMLOC,JN+1)*PV(IR,IN-1,KMLOC)-&
&ZN(JN+1)*ZEPSNM(KMLOC,JN)*PV(IR,IN+1,KMLOC)
PDIV(II,IN,KMLOC) = +ZKM*PU(IR,IN,KMLOC)+&
&ZN(JN)*ZEPSNM(KMLOC,JN+1)*PV(II,IN-1,KMLOC)-&
&ZN(JN+1)*ZEPSNM(KMLOC,JN)*PV(II,IN+1,KMLOC)
ELSE
IF(KM == 0) THEN
PVOR(IR,IN,KMLOC) = -&
&ZN(JN)*ZEPSNM(KMLOC,JN+1)*PU(IR,IN-1,KMLOC)+&
&ZN(JN+1)*ZEPSNM(KMLOC,JN)*PU(IR,IN+1,KMLOC)
PDIV(IR,IN,KMLOC) = &
&ZN(JN)*ZEPSNM(KMLOC,JN+1)*PV(IR,IN-1,KMLOC)-&
&ZN(JN+1)*ZEPSNM(KMLOC,JN)*PV(IR,IN+1,KMLOC)
ENDIF
IF (KM /= 0 .AND. JN >= KM) THEN
! Vorticity
ZOA2(IR,IN,KMLOC) = -ZKM * ZOA1(I_DIV_OFFSET+II,IN,KMLOC) &
& - ZN(JN) * ZEPSNM(KMLOC,JN+1) * ZOA1(IR,IN-1,KMLOC) &
& + ZN(JN+1) * ZEPSNM(KMLOC,JN) * ZOA1(IR,IN+1,KMLOC)
ZOA2(II,IN,KMLOC) = ZKM * ZOA1(I_DIV_OFFSET+IR,IN,KMLOC) &
& - ZN(JN) * ZEPSNM(KMLOC,JN+1) * ZOA1(II,IN-1,KMLOC) &
& + ZN(JN+1) * ZEPSNM(KMLOC,JN) * ZOA1(II,IN+1,KMLOC)

! Divergence
ZOA2(I_DIV_OFFSET+IR,IN,KMLOC) = -ZKM * ZOA1(II,IN,KMLOC) &
& + ZN(JN) * ZEPSNM(KMLOC,JN+1) * ZOA1(I_DIV_OFFSET+IR,IN-1,KMLOC) &
& - ZN(JN+1) * ZEPSNM(KMLOC,JN) * ZOA1(I_DIV_OFFSET+IR,IN+1,KMLOC)
ZOA2(I_DIV_OFFSET+II,IN,KMLOC) = ZKM * ZOA1(IR,IN,KMLOC) &
& + ZN(JN) * ZEPSNM(KMLOC,JN+1) * ZOA1(I_DIV_OFFSET+II,IN-1,KMLOC) &
& - ZN(JN+1) * ZEPSNM(KMLOC,JN) * ZOA1(I_DIV_OFFSET+II,IN+1,KMLOC)
ELSEIF (KM == 0) THEN
! Vorticity
ZOA2(IR,IN,KMLOC) = -ZN(JN) * ZEPSNM(KMLOC,JN+1) * ZOA1(IR,IN-1,KMLOC) &
& + ZN(JN+1) * ZEPSNM(KMLOC,JN) * ZOA1(IR,IN+1,KMLOC)

! Divergence
ZOA2(I_DIV_OFFSET+IR,IN,KMLOC) = &
& ZN(JN) * ZEPSNM(KMLOC,JN+1) * ZOA1(I_DIV_OFFSET+IR,IN-1,KMLOC) &
& - ZN(JN+1) * ZEPSNM(KMLOC,JN) * ZOA1(I_DIV_OFFSET+IR,IN+1,KMLOC)
ENDIF
ENDDO
ENDDO
Expand Down

0 comments on commit 3c41427

Please sign in to comment.