Skip to content

Commit

Permalink
S. Riette 17 May 2024: enable AROME stack deallocation (perf on GPU)
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastienRietteMTO committed May 17, 2024
1 parent 1ddd25b commit 914a1b2
Show file tree
Hide file tree
Showing 8 changed files with 150 additions and 122 deletions.
37 changes: 18 additions & 19 deletions src/testprogs/ice_adjust/main_ice_adjust.F90
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,11 @@ PROGRAM MAIN_ICE_ADJUST
LOGICAL :: LLCHECKDIFF
LOGICAL :: LLDIFF
INTEGER :: IBLOCK1, IBLOCK2
INTEGER :: ISTSZ, JBLK1, JBLK2
INTEGER :: ISTSZ(2), JBLK1, JBLK2
INTEGER :: NTID, ITID

REAL, ALLOCATABLE, TARGET :: PSTACK(:,:)
REAL, ALLOCATABLE, TARGET :: PSTACK8(:,:)
REAL(KIND=4), ALLOCATABLE, TARGET :: PSTACK4(:,:)
TYPE(STACK), TARGET :: YLSTACK

REAL(KIND=8) :: TS,TE
Expand Down Expand Up @@ -140,13 +141,15 @@ PROGRAM MAIN_ICE_ADJUST
D0%NKTB = 1
D0%NKTE = KLEV

#if defined(USE_COLCALL) && defined(_OPENACC)
ISTSZ = NPROMA * 500 * KLEV
#else
ISTSZ = NPROMA * 20 * KLEV
ISTSZ=0
ISTSZ(KIND(PRHODJ)/4) = NPROMA * 15 * KLEV
#ifndef USE_STACK
ISTSZ(2) = ISTSZ(2) + CEILING(ISTSZ(1) / 2.)
ISTSZ(1) = 0
#endif
ALLOCATE (PSTACK (ISTSZ, NGPBLKS))
ZMNH_STACK => PSTACK
ALLOCATE (PSTACK4 (ISTSZ(1), NGPBLKS))
ALLOCATE (PSTACK8 (ISTSZ(2), NGPBLKS))
ZMNH_STACK => PSTACK8

TS = OMP_GET_WTIME ()

Expand All @@ -169,7 +172,7 @@ PROGRAM MAIN_ICE_ADJUST
!$acc & ZDUM1, ZDUM2, ZDUM3, ZDUM4, ZDUM5, ZRS, ZICE_CLD_WGT) &
!$acc & copy (PRS, PTHS) &
!$acc & copyout (PSRCS, PCLDFR, PHLC_HRC, PHLC_HCF, PHLI_HRI, PHLI_HCF) &
!$acc & create (PSTACK)
!$acc & create (PSTACK4, PSTACK8)

TSC = OMP_GET_WTIME ()

Expand Down Expand Up @@ -202,18 +205,14 @@ PROGRAM MAIN_ICE_ADJUST

#ifdef USE_STACK
!Using cray pointers, AROME mechanism
#if defined(USE_COLCALL) && defined(_OPENACC)
!Due to the collapse (2) directive, each point must have its own stack
YLSTACK%L = LOC (PSTACK (1, IBL)) + CEILING(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 * (JLON - 1)
YLSTACK%U = YLSTACK%L + FLOOR(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8
#else
YLSTACK%L = LOC (PSTACK (1, IBL))
YLSTACK%U = YLSTACK%L + ISTSZ * KIND (PSTACK)
#endif
YLSTACK%L(1) = LOC (PSTACK4 (1, IBL))
YLSTACK%U(1) = YLSTACK%L(1) + ISTSZ(1) * KIND (PSTACK4)
YLSTACK%L(2) = LOC (PSTACK8 (1, IBL))
YLSTACK%U(2) = YLSTACK%L(2) + ISTSZ(2) * KIND (PSTACK8)
#else
!Using fortran indexing, Meso-NH mechanism
YLSTACK%L = 1
YLSTACK%U = ISTSZ
YLSTACK%L(2) = 1
YLSTACK%U(2) = ISTSZ(2)
IMNH_BLOCK => IBL
YMNH_STACK => YLSTACK
INUMPIN = 0
Expand Down
42 changes: 28 additions & 14 deletions src/testprogs/rain_ice/main_rain_ice.F90
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,13 @@ PROGRAM MAIN_RAIN_ICE
LOGICAL :: LLCHECKDIFF
LOGICAL :: LLDIFF
INTEGER :: IBLOCK1, IBLOCK2
INTEGER :: ISTSZ, JBLK1, JBLK2
INTEGER :: ISTSZ(2), JBLK1, JBLK2
INTEGER :: NTID, ITID
INTEGER :: JRR
REAL :: ZTHVREFZIKB! for electricity use only

REAL, ALLOCATABLE, TARGET :: PSTACK(:,:)
REAL, ALLOCATABLE, TARGET :: PSTACK8(:,:)
REAL(KIND=4), ALLOCATABLE, TARGET :: PSTACK4(:,:)
TYPE(STACK), TARGET :: YLSTACK

REAL(KIND=8) :: TS,TE
Expand Down Expand Up @@ -135,13 +136,21 @@ PROGRAM MAIN_RAIN_ICE
D0%NKTB = 1
D0%NKTE = KLEV

ISTSZ=0
ISTSZ(KIND(LLMICRO)/4) = NPROMA * 3 * KLEV
ISTSZ(KIND(PRHODJ)/4) = NPROMA * 30 * KLEV
#ifdef USE_STACK
#if defined(USE_COLCALL) && defined(_OPENACC)
ISTSZ = NPROMA * 1500 * KLEV
ISTSZ(KIND(LLMICRO)/4) = ISTSZ(KIND(LLMICRO)/4) * NPROMA
ISTSZ(KIND(PRHODJ)/4) = ISTSZ(KIND(PRHODJ)/4) * NPROMA
#endif
#else
ISTSZ = NPROMA * 200 * KLEV
ISTSZ(2) = ISTSZ(2) + CEILING(ISTSZ(1) / 2.)
ISTSZ(1) = 0
#endif
ALLOCATE (PSTACK (ISTSZ, NGPBLKS))
ZMNH_STACK => PSTACK
ALLOCATE (PSTACK4 (ISTSZ(1), NGPBLKS))
ALLOCATE (PSTACK8 (ISTSZ(2), NGPBLKS))
ZMNH_STACK => PSTACK8

TS = OMP_GET_WTIME ()

Expand Down Expand Up @@ -169,7 +178,7 @@ PROGRAM MAIN_RAIN_ICE
!$acc & PTHT, PRT, PSIGS, PSEA, PTOWN) &
!$acc & copy (PCIT, PHLC_HRC, PHLC_HCF, PHLI_HRI, PHLI_HCF, PTHS, PRS) &
!$acc & copyout (ZINPRC, PINPRR, PEVAP, PINPRS, PINPRG, ZINDEP, ZRAINFR, PFPR) &
!$acc & create (PSTACK)
!$acc & create (PSTACK4, PSTACK8)

TSC = OMP_GET_WTIME ()

Expand Down Expand Up @@ -203,17 +212,22 @@ PROGRAM MAIN_RAIN_ICE
#ifdef USE_STACK
!Using cray pointers, AROME mechanism
#if defined(USE_COLCALL) && defined(_OPENACC)
!Due to the collapse (2) directive, each point must have its own stack
YLSTACK%L = LOC (PSTACK (1, IBL)) + CEILING(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 * (JLON - 1)
YLSTACK%U = YLSTACK%L + FLOOR(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8
!Each point must have its own stack, otherwise the different threads will use the same
!memory region during the packing operation
YLSTACK%L(1) = LOC (PSTACK4 (1, IBL)) + CEILING(ISTSZ(1) * KIND (PSTACK4) / NPROMA / 4.) * 4 * (JLON - 1)
YLSTACK%U(1) = YLSTACK%L(1) + FLOOR(ISTSZ(1) * KIND (PSTACK4) / NPROMA / 4.) * 4
YLSTACK%L(2) = LOC (PSTACK8 (1, IBL)) + CEILING(ISTSZ(2) * KIND (PSTACK8) / NPROMA / 8.) * 8 * (JLON - 1)
YLSTACK%U(2) = YLSTACK%L(2) + FLOOR(ISTSZ(2) * KIND (PSTACK8) / NPROMA / 8.) * 8
#else
YLSTACK%L = LOC (PSTACK (1, IBL))
YLSTACK%U = YLSTACK%L + ISTSZ * KIND (PSTACK)
YLSTACK%L(1) = LOC (PSTACK4 (1, IBL))
YLSTACK%U(1) = YLSTACK%L(1) + ISTSZ(1) * KIND (PSTACK4)
YLSTACK%L(2) = LOC (PSTACK8 (1, IBL))
YLSTACK%U(2) = YLSTACK%L(2) + ISTSZ(2) * KIND (PSTACK8)
#endif
#else
!Using fortran indexing, Meso-NH mechanism
YLSTACK%L = 1
YLSTACK%U = ISTSZ
YLSTACK%L(2) = 1
YLSTACK%U(2) = ISTSZ(2)
IMNH_BLOCK => IBL
YMNH_STACK => YLSTACK
INUMPIN = 0
Expand Down
42 changes: 28 additions & 14 deletions src/testprogs/rain_ice_old/main_rain_ice_old.F90
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,13 @@ PROGRAM MAIN_RAIN_ICE_OLD
LOGICAL :: LLCHECKDIFF
LOGICAL :: LLDIFF
INTEGER :: IBLOCK1, IBLOCK2
INTEGER :: ISTSZ, JBLK1, JBLK2
INTEGER :: ISTSZ(2), JBLK1, JBLK2
INTEGER :: NTID, ITID
INTEGER :: JRR
TYPE(TLES_t) :: TLES

REAL, ALLOCATABLE, TARGET :: PSTACK(:,:)
REAL, ALLOCATABLE, TARGET :: PSTACK8(:,:)
REAL(KIND=4), ALLOCATABLE, TARGET :: PSTACK4(:,:)
TYPE(STACK), TARGET :: YLSTACK

REAL(KIND=8) :: TS,TE
Expand Down Expand Up @@ -199,13 +200,21 @@ PROGRAM MAIN_RAIN_ICE_OLD
D0%NKTB = 1
D0%NKTE = KLEV

ISTSZ=0
ISTSZ(KIND(LLMICRO)/4) = NPROMA * 3 * KLEV
ISTSZ(KIND(PRHODJ)/4) = NPROMA * 22 * KLEV
#ifdef USE_STACK
#if defined(USE_COLCALL) && defined(_OPENACC)
ISTSZ = NPROMA * 1500 * KLEV
ISTSZ(KIND(LLMICRO)/4) = ISTSZ(KIND(LLMICRO)/4) * NPROMA
ISTSZ(KIND(PRHODJ)/4) = ISTSZ(KIND(PRHODJ)/4) * NPROMA
#endif
#else
ISTSZ = NPROMA * 100 * KLEV
ISTSZ(2) = ISTSZ(2) + CEILING(ISTSZ(1) / 2.)
ISTSZ(1) = 0
#endif
ALLOCATE (PSTACK (ISTSZ, NGPBLKS))
ZMNH_STACK => PSTACK
ALLOCATE (PSTACK4 (ISTSZ(1), NGPBLKS))
ALLOCATE (PSTACK8 (ISTSZ(2), NGPBLKS))
ZMNH_STACK => PSTACK8

TS = OMP_GET_WTIME ()

Expand Down Expand Up @@ -234,7 +243,7 @@ PROGRAM MAIN_RAIN_ICE_OLD
!$acc & PTHT, PRT, PSIGS, PSEA, PTOWN, PICENU, PKGN_ACON, PKGN_SBGR) &
!$acc & copy (PCIT, PIFR, PTHS, PRS) &
!$acc & copyout (ZINPRC, PINPRR, PEVAP, PINPRS, PINPRG, PFPR) &
!$acc & create (PSTACK)
!$acc & create (PSTACK4, PSTACK8)

TSC = OMP_GET_WTIME ()

Expand Down Expand Up @@ -271,17 +280,22 @@ PROGRAM MAIN_RAIN_ICE_OLD
#ifdef USE_STACK
!Using cray pointers, AROME mechanism
#if defined(USE_COLCALL) && defined(_OPENACC)
!Due to the collapse (2) directive, each point must have its own stack
YLSTACK%L = LOC (PSTACK (1, IBL)) + CEILING(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 * (JLON - 1)
YLSTACK%U = YLSTACK%L + FLOOR(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8
!Each point must have its own stack, otherwise the different threads will use the same
!memory region during the packing operation
YLSTACK%L(1) = LOC (PSTACK4 (1, IBL)) + CEILING(ISTSZ(1) * KIND (PSTACK4) / NPROMA / 4.) * 4 * (JLON - 1)
YLSTACK%U(1) = YLSTACK%L(1) + FLOOR(ISTSZ(1) * KIND (PSTACK4) / NPROMA / 4.) * 4
YLSTACK%L(2) = LOC (PSTACK8 (1, IBL)) + CEILING(ISTSZ(2) * KIND (PSTACK8) / NPROMA / 8.) * 8 * (JLON - 1)
YLSTACK%U(2) = YLSTACK%L(2) + FLOOR(ISTSZ(2) * KIND (PSTACK8) / NPROMA / 8.) * 8
#else
YLSTACK%L = LOC (PSTACK (1, IBL))
YLSTACK%U = YLSTACK%L + ISTSZ * KIND (PSTACK)
YLSTACK%L(1) = LOC (PSTACK4 (1, IBL))
YLSTACK%U(1) = YLSTACK%L(1) + ISTSZ(1) * KIND (PSTACK4)
YLSTACK%L(2) = LOC (PSTACK8 (1, IBL))
YLSTACK%U(2) = YLSTACK%L(2) + ISTSZ(2) * KIND (PSTACK8)
#endif
#else
!Using fortran indexing, Meso-NH mechanism
YLSTACK%L = 1
YLSTACK%U = ISTSZ
YLSTACK%L(2) = 1
YLSTACK%U(2) = ISTSZ(2)
IMNH_BLOCK => IBL
YMNH_STACK => YLSTACK
INUMPIN = 0
Expand Down
39 changes: 20 additions & 19 deletions src/testprogs/shallow/main_shallow.F90
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,11 @@ PROGRAM MAIN_SHALLOW
LOGICAL :: LLCHECKDIFF
LOGICAL :: LLDIFF
INTEGER :: IBLOCK1, IBLOCK2
INTEGER :: ISTSZ, JBLK1, JBLK2
INTEGER :: ISTSZ(2), JBLK1, JBLK2
INTEGER :: NTID, ITID

REAL, ALLOCATABLE, TARGET :: PSTACK(:,:)
REAL, ALLOCATABLE, TARGET :: PSTACK8(:,:)
REAL(KIND=4), ALLOCATABLE, TARGET :: PSTACK4(:,:)
TYPE(STACK), TARGET :: YLSTACK

REAL(KIND=8) :: TS,TE
Expand Down Expand Up @@ -208,13 +209,17 @@ PROGRAM MAIN_SHALLOW
D0%NIEC = D0%NIE
D0%NJEC = D0%NJT

#if defined(USE_COLCALL) && defined(_OPENACC)
ISTSZ = NPROMA * 3000 * KLEV
#else
ISTSZ = NPROMA * 100 * KLEV
ISTSZ=0
ISTSZ(KIND(IKLCL)/4) = NPROMA * 2 * KLEV
ISTSZ(KIND(PRHODJ)/4) = NPROMA * 39 * KLEV
ISTSZ(KIND(PRHODJ)/4) = ISTSZ(KIND(PRHODJ)/4) + NPROMA * 16 !for ZBUF(KLON,16)
#ifndef USE_STACK
ISTSZ(2) = ISTSZ(2) + CEILING(ISTSZ(1) / 2.)
ISTSZ(1) = 0
#endif
ALLOCATE (PSTACK (ISTSZ, NGPBLKS))
ZMNH_STACK => PSTACK
ALLOCATE (PSTACK4 (ISTSZ(1), NGPBLKS))
ALLOCATE (PSTACK8 (ISTSZ(2), NGPBLKS))
ZMNH_STACK => PSTACK8

TS = OMP_GET_WTIME ()

Expand All @@ -238,7 +243,7 @@ PROGRAM MAIN_SHALLOW
!$acc & copy (PTHL_UP, PRT_UP, PRV_UP, PRC_UP, PRI_UP, PU_UP, PV_UP, PTHV_UP, PW_UP, PFRAC_UP, PEMF) &
!$acc & copyout (PDUDT_MF, PDVDT_MF, PDTHLDT_MF, PDRTDT_MF, PDSVDT_MF, PSIGMF, PRC_MF, PRI_MF, PCF_MF, &
!$acc & PFLXZTHVMF, ZFLXZTHMF, ZFLXZRMF, ZFLXZUMF, ZFLXZVMF, ZDETR, ZENTR, IKLCL, IKETL, IKCTL) &
!$acc & create (PSTACK)
!$acc & create (PSTACK4, PSTACK8)

TSC = OMP_GET_WTIME ()

Expand Down Expand Up @@ -273,18 +278,14 @@ PROGRAM MAIN_SHALLOW

#ifdef USE_STACK
!Using cray pointers, AROME mechanism
#if defined(USE_COLCALL) && defined(_OPENACC)
!Due to the collapse (2) directive, each point must have its own stack
YLSTACK%L = LOC (PSTACK (1, IBL)) + CEILING(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 * (JLON - 1)
YLSTACK%U = YLSTACK%L + FLOOR(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8
#else
YLSTACK%L = LOC (PSTACK (1, IBL))
YLSTACK%U = YLSTACK%L + ISTSZ * KIND (PSTACK)
#endif
YLSTACK%L(1) = LOC (PSTACK4 (1, IBL))
YLSTACK%U(1) = YLSTACK%L(1) + ISTSZ(1) * KIND (PSTACK4)
YLSTACK%L(2) = LOC (PSTACK8 (1, IBL))
YLSTACK%U(2) = YLSTACK%L(2) + ISTSZ(2) * KIND (PSTACK8)
#else
!Using fortran indexing, Meso-NH mechanism
YLSTACK%L = 1
YLSTACK%U = ISTSZ
YLSTACK%L(2) = 1
YLSTACK%U(2) = ISTSZ(2)
IMNH_BLOCK => IBL
YMNH_STACK => YLSTACK
INUMPIN = 0
Expand Down
Loading

0 comments on commit 914a1b2

Please sign in to comment.