diff --git a/src/testprogs/ice_adjust/main_ice_adjust.F90 b/src/testprogs/ice_adjust/main_ice_adjust.F90 index f56d888c..df5aa670 100644 --- a/src/testprogs/ice_adjust/main_ice_adjust.F90 +++ b/src/testprogs/ice_adjust/main_ice_adjust.F90 @@ -65,10 +65,11 @@ PROGRAM MAIN_ICE_ADJUST LOGICAL :: LLCHECKDIFF LOGICAL :: LLDIFF INTEGER :: IBLOCK1, IBLOCK2 -INTEGER :: ISTSZ, JBLK1, JBLK2 +INTEGER :: ISTSZ(2), JBLK1, JBLK2 INTEGER :: NTID, ITID -REAL, ALLOCATABLE, TARGET :: PSTACK(:,:) +REAL, ALLOCATABLE, TARGET :: PSTACK8(:,:) +REAL(KIND=4), ALLOCATABLE, TARGET :: PSTACK4(:,:) TYPE(STACK), TARGET :: YLSTACK REAL(KIND=8) :: TS,TE @@ -140,13 +141,15 @@ PROGRAM MAIN_ICE_ADJUST D0%NKTB = 1 D0%NKTE = KLEV -#if defined(USE_COLCALL) && defined(_OPENACC) -ISTSZ = NPROMA * 500 * KLEV -#else -ISTSZ = NPROMA * 20 * KLEV +ISTSZ=0 +ISTSZ(KIND(PRHODJ)/4) = NPROMA * 15 * KLEV +#ifndef USE_STACK +ISTSZ(2) = ISTSZ(2) + CEILING(ISTSZ(1) * 4. / KIND(PSTACK8)) !4-byte words expressed in PSTACK8 elements +ISTSZ(1) = 0 #endif -ALLOCATE (PSTACK (ISTSZ, NGPBLKS)) -ZMNH_STACK => PSTACK +ALLOCATE (PSTACK4 (ISTSZ(1), NGPBLKS)) +ALLOCATE (PSTACK8 (ISTSZ(2), NGPBLKS)) +ZMNH_STACK => PSTACK8 TS = OMP_GET_WTIME () @@ -169,7 +172,7 @@ PROGRAM MAIN_ICE_ADJUST !$acc & ZDUM1, ZDUM2, ZDUM3, ZDUM4, ZDUM5, ZRS, ZICE_CLD_WGT) & !$acc & copy (PRS, PTHS) & !$acc & copyout (PSRCS, PCLDFR, PHLC_HRC, PHLC_HCF, PHLI_HRI, PHLI_HCF) & -!$acc & create (PSTACK) +!$acc & create (PSTACK4, PSTACK8) TSC = OMP_GET_WTIME () @@ -202,18 +205,14 @@ PROGRAM MAIN_ICE_ADJUST #ifdef USE_STACK !Using cray pointers, AROME mechanism -#if defined(USE_COLCALL) && defined(_OPENACC) - !Due to the collapse (2) directive, each point must have its own stack - YLSTACK%L = LOC (PSTACK (1, IBL)) + CEILING(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 * (JLON - 1) - YLSTACK%U = YLSTACK%L + FLOOR(ISTSZ * KIND (PSTACK) / NPROMA / 8.) 
* 8 -#else - YLSTACK%L = LOC (PSTACK (1, IBL)) - YLSTACK%U = YLSTACK%L + ISTSZ * KIND (PSTACK) -#endif + YLSTACK%L(1) = LOC (PSTACK4 (1, IBL)) + YLSTACK%U(1) = YLSTACK%L(1) + ISTSZ(1) * KIND (PSTACK4) + YLSTACK%L(2) = LOC (PSTACK8 (1, IBL)) + YLSTACK%U(2) = YLSTACK%L(2) + ISTSZ(2) * KIND (PSTACK8) #else !Using fortran indexing, Meso-NH mechanism - YLSTACK%L = 1 - YLSTACK%U = ISTSZ + YLSTACK%L(2) = 1 + YLSTACK%U(2) = ISTSZ(2) IMNH_BLOCK => IBL YMNH_STACK => YLSTACK INUMPIN = 0 diff --git a/src/testprogs/rain_ice/main_rain_ice.F90 b/src/testprogs/rain_ice/main_rain_ice.F90 index 4985c9da..6904f9ca 100644 --- a/src/testprogs/rain_ice/main_rain_ice.F90 +++ b/src/testprogs/rain_ice/main_rain_ice.F90 @@ -58,12 +58,13 @@ PROGRAM MAIN_RAIN_ICE LOGICAL :: LLCHECKDIFF LOGICAL :: LLDIFF INTEGER :: IBLOCK1, IBLOCK2 -INTEGER :: ISTSZ, JBLK1, JBLK2 +INTEGER :: ISTSZ(2), JBLK1, JBLK2 INTEGER :: NTID, ITID INTEGER :: JRR REAL :: ZTHVREFZIKB! for electricity use only -REAL, ALLOCATABLE, TARGET :: PSTACK(:,:) +REAL, ALLOCATABLE, TARGET :: PSTACK8(:,:) +REAL(KIND=4), ALLOCATABLE, TARGET :: PSTACK4(:,:) TYPE(STACK), TARGET :: YLSTACK REAL(KIND=8) :: TS,TE @@ -135,13 +136,21 @@ PROGRAM MAIN_RAIN_ICE D0%NKTB = 1 D0%NKTE = KLEV +ISTSZ=0 +ISTSZ(KIND(LLMICRO)/4) = NPROMA * 3 * KLEV +ISTSZ(KIND(PRHODJ)/4) = ISTSZ(KIND(PRHODJ)/4) + NPROMA * 30 * KLEV !accumulate: shares the LLMICRO slot when REAL is 4 bytes +#ifdef USE_STACK #if defined(USE_COLCALL) && defined(_OPENACC) -ISTSZ = NPROMA * 1500 * KLEV +ISTSZ(1) = ISTSZ(1) * NPROMA !scale each slot exactly once, whatever the kinds +ISTSZ(2) = ISTSZ(2) * NPROMA +#endif #else -ISTSZ = NPROMA * 200 * KLEV +ISTSZ(2) = ISTSZ(2) + CEILING(ISTSZ(1) * 4. / KIND(PSTACK8)) !4-byte words expressed in PSTACK8 elements 
+ISTSZ(1) = 0 #endif -ALLOCATE (PSTACK (ISTSZ, NGPBLKS)) -ZMNH_STACK => PSTACK +ALLOCATE (PSTACK4 (ISTSZ(1), NGPBLKS)) +ALLOCATE (PSTACK8 (ISTSZ(2), NGPBLKS)) +ZMNH_STACK => PSTACK8 TS = OMP_GET_WTIME () @@ -169,7 +178,7 @@ PROGRAM MAIN_RAIN_ICE !$acc & PTHT, PRT, PSIGS, PSEA, PTOWN) & !$acc & copy (PCIT, PHLC_HRC, PHLC_HCF, PHLI_HRI, PHLI_HCF, PTHS, PRS) & !$acc & copyout (ZINPRC, PINPRR, PEVAP, PINPRS, PINPRG, ZINDEP, ZRAINFR, PFPR) & -!$acc & create (PSTACK) +!$acc & create (PSTACK4, PSTACK8) TSC = OMP_GET_WTIME () @@ -203,17 +212,22 @@ PROGRAM MAIN_RAIN_ICE #ifdef USE_STACK !Using cray pointers, AROME mechanism #if defined(USE_COLCALL) && defined(_OPENACC) - !Due to the collapse (2) directive, each point must have its own stack - YLSTACK%L = LOC (PSTACK (1, IBL)) + CEILING(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 * (JLON - 1) - YLSTACK%U = YLSTACK%L + FLOOR(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 + !Each point must have its own stack, otherwise the different threads will use the same + !memory region during the packing operation + YLSTACK%L(1) = LOC (PSTACK4 (1, IBL)) + CEILING(ISTSZ(1) * KIND (PSTACK4) / NPROMA / 4.) * 4 * (JLON - 1) + YLSTACK%U(1) = YLSTACK%L(1) + FLOOR(ISTSZ(1) * KIND (PSTACK4) / NPROMA / 4.) * 4 + YLSTACK%L(2) = LOC (PSTACK8 (1, IBL)) + CEILING(ISTSZ(2) * KIND (PSTACK8) / NPROMA / 8.) * 8 * (JLON - 1) + YLSTACK%U(2) = YLSTACK%L(2) + FLOOR(ISTSZ(2) * KIND (PSTACK8) / NPROMA / 8.) 
* 8 #else - YLSTACK%L = LOC (PSTACK (1, IBL)) - YLSTACK%U = YLSTACK%L + ISTSZ * KIND (PSTACK) + YLSTACK%L(1) = LOC (PSTACK4 (1, IBL)) + YLSTACK%U(1) = YLSTACK%L(1) + ISTSZ(1) * KIND (PSTACK4) + YLSTACK%L(2) = LOC (PSTACK8 (1, IBL)) + YLSTACK%U(2) = YLSTACK%L(2) + ISTSZ(2) * KIND (PSTACK8) #endif #else !Using fortran indexing, Meso-NH mechanism - YLSTACK%L = 1 - YLSTACK%U = ISTSZ + YLSTACK%L(2) = 1 + YLSTACK%U(2) = ISTSZ(2) IMNH_BLOCK => IBL YMNH_STACK => YLSTACK INUMPIN = 0 diff --git a/src/testprogs/rain_ice_old/main_rain_ice_old.F90 b/src/testprogs/rain_ice_old/main_rain_ice_old.F90 index 0d4b2f6e..ceb9db7a 100644 --- a/src/testprogs/rain_ice_old/main_rain_ice_old.F90 +++ b/src/testprogs/rain_ice_old/main_rain_ice_old.F90 @@ -67,12 +67,13 @@ PROGRAM MAIN_RAIN_ICE_OLD LOGICAL :: LLCHECKDIFF LOGICAL :: LLDIFF INTEGER :: IBLOCK1, IBLOCK2 -INTEGER :: ISTSZ, JBLK1, JBLK2 +INTEGER :: ISTSZ(2), JBLK1, JBLK2 INTEGER :: NTID, ITID INTEGER :: JRR TYPE(TLES_t) :: TLES -REAL, ALLOCATABLE, TARGET :: PSTACK(:,:) +REAL, ALLOCATABLE, TARGET :: PSTACK8(:,:) +REAL(KIND=4), ALLOCATABLE, TARGET :: PSTACK4(:,:) TYPE(STACK), TARGET :: YLSTACK REAL(KIND=8) :: TS,TE @@ -199,13 +200,21 @@ PROGRAM MAIN_RAIN_ICE_OLD D0%NKTB = 1 D0%NKTE = KLEV +ISTSZ=0 +ISTSZ(KIND(LLMICRO)/4) = NPROMA * 3 * KLEV +ISTSZ(KIND(PRHODJ)/4) = ISTSZ(KIND(PRHODJ)/4) + NPROMA * 22 * KLEV !accumulate: shares the LLMICRO slot when REAL is 4 bytes +#ifdef USE_STACK #if defined(USE_COLCALL) && defined(_OPENACC) -ISTSZ = NPROMA * 1500 * KLEV +ISTSZ(1) = ISTSZ(1) * NPROMA !scale each slot exactly once, whatever the kinds +ISTSZ(2) = ISTSZ(2) * NPROMA +#endif #else -ISTSZ = NPROMA * 100 * KLEV +ISTSZ(2) = ISTSZ(2) + CEILING(ISTSZ(1) * 4. / KIND(PSTACK8)) !4-byte words expressed in PSTACK8 elements 
+ISTSZ(1) = 0 #endif -ALLOCATE (PSTACK (ISTSZ, NGPBLKS)) -ZMNH_STACK => PSTACK +ALLOCATE (PSTACK4 (ISTSZ(1), NGPBLKS)) +ALLOCATE (PSTACK8 (ISTSZ(2), NGPBLKS)) +ZMNH_STACK => PSTACK8 TS = OMP_GET_WTIME () @@ -234,7 +243,7 @@ PROGRAM MAIN_RAIN_ICE_OLD !$acc & PTHT, PRT, PSIGS, PSEA, PTOWN, PICENU, PKGN_ACON, PKGN_SBGR) & !$acc & copy (PCIT, PIFR, PTHS, PRS) & !$acc & copyout (ZINPRC, PINPRR, PEVAP, PINPRS, PINPRG, PFPR) & -!$acc & create (PSTACK) +!$acc & create (PSTACK4, PSTACK8) TSC = OMP_GET_WTIME () @@ -271,17 +280,22 @@ PROGRAM MAIN_RAIN_ICE_OLD #ifdef USE_STACK !Using cray pointers, AROME mechanism #if defined(USE_COLCALL) && defined(_OPENACC) - !Due to the collapse (2) directive, each point must have its own stack - YLSTACK%L = LOC (PSTACK (1, IBL)) + CEILING(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 * (JLON - 1) - YLSTACK%U = YLSTACK%L + FLOOR(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 + !Each point must have its own stack, otherwise the different threads will use the same + !memory region during the packing operation + YLSTACK%L(1) = LOC (PSTACK4 (1, IBL)) + CEILING(ISTSZ(1) * KIND (PSTACK4) / NPROMA / 4.) * 4 * (JLON - 1) + YLSTACK%U(1) = YLSTACK%L(1) + FLOOR(ISTSZ(1) * KIND (PSTACK4) / NPROMA / 4.) * 4 + YLSTACK%L(2) = LOC (PSTACK8 (1, IBL)) + CEILING(ISTSZ(2) * KIND (PSTACK8) / NPROMA / 8.) * 8 * (JLON - 1) + YLSTACK%U(2) = YLSTACK%L(2) + FLOOR(ISTSZ(2) * KIND (PSTACK8) / NPROMA / 8.) 
* 8 #else - YLSTACK%L = LOC (PSTACK (1, IBL)) - YLSTACK%U = YLSTACK%L + ISTSZ * KIND (PSTACK) + YLSTACK%L(1) = LOC (PSTACK4 (1, IBL)) + YLSTACK%U(1) = YLSTACK%L(1) + ISTSZ(1) * KIND (PSTACK4) + YLSTACK%L(2) = LOC (PSTACK8 (1, IBL)) + YLSTACK%U(2) = YLSTACK%L(2) + ISTSZ(2) * KIND (PSTACK8) #endif #else !Using fortran indexing, Meso-NH mechanism - YLSTACK%L = 1 - YLSTACK%U = ISTSZ + YLSTACK%L(2) = 1 + YLSTACK%U(2) = ISTSZ(2) IMNH_BLOCK => IBL YMNH_STACK => YLSTACK INUMPIN = 0 diff --git a/src/testprogs/shallow/main_shallow.F90 b/src/testprogs/shallow/main_shallow.F90 index 4840afc1..6d95a493 100644 --- a/src/testprogs/shallow/main_shallow.F90 +++ b/src/testprogs/shallow/main_shallow.F90 @@ -111,10 +111,11 @@ PROGRAM MAIN_SHALLOW LOGICAL :: LLCHECKDIFF LOGICAL :: LLDIFF INTEGER :: IBLOCK1, IBLOCK2 -INTEGER :: ISTSZ, JBLK1, JBLK2 +INTEGER :: ISTSZ(2), JBLK1, JBLK2 INTEGER :: NTID, ITID -REAL, ALLOCATABLE, TARGET :: PSTACK(:,:) +REAL, ALLOCATABLE, TARGET :: PSTACK8(:,:) +REAL(KIND=4), ALLOCATABLE, TARGET :: PSTACK4(:,:) TYPE(STACK), TARGET :: YLSTACK REAL(KIND=8) :: TS,TE @@ -208,13 +209,17 @@ PROGRAM MAIN_SHALLOW D0%NIEC = D0%NIE D0%NJEC = D0%NJT -#if defined(USE_COLCALL) && defined(_OPENACC) -ISTSZ = NPROMA * 3000 * KLEV -#else -ISTSZ = NPROMA * 100 * KLEV +ISTSZ=0 +ISTSZ(KIND(IKLCL)/4) = NPROMA * 2 * KLEV +ISTSZ(KIND(PRHODJ)/4) = ISTSZ(KIND(PRHODJ)/4) + NPROMA * 39 * KLEV !accumulate: shares the IKLCL slot when REAL is 4 bytes +ISTSZ(KIND(PRHODJ)/4) = ISTSZ(KIND(PRHODJ)/4) + NPROMA * 16 !for ZBUF(KLON,16) +#ifndef USE_STACK +ISTSZ(2) = ISTSZ(2) + CEILING(ISTSZ(1) * 4. / KIND(PSTACK8)) !4-byte words expressed in PSTACK8 elements 
+ISTSZ(1) = 0 #endif -ALLOCATE (PSTACK (ISTSZ, NGPBLKS)) -ZMNH_STACK => PSTACK +ALLOCATE (PSTACK4 (ISTSZ(1), NGPBLKS)) +ALLOCATE (PSTACK8 (ISTSZ(2), NGPBLKS)) +ZMNH_STACK => PSTACK8 TS = OMP_GET_WTIME () @@ -238,7 +243,7 @@ PROGRAM MAIN_SHALLOW !$acc & copy (PTHL_UP, PRT_UP, PRV_UP, PRC_UP, PRI_UP, PU_UP, PV_UP, PTHV_UP, PW_UP, PFRAC_UP, PEMF) & !$acc & copyout (PDUDT_MF, PDVDT_MF, PDTHLDT_MF, PDRTDT_MF, PDSVDT_MF, PSIGMF, PRC_MF, PRI_MF, PCF_MF, & !$acc & PFLXZTHVMF, ZFLXZTHMF, ZFLXZRMF, ZFLXZUMF, ZFLXZVMF, ZDETR, ZENTR, IKLCL, IKETL, IKCTL) & -!$acc & create (PSTACK) +!$acc & create (PSTACK4, PSTACK8) TSC = OMP_GET_WTIME () @@ -273,18 +278,14 @@ PROGRAM MAIN_SHALLOW #ifdef USE_STACK !Using cray pointers, AROME mechanism -#if defined(USE_COLCALL) && defined(_OPENACC) - !Due to the collapse (2) directive, each point must have its own stack - YLSTACK%L = LOC (PSTACK (1, IBL)) + CEILING(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 * (JLON - 1) - YLSTACK%U = YLSTACK%L + FLOOR(ISTSZ * KIND (PSTACK) / NPROMA / 8.) 
* 8 -#else - YLSTACK%L = LOC (PSTACK (1, IBL)) - YLSTACK%U = YLSTACK%L + ISTSZ * KIND (PSTACK) -#endif + YLSTACK%L(1) = LOC (PSTACK4 (1, IBL)) + YLSTACK%U(1) = YLSTACK%L(1) + ISTSZ(1) * KIND (PSTACK4) + YLSTACK%L(2) = LOC (PSTACK8 (1, IBL)) + YLSTACK%U(2) = YLSTACK%L(2) + ISTSZ(2) * KIND (PSTACK8) #else !Using fortran indexing, Meso-NH mechanism - YLSTACK%L = 1 - YLSTACK%U = ISTSZ + YLSTACK%L(2) = 1 + YLSTACK%U(2) = ISTSZ(2) IMNH_BLOCK => IBL YMNH_STACK => YLSTACK INUMPIN = 0 diff --git a/src/testprogs/support/mode_mnh_zwork.F90 b/src/testprogs/support/mode_mnh_zwork.F90 index 29bc4dcb..07895429 100644 --- a/src/testprogs/support/mode_mnh_zwork.F90 +++ b/src/testprogs/support/mode_mnh_zwork.F90 @@ -50,9 +50,9 @@ SUBROUTINE MNH_ALLOCATE_GT1DFLAT(OTAB, KIB, KIE) IOFFSET = CEILING(IOFFSET * REAL(KIND(OTAB)) / REAL(KIND(ZMNH_STACK))) ISIZE = CEILING(ISIZE * REAL(KIND(OTAB)) / REAL(KIND(ZMNH_STACK))) - CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L - IOFFSET, IMNH_BLOCK)), OTAB, [KIE - KIB + 1]) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_GT1DFLAT', 0) + CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L(2) - IOFFSET, IMNH_BLOCK)), OTAB, [KIE - KIB + 1]) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL SOF('MNH_ALLOCATE_GT1DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_GT1DFLAT @@ -81,9 +81,9 @@ SUBROUTINE MNH_ALLOCATE_GT2DFLAT(OTAB, KIB, KIE, KJB, KJE) IOFFSET = CEILING(IOFFSET * REAL(KIND(OTAB)) / REAL(KIND(ZMNH_STACK))) ISIZE = CEILING(ISIZE * REAL(KIND(OTAB)) / REAL(KIND(ZMNH_STACK))) - CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L - IOFFSET, IMNH_BLOCK)), OTAB, [KIE - KIB + 1, KJE - KJB + 1]) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_GT2DFLAT', 0) + CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L(2) - IOFFSET, IMNH_BLOCK)), OTAB, [KIE - KIB + 1, KJE - KJB + 1]) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL 
SOF('MNH_ALLOCATE_GT2DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_GT2DFLAT @@ -116,9 +116,9 @@ SUBROUTINE MNH_ALLOCATE_GT3DFLAT(OTAB, KIB, KIE, KJB, KJE, KKB, KKE) IOFFSET = CEILING(IOFFSET * REAL(KIND(OTAB)) / REAL(KIND(ZMNH_STACK))) ISIZE = CEILING(ISIZE * REAL(KIND(OTAB)) / REAL(KIND(ZMNH_STACK))) - CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L - IOFFSET, IMNH_BLOCK)), OTAB, [KIE - KIB + 1, KJE - KJB + 1, KKE - KKB + 1]) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_GT3DFLAT', 0) + CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L(2) - IOFFSET, IMNH_BLOCK)), OTAB, [KIE - KIB + 1, KJE - KJB + 1, KKE - KKB + 1]) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL SOF('MNH_ALLOCATE_GT3DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_GT3DFLAT @@ -146,9 +146,9 @@ SUBROUTINE MNH_ALLOCATE_IT1DFLAT(KTAB, KIB, KIE) IOFFSET = CEILING(IOFFSET * REAL(KIND(KTAB)) / REAL(KIND(ZMNH_STACK))) ISIZE = CEILING(ISIZE * REAL(KIND(KTAB)) / REAL(KIND(ZMNH_STACK))) - CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L - IOFFSET, IMNH_BLOCK)), KTAB, [KIE - KIB + 1]) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_IT1DFLAT', 0) + CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L(2) - IOFFSET, IMNH_BLOCK)), KTAB, [KIE - KIB + 1]) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL SOF('MNH_ALLOCATE_IT1DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_IT1DFLAT @@ -177,9 +177,9 @@ SUBROUTINE MNH_ALLOCATE_IT2DFLAT(KTAB, KIB, KIE, KJB, KJE) IOFFSET = CEILING(IOFFSET * REAL(KIND(KTAB)) / REAL(KIND(ZMNH_STACK))) ISIZE = CEILING(ISIZE * REAL(KIND(KTAB)) / REAL(KIND(ZMNH_STACK))) - CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L - IOFFSET, IMNH_BLOCK)), KTAB, [KIE - KIB + 1, KJE - KJB + 1]) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_IT2DFLAT', 0) + CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L(2) - IOFFSET, IMNH_BLOCK)), KTAB, [KIE - KIB + 1, 
KJE - KJB + 1]) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL SOF('MNH_ALLOCATE_IT2DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_IT2DFLAT @@ -212,9 +212,9 @@ SUBROUTINE MNH_ALLOCATE_IT3DFLAT(KTAB, KIB, KIE, KJB, KJE, KKB, KKE) IOFFSET = CEILING(IOFFSET * REAL(KIND(KTAB)) / REAL(KIND(ZMNH_STACK))) ISIZE = CEILING(ISIZE * REAL(KIND(KTAB)) / REAL(KIND(ZMNH_STACK))) - CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L - IOFFSET, IMNH_BLOCK)), KTAB, [KIE - KIB + 1, KJE - KJB + 1, KKE - KKB + 1]) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_IT3DFLAT', 0) + CALL C_F_POINTER(C_LOC(ZMNH_STACK(YMNH_STACK%L(2) - IOFFSET, IMNH_BLOCK)), KTAB, [KIE - KIB + 1, KJE - KJB + 1, KKE - KKB + 1]) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL SOF('MNH_ALLOCATE_IT3DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_IT3DFLAT @@ -239,9 +239,9 @@ SUBROUTINE MNH_ALLOCATE_ZT1DFLAT(PTAB, KIB, KIE) ISIZE = (KIE - KIB + 1) - PTAB(KIB:KIE) => ZMNH_STACK(YMNH_STACK%L:YMNH_STACK%L+ISIZE-1, IMNH_BLOCK) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_ZT1DFLAT', 0) + PTAB(KIB:KIE) => ZMNH_STACK(YMNH_STACK%L(2):YMNH_STACK%L(2)+ISIZE-1, IMNH_BLOCK) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL SOF('MNH_ALLOCATE_ZT1DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_ZT1DFLAT @@ -266,9 +266,9 @@ SUBROUTINE MNH_ALLOCATE_ZT2DFLAT(PTAB, KIB, KIE, KJB, KJE) ISIZE = (KIE - KIB + 1) * (KJE - KJB + 1) - PTAB(KIB:KIE, KJB:KJE) => ZMNH_STACK(YMNH_STACK%L:YMNH_STACK%L+ISIZE-1, IMNH_BLOCK) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_ZT2DFLAT', 0) + PTAB(KIB:KIE, KJB:KJE) => ZMNH_STACK(YMNH_STACK%L(2):YMNH_STACK%L(2)+ISIZE-1, IMNH_BLOCK) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL SOF('MNH_ALLOCATE_ZT2DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_ZT2DFLAT @@ -296,9 +296,9 @@ SUBROUTINE 
MNH_ALLOCATE_ZT3DFLAT(PTAB, KIB, KIE, KJB, KJE, KKB, KKE) ISIZE = (KIE - KIB + 1) * (KJE - KJB + 1) * (KKE - KKB + 1) - PTAB(KIB:KIE, KJB:KJE, KKB:KKE) => ZMNH_STACK(YMNH_STACK%L:YMNH_STACK%L+ISIZE-1, IMNH_BLOCK) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_ZT3DFLAT', 0) + PTAB(KIB:KIE, KJB:KJE, KKB:KKE) => ZMNH_STACK(YMNH_STACK%L(2):YMNH_STACK%L(2)+ISIZE-1, IMNH_BLOCK) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL SOF('MNH_ALLOCATE_ZT3DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_ZT3DFLAT @@ -329,9 +329,9 @@ SUBROUTINE MNH_ALLOCATE_ZT4DFLAT(PTAB, KIB, KIE, KJB, KJE, KKB, KKE, KPB, KPE) ISIZE = (KIE - KIB + 1) * (KJE - KJB + 1) * (KKE - KKB + 1) * (KPE - KPB + 1) - PTAB(KIB:KIE, KJB:KJE, KKB:KKE, KPB:KPE) => ZMNH_STACK(YMNH_STACK%L:YMNH_STACK%L+ISIZE-1, IMNH_BLOCK) - YMNH_STACK%L=YMNH_STACK%L+ISIZE - IF(YMNH_STACK%L>YMNH_STACK%U) CALL SOF('MNH_ALLOCATE_ZT4DFLAT', 0) + PTAB(KIB:KIE, KJB:KJE, KKB:KKE, KPB:KPE) => ZMNH_STACK(YMNH_STACK%L(2):YMNH_STACK%L(2)+ISIZE-1, IMNH_BLOCK) + YMNH_STACK%L(2)=YMNH_STACK%L(2)+ISIZE + IF(YMNH_STACK%L(2)>YMNH_STACK%U(2)) CALL SOF('MNH_ALLOCATE_ZT4DFLAT', 0) END SUBROUTINE MNH_ALLOCATE_ZT4DFLAT @@ -355,13 +355,13 @@ SUBROUTINE MNH_MEM_POSITION_PIN(HSUBR) PRINT*, "PINVAL too small" STOP 2 ENDIF - PINVAL(INUMPIN) = YMNH_STACK%L + PINVAL(INUMPIN) = YMNH_STACK%L(2) END SUBROUTINE MNH_MEM_POSITION_PIN SUBROUTINE MNH_MEM_RELEASE(HSUBR) CHARACTER(LEN=*), OPTIONAL, INTENT(IN) :: HSUBR !NAME OF THE CALLING SUBROUTINE - YMNH_STACK%L = PINVAL(INUMPIN) + YMNH_STACK%L(2) = PINVAL(INUMPIN) INUMPIN = INUMPIN - 1 END SUBROUTINE MNH_MEM_RELEASE diff --git a/src/testprogs/support/stack_mod.F90 b/src/testprogs/support/stack_mod.F90 index d24d74c0..63b291cc 100644 --- a/src/testprogs/support/stack_mod.F90 +++ b/src/testprogs/support/stack_mod.F90 @@ -3,7 +3,7 @@ MODULE STACK_MOD IMPLICIT NONE TYPE STACK - INTEGER*8 :: L, U + INTEGER*8 :: L(2), U(2) END TYPE PRIVATE @@ -12,13 +12,13 
@@ MODULE STACK_MOD CONTAINS -SUBROUTINE SOF (CDFILE, KLINE) +SUBROUTINE SOF (CDFILE, KKIND) !$acc routine (SOF) seq CHARACTER (LEN=*), INTENT(IN) :: CDFILE -INTEGER, INTENT(IN) :: KLINE +INTEGER, INTENT(IN) :: KKIND -PRINT *, 'AT ', CDFILE, ':', KLINE +PRINT *, 'AT ', CDFILE, ', KIND=', KKIND #ifndef _OPENACC CALL ABORT !to get traceback #endif diff --git a/src/testprogs/turb_mnh/main_turb.F90 b/src/testprogs/turb_mnh/main_turb.F90 index de3879a7..22e7b3ba 100644 --- a/src/testprogs/turb_mnh/main_turb.F90 +++ b/src/testprogs/turb_mnh/main_turb.F90 @@ -121,12 +121,13 @@ PROGRAM MAIN_TURB LOGICAL :: LLCHECKDIFF LOGICAL :: LLDIFF INTEGER :: IBLOCK1, IBLOCK2 -INTEGER :: ISTSZ, JBLK1, JBLK2 +INTEGER :: ISTSZ(2), JBLK1, JBLK2 INTEGER :: NTID, ITID INTEGER :: JRR TYPE(TLES_t) :: TLES -REAL, ALLOCATABLE, TARGET :: PSTACK(:,:) +REAL, ALLOCATABLE, TARGET :: PSTACK8(:,:) +REAL(KIND=4), ALLOCATABLE, TARGET :: PSTACK4(:,:) TYPE(STACK), TARGET :: YLSTACK REAL(KIND=8) :: TS,TE @@ -225,13 +226,16 @@ PROGRAM MAIN_TURB D0%NIEC = D0%NIE D0%NJEC = D0%NJT -#if defined(USE_COLCALL) && defined(_OPENACC) -ISTSZ = NPROMA * 6000 * KLEV -#else -ISTSZ = NPROMA * 200 * KLEV +ISTSZ=0 +ISTSZ(KIND(KLEV)/4) = NPROMA * 1 * KLEV +ISTSZ(KIND(PRHODJ)/4) = ISTSZ(KIND(PRHODJ)/4) + NPROMA * 91 * KLEV !accumulate: shares the KLEV slot when REAL is 4 bytes +#ifndef USE_STACK +ISTSZ(2) = ISTSZ(2) + CEILING(ISTSZ(1) * 4. / KIND(PSTACK8)) !4-byte words expressed in PSTACK8 elements 
+ISTSZ(1) = 0 #endif -ALLOCATE (PSTACK (ISTSZ, NGPBLKS)) -ZMNH_STACK => PSTACK +ALLOCATE (PSTACK4 (ISTSZ(1), NGPBLKS)) +ALLOCATE (PSTACK8 (ISTSZ(2), NGPBLKS)) +ZMNH_STACK => PSTACK8 TS = OMP_GET_WTIME () @@ -257,7 +261,7 @@ PROGRAM MAIN_TURB !$acc & copy (ZBL_DEPTH, ZSBL_DEPTH, PTHM, ZRM, PRUS, PRVS, PRWS, PRTHS, ZRRS, ZRSVS, PRTKES_OUT) & !$acc & copyout (PSIGS, ZWTH, ZWRC, ZWSV, PDP, PTP, PTDIFF, PTDISS, PEDR, PTPMF, PDRUS_TURB, PDRVS_TURB, & !$acc & PDRTHLS_TURB, PDRRTS_TURB, ZDRSVS_TURB) & -!$acc & create (PSTACK) +!$acc & create (PSTACK4, PSTACK8) TSC = OMP_GET_WTIME () @@ -292,18 +296,14 @@ PROGRAM MAIN_TURB #ifdef USE_STACK !Using cray pointers, AROME mechanism -#if defined(USE_COLCALL) && defined(_OPENACC) - !Due to the collapse (2) directive, each point must have its own stack - YLSTACK%L = LOC (PSTACK (1, IBL)) + CEILING(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 * (JLON - 1) - YLSTACK%U = YLSTACK%L + FLOOR(ISTSZ * KIND (PSTACK) / NPROMA / 8.) * 8 -#else - YLSTACK%L = LOC (PSTACK (1, IBL)) - YLSTACK%U = YLSTACK%L + ISTSZ * KIND (PSTACK) -#endif + YLSTACK%L(1) = LOC (PSTACK4 (1, IBL)) + YLSTACK%U(1) = YLSTACK%L(1) + ISTSZ(1) * KIND (PSTACK4) + YLSTACK%L(2) = LOC (PSTACK8 (1, IBL)) + YLSTACK%U(2) = YLSTACK%L(2) + ISTSZ(2) * KIND (PSTACK8) #else !Using fortran indexing, Meso-NH mechanism - YLSTACK%L = 1 - YLSTACK%U = ISTSZ + YLSTACK%L(2) = 1 + YLSTACK%U(2) = ISTSZ(2) IMNH_BLOCK => IBL YMNH_STACK => YLSTACK INUMPIN = 0 diff --git a/tools/INSTALL.sh b/tools/INSTALL.sh index 389c548b..cf7edd98 100755 --- a/tools/INSTALL.sh +++ b/tools/INSTALL.sh @@ -4,7 +4,7 @@ set -e set -o pipefail #abort if left command on a pipe fails -pyft_version=50c0efa5877feec73cff2b1ce73f197d52417b51 +pyft_version=9ac8679a7d3116530246a193c5f69d4abae4f80b #This script installs PHYEX #Call the script with the -h option to get more information.