Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove non-standard SIZEOF from GPU subtree #161

Merged
merged 4 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/trans/gpu/algor/buffered_allocator_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION)
END FUNCTION GET_ALLOCATION

SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM)
USE ISO_C_BINDING, ONLY: C_FLOAT
USE ISO_C_BINDING, ONLY: C_FLOAT, C_F_POINTER, C_SIZEOF
IMPLICIT NONE
INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:)
REAL(KIND=C_FLOAT), POINTER, INTENT(OUT) :: DST(:)
Expand Down Expand Up @@ -148,10 +148,10 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE
!$ACC END KERNELS!! LOOP
ENDIF
CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, &
& [SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/SIZEOF(DST(0))])
& [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))])
END SUBROUTINE ASSIGN_PTR_FLOAT
SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM)
USE ISO_C_BINDING, ONLY: C_DOUBLE
USE ISO_C_BINDING, ONLY: C_DOUBLE, C_F_POINTER, C_SIZEOF
IMPLICIT NONE
INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:)
REAL(KIND=C_DOUBLE), POINTER, INTENT(OUT) :: DST(:)
Expand Down Expand Up @@ -181,6 +181,6 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU
!$ACC END KERNELS!! LOOP
ENDIF
CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, &
& [SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/SIZEOF(DST(0))])
& [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))])
END SUBROUTINE ASSIGN_PTR_DOUBLE
END MODULE
10 changes: 5 additions & 5 deletions src/trans/gpu/algor/ext_acc.F90
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ module openacc_ext_type
end type
end module
module openacc_ext
use iso_c_binding, only: c_ptr, c_size_t, c_loc
use iso_c_binding, only: c_ptr, c_size_t, c_loc, c_sizeof
use openacc, only: acc_create, acc_copyin, acc_handle_kind
use openacc_ext_type, only: ext_acc_arr_desc
implicit none
Expand Down Expand Up @@ -268,7 +268,7 @@ subroutine ext_acc_create(ptrs, stream)
num_ranges = get_common_pointers(ptrs, common_ptrs)

do i = 1, num_ranges
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/sizeof(pp(1))])
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))])
!!call acc_create_async(pp, common_ptrs(i)%sz, async=stream_act)
call acc_create(pp, int(common_ptrs(i)%sz))
enddo
Expand All @@ -295,7 +295,7 @@ subroutine ext_acc_copyin(ptrs, stream)
num_ranges = get_common_pointers(ptrs, common_ptrs)

do i = 1, num_ranges
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/sizeof(pp(1))])
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))])
!!call acc_copyin_async(pp, common_ptrs(i)%sz, async=stream_act)
call acc_copyin(pp, int(common_ptrs(i)%sz))
enddo
Expand All @@ -322,7 +322,7 @@ subroutine ext_acc_copyout(ptrs, stream)
num_ranges = get_common_pointers(ptrs, common_ptrs)

do i = 1, num_ranges
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/sizeof(pp(1))])
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))])
!!call acc_copyout_async(pp, common_ptrs(i)%sz, async=stream_act)
call acc_copyout(pp, int(common_ptrs(i)%sz))
enddo
Expand All @@ -349,7 +349,7 @@ subroutine ext_acc_delete(ptrs, stream)
num_ranges = get_common_pointers(ptrs, common_ptrs)

do i = 1, num_ranges
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/sizeof(pp(1))])
call c_f_pointer(common_ptrs(i)%ptr, pp, shape=[common_ptrs(i)%sz/c_sizeof(pp(1))])
!!call acc_delete_async(pp, common_ptrs(i)%sz, async=stream_act)
call acc_delete(pp, int(common_ptrs(i)%sz))
enddo
Expand Down
8 changes: 4 additions & 4 deletions src/trans/gpu/internal/ftdir_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ FUNCTION PREPARE_FTDIR(ALLOCATOR,KF_FS) RESULT(HFTDIR)
USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT
USE TPM_DISTR, ONLY: D
USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE
USE ISO_C_BINDING, ONLY: C_SIZE_T
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF

IMPLICIT NONE

Expand All @@ -36,7 +36,7 @@ FUNCTION PREPARE_FTDIR(ALLOCATOR,KF_FS) RESULT(HFTDIR)
REAL(KIND=JPRBT) :: DUMMY

#ifndef IN_PLACE_FFT
HFTDIR%HREEL_COMPLEX = RESERVE(ALLOCATOR, INT(KF_FS*D%NLENGTF*SIZEOF(DUMMY), KIND=C_SIZE_T))
HFTDIR%HREEL_COMPLEX = RESERVE(ALLOCATOR, INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(DUMMY))
#endif
END FUNCTION PREPARE_FTDIR

Expand Down Expand Up @@ -82,7 +82,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD)
USE TPM_HICFFT, ONLY: EXECUTE_DIR_FFT
USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
USE ISO_C_BINDING, ONLY: C_SIZE_T
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF

IMPLICIT NONE

Expand All @@ -98,7 +98,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD)
PREEL_COMPLEX => PREEL_REAL
#else
CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX),&
& 1_C_SIZE_T, INT(KFIELD*D%NLENGTF*SIZEOF(PREEL_COMPLEX(1)),KIND=C_SIZE_T))
& 1_C_SIZE_T, INT(KFIELD*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(PREEL_COMPLEX(1)))
#endif

#ifdef ACCGPU
Expand Down
8 changes: 4 additions & 4 deletions src/trans/gpu/internal/ftinv_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ FUNCTION PREPARE_FTINV(ALLOCATOR,KF_FS) RESULT(HFTINV)
USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT
USE TPM_DISTR, ONLY: D
USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE
USE ISO_C_BINDING, ONLY: C_SIZE_T
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF

IMPLICIT NONE

Expand All @@ -35,7 +35,7 @@ FUNCTION PREPARE_FTINV(ALLOCATOR,KF_FS) RESULT(HFTINV)
REAL(KIND=JPRBT) :: DUMMY

#ifndef IN_PLACE_FFT
HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, INT(D%NLENGTF*KF_FS*SIZEOF(DUMMY),KIND=C_SIZE_T))
HFTINV%HREEL_REAL = RESERVE(ALLOCATOR, INT(D%NLENGTF*KF_FS,KIND=C_SIZE_T)*C_SIZEOF(DUMMY))
#endif
END FUNCTION

Expand Down Expand Up @@ -80,7 +80,7 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD)
USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
USE BUFFERED_ALLOCATOR_MOD, ONLY: ASSIGN_PTR, GET_ALLOCATION
USE ISO_C_BINDING, ONLY: C_SIZE_T
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF

IMPLICIT NONE

Expand All @@ -96,7 +96,7 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD)
PREEL_REAL => PREEL_COMPLEX
#else
CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL),&
& 1_C_SIZE_T, INT(KFIELD*D%NLENGTF*SIZEOF(PREEL_REAL(1)),KIND=C_SIZE_T))
& 1_C_SIZE_T, INT(KFIELD*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(PREEL_REAL(1)))
#endif

#ifdef OMPGPU
Expand Down
20 changes: 10 additions & 10 deletions src/trans/gpu/internal/ltdir_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ MODULE LTDIR_MOD
FUNCTION PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) RESULT(HLTDIR)
USE TPM_DISTR, ONLY: D
USE TPM_DIM, ONLY: R
USE ISO_C_BINDING, ONLY: C_SIZE_T
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF
USE LEDIR_MOD, ONLY: LEDIR_STRIDES
USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE

Expand All @@ -47,13 +47,13 @@ FUNCTION PREPARE_LTDIR(ALLOCATOR, KF_FS, KF_UV) RESULT(HLTDIR)
IOUT0_STRIDES0=IOUT0_STRIDES0,IOUT0_SIZE=IOUT0_SIZE)

! POA1
IALLOC_SZ = ALIGN(2*KF_FS*(R%NTMAX+3)*D%NUMP*SIZEOF(ZPRBT_DUMMY),128)
IALLOC_SZ = ALIGN(INT(2*KF_FS*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128)
! POA2
IALLOC_SZ = IALLOC_SZ + ALIGN(4*KF_UV*(R%NTMAX+3)*D%NUMP*SIZEOF(ZPRBT_DUMMY),128)
IALLOC_SZ = IALLOC_SZ + ALIGN(INT(4*KF_UV*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128)
! ZOUT
IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*SIZEOF(ZPRBT_DUMMY),128)
IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128)
! ZOUT0
IALLOC_SZ = IALLOC_SZ+ ALIGN(IOUT0_SIZE*SIZEOF(ZPRD_DUMMY),128)
IALLOC_SZ = IALLOC_SZ+ ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128)

HLTDIR%HOUT_AND_POA = RESERVE(ALLOCATOR, IALLOC_SZ)
END FUNCTION PREPARE_LTDIR
Expand All @@ -77,7 +77,7 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA
USE TPM_TRANS, ONLY: NF_SC2, NF_SC3A, NF_SC3B
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_F_POINTER, C_LOC
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_F_POINTER, C_LOC, C_SIZEOF

!**** *LTDIR* - Control of Direct Legendre transform step

Expand Down Expand Up @@ -186,26 +186,26 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA

IALLOC_POS = 1

IALLOC_SZ = ALIGN(2*KF_FS*(R%NTMAX+3)*D%NUMP*SIZEOF(POA1_L(1)),128)
IALLOC_SZ = ALIGN(INT(2*KF_FS*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(POA1_L(1)),128)
CALL ASSIGN_PTR(POA1_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
& IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
CALL C_F_POINTER(C_LOC(POA1_L), POA1, (/ 2*KF_FS, R%NTMAX+3, D%NUMP /))
IALLOC_POS = IALLOC_POS + IALLOC_SZ

IALLOC_SZ = ALIGN(4*KF_UV*(R%NTMAX+3)*D%NUMP*SIZEOF(POA2_L(1)),128)
IALLOC_SZ = ALIGN(INT(4*KF_UV*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(POA2_L(1)),128)
CALL ASSIGN_PTR(POA2_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
& IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
CALL C_F_POINTER(C_LOC(POA2_L), POA2, (/ 4*KF_UV, R%NTMAX+3, D%NUMP /))
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUT
IALLOC_SZ = ALIGN(IOUT_SIZE*SIZEOF(ZOUT(1)),128)
IALLOC_SZ = ALIGN(INT(IOUT_SIZE,C_SIZE_T)*C_SIZEOF(ZOUT(1)),128)
CALL ASSIGN_PTR(ZOUT, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
& IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUT0
IALLOC_SZ = ALIGN(IOUT0_SIZE*SIZEOF(ZOUT0(1)),128)
IALLOC_SZ = ALIGN(INT(IOUT0_SIZE,C_SIZE_T)*C_SIZEOF(ZOUT0(1)),128)
CALL ASSIGN_PTR(ZOUT0, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
& IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
IALLOC_POS = IALLOC_POS + IALLOC_SZ
Expand Down
32 changes: 16 additions & 16 deletions src/trans/gpu/internal/ltinv_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT(
USE PARKIND_ECTRANS, ONLY: JPIM, JPRBT, JPRD
USE TPM_DISTR, ONLY: D
USE TPM_DIM, ONLY: R
USE ISO_C_BINDING, ONLY: C_SIZE_T
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF
USE LEINV_MOD, ONLY: LEINV_STRIDES
USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE

Expand Down Expand Up @@ -63,7 +63,7 @@ FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT(
IF (LSCDERS) &
IF_READIN = IF_READIN + KF_SCALARS ! Scalars NS Derivatives

IPIA_SZ = ALIGN(2*IF_READIN*(R%NSMAX+3)*D%NUMP*SIZEOF(ZPRBT_DUMMY),128)
IPIA_SZ = ALIGN(INT(2*IF_READIN*(R%NSMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128)

! In Legendre space, we then ignore vorticity/divergence, if
! they don't need to be transformed.
Expand All @@ -77,21 +77,21 @@ FUNCTION PREPARE_LTINV(ALLOCATOR,KF_UV,KF_SCALARS,LVORGP,LDIVGP,LSCDERS) RESULT(
! PIA
IALLOC_SZ = IPIA_SZ
! ZINP
IALLOC_SZ = IALLOC_SZ + ALIGN(IIN_SIZE*SIZEOF(ZPRBT_DUMMY),128)
IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IIN_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128)
! ZINP0
IALLOC_SZ = IALLOC_SZ + ALIGN(IIN0_SIZE*SIZEOF(ZPRD_DUMMY),128)
IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IIN0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128)

HLTINV%HPIA_AND_IN = RESERVE(ALLOCATOR, IALLOC_SZ)

IALLOC_SZ = 0
! ZOUTA
IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*SIZEOF(ZPRBT_DUMMY),128)
IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128)
! ZOUTS
IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT_SIZE*SIZEOF(ZPRBT_DUMMY),128)
IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRBT_DUMMY),128)
! ZOUTA0
IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT0_SIZE*SIZEOF(ZPRD_DUMMY),128)
IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128)
! ZOUTS0
IALLOC_SZ = IALLOC_SZ + ALIGN(IOUT0_SIZE*SIZEOF(ZPRD_DUMMY),128)
IALLOC_SZ = IALLOC_SZ + ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZPRD_DUMMY),128)

HLTINV%HOUTS_AND_OUTA = RESERVE(ALLOCATOR, IALLOC_SZ)

Expand All @@ -118,7 +118,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,&
USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
USE TPM_GEN, ONLY: LSYNC_TRANS
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC, C_SIZEOF

!**** *LTINV* - Inverse Legendre transform
!
Expand Down Expand Up @@ -233,46 +233,46 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,&
IALLOC_POS = 1

! PIA
IALLOC_SZ = ALIGN(2*IF_READIN*(R%NTMAX+3)*D%NUMP*SIZEOF(PIA_L(1)),128)
IALLOC_SZ = ALIGN(INT(2*IF_READIN*(R%NTMAX+3)*D%NUMP,KIND=C_SIZE_T)*C_SIZEOF(PIA_L(1)),128)
CALL ASSIGN_PTR(PIA_L, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),&
& IALLOC_POS, IALLOC_SZ)
CALL C_F_POINTER(C_LOC(PIA_L), PIA, (/ 2*IF_READIN, R%NTMAX+3, D%NUMP /))
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZINP
IALLOC_SZ = ALIGN(IIN_SIZE*SIZEOF(ZINP(1)),128)
IALLOC_SZ = ALIGN(INT(IIN_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZINP(1)),128)
CALL ASSIGN_PTR(ZINP, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),&
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZINP0
IALLOC_SZ = ALIGN(IIN0_SIZE*SIZEOF(ZINP0(1)),128)
IALLOC_SZ = ALIGN(INT(IIN0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZINP0(1)),128)
CALL ASSIGN_PTR(ZINP0, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),&
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

IALLOC_POS = 1

! ZOUTA
IALLOC_SZ = ALIGN(IOUT_SIZE*SIZEOF(ZOUTA(1)),128)
IALLOC_SZ = ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZOUTA(1)),128)
CALL ASSIGN_PTR(ZOUTA, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),&
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUTS
IALLOC_SZ = ALIGN(IOUT_SIZE*SIZEOF(ZOUTS(1)),128)
IALLOC_SZ = ALIGN(INT(IOUT_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZOUTS(1)),128)
CALL ASSIGN_PTR(ZOUTS, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),&
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUTA0
IALLOC_SZ = ALIGN(IOUT0_SIZE*SIZEOF(ZOUTA0(1)),128)
IALLOC_SZ = ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZOUTA0(1)),128)
CALL ASSIGN_PTR(ZOUTA0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),&
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUTS0
IALLOC_SZ = ALIGN(IOUT0_SIZE*SIZEOF(ZOUTS0(1)),128)
IALLOC_SZ = ALIGN(INT(IOUT0_SIZE,KIND=C_SIZE_T)*C_SIZEOF(ZOUTS0(1)),128)
CALL ASSIGN_PTR(ZOUTS0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),&
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ
Expand Down
3 changes: 0 additions & 3 deletions src/trans/gpu/internal/suleg_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
!

MODULE SULEG_MOD
#ifdef __NEC__
#define SIZEOF(x) STORAGE_SIZE(x)/KIND(x)
#endif
CONTAINS
SUBROUTINE SULEG
!DEC$ OPTIMIZE:1
Expand Down
17 changes: 9 additions & 8 deletions src/trans/gpu/internal/trgtol_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ FUNCTION PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) RESULT(HTRGTOL)
USE PARKIND_ECTRANS, ONLY: JPIM, JPRB, JPRBT
USE TPM_DISTR, ONLY: D
USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, RESERVE
USE ISO_C_BINDING, ONLY: C_SIZE_T
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF

IMPLICIT NONE

Expand All @@ -37,10 +37,10 @@ FUNCTION PREPARE_TRGTOL(ALLOCATOR,KF_GP,KF_FS) RESULT(HTRGTOL)

INTEGER(KIND=C_SIZE_T) :: NELEM

HTRGTOL%HCOMBUFS = RESERVE(ALLOCATOR, int(KF_GP*D%NGPTOT*SIZEOF(DUMMY),kind=c_size_t))
HTRGTOL%HCOMBUFS = RESERVE(ALLOCATOR, INT(KF_GP*D%NGPTOT,KIND=C_SIZE_T)*C_SIZEOF(DUMMY))

NELEM = KF_FS*D%NLENGTF*SIZEOF(DUMMY) ! ZCOMBUFR
NELEM = NELEM + KF_FS*D%NLENGTF*SIZEOF(DUMMY) ! PREEL_REAL
NELEM = INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(DUMMY) ! ZCOMBUFR
NELEM = NELEM + INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(DUMMY) ! PREEL_REAL
HTRGTOL%HCOMBUFR_AND_REEL = RESERVE(ALLOCATOR, NELEM)
END FUNCTION PREPARE_TRGTOL

Expand Down Expand Up @@ -119,7 +119,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
#endif
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
USE TPM_TRANS, ONLY: NPROMA
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_FLOAT, C_DOUBLE, C_INT8_T
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_FLOAT, C_DOUBLE, C_INT8_T, C_SIZEOF
USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION
USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, &
& EXT_ACC_DELETE
Expand Down Expand Up @@ -325,7 +325,8 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,

block
CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL),&
& int(KF_FS*D%NLENGTF*SIZEOF(PREEL_REAL(1))+1,kind=c_size_t), int(KF_FS*D%NLENGTF*SIZEOF(PREEL_REAL(1)),kind=c_size_t))
& INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(PREEL_REAL(1))+1_C_SIZE_T, &
& INT(KF_FS*D%NLENGTF,KIND=C_SIZE_T)*C_SIZEOF(PREEL_REAL(1)))
!!CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL), size1, size2)
end block

Expand Down Expand Up @@ -456,7 +457,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,

IF (ISEND_COUNTS > 0) THEN
CALL ASSIGN_PTR(ZCOMBUFS, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFS),&
& 1_C_SIZE_T, int(ICOMBUFS_OFFSET(ISEND_COUNTS+1)*SIZEOF(ZCOMBUFS(1)),kind=c_size_t))
& 1_C_SIZE_T, INT(ICOMBUFS_OFFSET(ISEND_COUNTS+1),KIND=C_SIZE_T)*C_SIZEOF(ZCOMBUFS(1)))
ENDIF

!....Pack loop.........................................................
Expand Down Expand Up @@ -565,7 +566,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
CALL GSTATS(411,0)
IF (IRECV_COUNTS > 0) THEN
CALL ASSIGN_PTR(ZCOMBUFR, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL),&
& 1_C_SIZE_T, int(ICOMBUFR_OFFSET(IRECV_COUNTS+1)*SIZEOF(ZCOMBUFR(1)),kind=c_size_t))
& 1_C_SIZE_T, INT(ICOMBUFR_OFFSET(IRECV_COUNTS+1),KIND=C_SIZE_T)*C_SIZEOF(ZCOMBUFR(1)))
ENDIF
#ifdef OMPGPU
#endif
Expand Down
Loading
Loading