diff --git a/build/with_fcm/arch/arch-MIMPIIFC1805.EPONA.SNG.env b/build/with_fcm/arch/arch-MIMPIIFC1805.EPONA.SNG.env index 8fdefeb2..82587874 100644 --- a/build/with_fcm/arch/arch-MIMPIIFC1805.EPONA.SNG.env +++ b/build/with_fcm/arch/arch-MIMPIIFC1805.EPONA.SNG.env @@ -1 +1,3 @@ module load perl/5.30.3 + +OMP_NUM_THREADS_perf=64 diff --git a/build/with_fcm/arch/arch-MIMPIIFC1805.EPONA.env b/build/with_fcm/arch/arch-MIMPIIFC1805.EPONA.env index 8fdefeb2..82587874 100644 --- a/build/with_fcm/arch/arch-MIMPIIFC1805.EPONA.env +++ b/build/with_fcm/arch/arch-MIMPIIFC1805.EPONA.env @@ -1 +1,3 @@ module load perl/5.30.3 + +OMP_NUM_THREADS_perf=64 diff --git a/build/with_fcm/arch/arch-pgi_CPU_preGPU_MFflavour.env b/build/with_fcm/arch/arch-pgi_CPU_preGPU_MFflavour.env index a2097916..598c1402 100644 --- a/build/with_fcm/arch/arch-pgi_CPU_preGPU_MFflavour.env +++ b/build/with_fcm/arch/arch-pgi_CPU_preGPU_MFflavour.env @@ -4,7 +4,8 @@ if [ $(hostname | cut -c 1-7) == 'belenos' -o \ module load nvhpc-hpcx-cuda12/24.3 fi -NPOINTS_perf=20480 #5120*4 +NPOINTS_perf=32000 +NPROMA_perf=32 #stackMode can be AROME or MESONH #if stackMode is AROME, we must use the -fcray-pointer compilation option with gfortran diff --git a/build/with_fcm/arch/arch-pgi_GPU_MFflavour_SNG.env b/build/with_fcm/arch/arch-pgi_GPU_MFflavour_SNG.env new file mode 120000 index 00000000..6706cbc5 --- /dev/null +++ b/build/with_fcm/arch/arch-pgi_GPU_MFflavour_SNG.env @@ -0,0 +1 @@ +arch-pgi_CPU_preGPU_MFflavour.env \ No newline at end of file diff --git a/build/with_fcm/arch/arch-pgi_GPU_MFflavour_SNG.fcm b/build/with_fcm/arch/arch-pgi_GPU_MFflavour_SNG.fcm new file mode 100644 index 00000000..35577e75 --- /dev/null +++ b/build/with_fcm/arch/arch-pgi_GPU_MFflavour_SNG.fcm @@ -0,0 +1,27 @@ +# Compilation +$FCOMPILER = nvfortran +$BASE_FFLAGS = -g -c -byteswapio -Mlarge_arrays -fPIC -Mbackslash -Kieee -acc=gpu -gpu=nofma,lineinfo -Minfo=accel,all,ccff -cuda +$PROD_FFLAGS = -O2 -fast +$DEV_FFLAGS = -O1 
+$DEBUG_FFLAGS = -O0 -Ktrap=fp -C +$CCOMPILER = pgcc +$BASE_CFLAGS = -c -fPIC +$PROD_CFLAGS = -O2 +$DEV_CFLAGS = -O1 +$DEBUG_CFLAGS = +$OMP_FFLAGS = + +# Preprocessor +$FPP_FLAGS = LINUX LITTLE_ENDIAN LITTLE REPRO48 USE_STACK USE_COLCALL PARKIND1_SINGLE +$CPP_FLAGS = LINUX LITTLE_ENDIAN LITTLE PARKIND1_SINGLE +$FPP_FLAGS_TESTPROGS = WITHOUT_CXXDEMANGLE + +# Linker +$LINK = mpif90 +$BASE_LD = -lnvhpcwrapnvtx -cuda -acc +$OMP_LD = +$LD_EXE_TO_SHARED = -shared + +# Other +$AR = ar + diff --git a/tools/check_commit_testprogs.sh b/tools/check_commit_testprogs.sh index 4d198c38..668e7789 100755 --- a/tools/check_commit_testprogs.sh +++ b/tools/check_commit_testprogs.sh @@ -258,6 +258,7 @@ function submit { #!/bin/bash #SBATCH -n 1 #SBATCH -N 1 +#SBATCH -t 10 #SBATCH --export=$varToExport $GPU @@ -515,7 +516,8 @@ if [ $run -ge 1 -a "$perffile" != "" ]; then if [ $firstrun -eq 1 ]; then firstrun=0 - #Read prefered NPROMA and maximum number of points for performance evaluation + #Read preferred NPROMA, maximum number of points and number of OpenMP threads + #for performance evaluation . $TESTDIR/$name/build/with_fcm/arch_${archfile}/arch.env #Experiement size @@ -527,6 +529,7 @@ if [ $run -ge 1 -a "$perffile" != "" ]; then NTIMES=1 fi NPROMA=${NPROMA_perf-32} + OMP_NUM_THREADS=${OMP_NUM_THREADS_perf-8} NBLOCKS=$(($NPOINTS/$NPROMA/8*8)) #must be divisible by 8 perf_extrapolation_tag=$(NPROMA=$NPROMA; NBLOCKS=$NBLOCKS; NTIMES=$NTIMES; eval echo ${conf_extra_tag[4]}) @@ -540,7 +543,7 @@ if [ $run -ge 1 -a "$perffile" != "" ]; then fi fi - NPROMA=$NPROMA NBLOCKS=$NBLOCKS NTIMES=$NTIMES OMP_NUM_THREADS=8 $0 -r -t $t -a ${archfile} --no-check --no-perf -e 4 ${commit} + NPROMA=$NPROMA NBLOCKS=$NBLOCKS NTIMES=$NTIMES OMP_NUM_THREADS=${OMP_NUM_THREADS} $0 -r -t $t -a ${archfile} --no-check --no-perf -e 4 ${commit} file=$TESTDIR/$name/tests/with_fcm/arch_${archfile}/${t}${perf_extrapolation_tag}/Output_run if [ -f $file ]; then ZTD=$(grep -m 1 "ZTD =" $file | awk '{print $4}')