From d029c7183bce17a66616accce7885132c6d8251d Mon Sep 17 00:00:00 2001 From: smaeyama Date: Wed, 15 Mar 2023 17:54:21 +0900 Subject: [PATCH] Makefile for jfrs, flow --- run/Makefile | 201 +++++++++++++++++++-------------------- run/backup/Makefile_flow | 6 ++ run/backup/Makefile_jfrs | 6 ++ run/shoot | 14 +-- run/sub.q | 171 +++++++++++++++++++-------------- 5 files changed, 212 insertions(+), 186 deletions(-) diff --git a/run/Makefile b/run/Makefile index 31a60ec..71a0a2c 100644 --- a/run/Makefile +++ b/run/Makefile @@ -9,8 +9,10 @@ FFLAGS += -mcmodel=large # Static memory larger than 2GB #FFLAGS += -Haefosux -NRtrap #-O0 # Debug OPTRPT = 'lst' #FFLAGS += -Nfjprof # Fujitsu profiler fapp -#FFLAGS += -Ksimd_nouse_multiple_structures # Specific option for compiler tcs1.2.26 to avoid slowing down GKV -#FFLAGS += -Knosch_pre_ra # Specific option for compiler tcs1.2.26 to avoid slowing down GKV +FFLAGS += -Ksimd_nouse_multiple_structures # Specific option for compiler tcs1.2.26 to avoid slowing down GKV +FFLAGS += -Knosch_pre_ra # Specific option for compiler tcs1.2.26 to avoid slowing down GKV +INC = +LIB = PROG = 'gkvp.exe' @@ -21,121 +23,112 @@ MYL = ../lib/ MATH = gkvp_math_portable FFT = gkvp_fft_fftw -### Usage of FFTW +### Usage of FFTW (module load fftw-tune) ifeq ($(FFT),gkvp_fft_fftw) - ### FFTW-SVE - FFTW_DIR=/home/apps/r/OSS_CN/fftw-3.3.8/ - INC = -I$(FFTW_DIR)/include - LIB = -L$(FFTW_DIR)/lib64 -lfftw3 -lm -SSL2 - #### FFTW-SPACK (. /home/apps/oss/spack/share/spack/setup-env.sh; spack load fftw) ### - #FFTW_DIR=`spack location -i fftw` - #INC = -I$(FFTW_DIR)/include - #LIB = -L$(FFTW_DIR)/lib -lfftw3 -lm -SSL2 + #INC += -I$(FFTW_DIR)/include + #LIB += -L$(FFTW_DIR)/lib -lfftw3 -lm + LIB += -lfftw3 -lm endif FILEIO=gkvp_fileio_fortran #FILEIO=gkvp_fileio_netcdf -### Usage of NetCDF (. 
/home/apps/oss/spack/share/spack/setup-env.sh; spack load netcdf-fortran%fj) ### -### Operation of NetCDF has not yet been checked on Fugaku, Jan 26 2021 +### Usage of NetCDF (module load netcdf-fortran netcdf-c phdf5) +### NetCDF does not work on the FLOW supercomputer for now, Jan 17 2021 ifeq ($(FILEIO),gkvp_fileio_netcdf) - NETCDF_DIR=`spack location -i netcdf-fortran%fj` - INC += -I$(NETCDF_DIR)/include - LIB += -L$(NETCDF_DIR)/lib -lnetcdff -lnetcdf -lhdf5_hl -lhdf5 + #INC += -I$(NETCDF_FORTRAN_DIR)/include -I$(NETCDF_DIR)/include -I$(PHDF5_DIR)/include + #LIB += -L$(NETCDF_FORTRAN_DIR)/lib -L$(NETCDF_DIR)/lib -L$(PHDF5_DIR)/lib -lnetcdff -lnetcdf -lhdf5_hl -lhdf5 + LIB += -lnetcdff -lnetcdf -lhdf5_hl -lhdf5 endif -OBJS = gkvp_header.o\ - gkvp_mpienv.o\ - $(MATH).o\ - gkvp_clock.o\ - $(FILEIO).o\ - gkvp_intgrl.o\ - gkvp_tips.o\ - gkvp_vmecbzx.o\ - gkvp_igs.o\ - gkvp_ring.o\ - gkvp_bndry.o\ - gkvp_colli.o\ - $(FFT).o\ - gkvp_fld.o\ - gkvp_colliimp.o\ - gkvp_freq.o\ - gkvp_zfilter.o\ - gkvp_geom.o\ - gkvp_exb.o\ - gkvp_trans.o\ - gkvp_advnc.o\ - gkvp_shearflow.o\ - gkvp_dtc.o\ - gkvp_out.o\ - gkvp_set.o\ - gkvp_main.o -main: - (cp Makefile $(SRC); cd $(SRC); make gkvp) +gkvp: $(SRC)gkvp_header.f90\ + $(SRC)gkvp_mpienv.f90\ + $(MYL)$(MATH).f90\ + $(SRC)gkvp_clock.f90\ + $(SRC)$(FILEIO).f90\ + $(SRC)gkvp_intgrl.f90\ + $(SRC)gkvp_tips.f90\ + $(SRC)gkvp_vmecbzx.f90\ + $(SRC)gkvp_igs.f90\ + $(SRC)gkvp_ring.f90\ + $(SRC)gkvp_bndry.f90\ + $(SRC)gkvp_colli.f90\ + $(SRC)$(FFT).f90\ + $(SRC)gkvp_fld.f90\ + $(SRC)gkvp_colliimp.f90\ + $(SRC)gkvp_freq.f90\ + $(SRC)gkvp_zfilter.f90\ + $(SRC)gkvp_geom.f90\ + $(SRC)gkvp_exb.f90\ + $(SRC)gkvp_trans.f90\ + $(SRC)gkvp_advnc.f90\ + $(SRC)gkvp_shearflow.f90\ + $(SRC)gkvp_dtc.f90\ + $(SRC)gkvp_out.f90\ + $(SRC)gkvp_set.f90\ + $(SRC)gkvp_main.f90 -gkvp: $(OBJS) - $(FC) $(FFLAGS) $(OBJS) -o $(PROG) $(LIB) - mv $(PROG) ../run/ + $(FC) $(FFLAGS) -c $(SRC)gkvp_header.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_mpienv.f90 + $(FC) $(FFLAGS) -c $(MYL)$(MATH).f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_clock.f90 + $(FC) $(FFLAGS) -c $(SRC)$(FILEIO).f90 $(INC) + $(FC) $(FFLAGS) -c $(SRC)gkvp_intgrl.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_tips.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_vmecbzx.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_igs.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_ring.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_bndry.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_colli.f90 + $(FC) $(FFLAGS) -c $(SRC)$(FFT).f90 $(INC) + $(FC) $(FFLAGS) -c $(SRC)gkvp_fld.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_colliimp.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_freq.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_zfilter.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_geom.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_exb.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_trans.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_advnc.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_shearflow.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_dtc.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_out.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_set.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_main.f90 -#------------------------------> -gkvp_advnc.o : gkvp_advnc.f90 gkvp_geom.o gkvp_tips.o gkvp_zfilter.o gkvp_clock.o gkvp_bndry.o gkvp_colliimp.o gkvp_colli.o gkvp_exb.o gkvp_fld.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_bndry.o : gkvp_bndry.f90 gkvp_clock.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_clock.o : gkvp_clock.f90 gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_colli.o : gkvp_colli.f90 gkvp_bndry.o gkvp_clock.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_colliimp.o : gkvp_colliimp.f90 gkvp_fld.o 
$(MATH).o gkvp_clock.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_dtc.o : gkvp_dtc.f90 gkvp_colliimp.o gkvp_exb.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_exb.o : gkvp_exb.f90 gkvp_clock.o $(FFT).o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -$(FFT).o : $(FFT).f90 gkvp_clock.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< $(INC) -$(FILEIO).o : $(FILEIO).f90 gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< $(INC) -gkvp_fld.o : gkvp_fld.f90 gkvp_clock.o gkvp_intgrl.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_freq.o : gkvp_freq.f90 gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_geom.o : gkvp_geom.f90 gkvp_ring.o gkvp_igs.o gkvp_vmecbzx.o gkvp_intgrl.o $(MATH).o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_header.o : gkvp_header.f90 - $(FC) $(FFLAGS) -c $< -gkvp_igs.o : gkvp_igs.f90 gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_intgrl.o : gkvp_intgrl.f90 gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_main.o : gkvp_main.f90 gkvp_shearflow.o gkvp_tips.o gkvp_freq.o $(FFT).o gkvp_colliimp.o gkvp_advnc.o gkvp_fld.o gkvp_dtc.o gkvp_out.o gkvp_clock.o gkvp_set.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_mpienv.o : gkvp_mpienv.f90 gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_out.o : gkvp_out.f90 $(FILEIO).o gkvp_tips.o gkvp_dtc.o gkvp_colliimp.o gkvp_advnc.o gkvp_freq.o gkvp_trans.o gkvp_fld.o gkvp_intgrl.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_ring.o : gkvp_ring.f90 $(MATH).o - $(FC) $(FFLAGS) -c $< -gkvp_set.o : gkvp_set.f90 gkvp_geom.o $(FILEIO).o gkvp_tips.o gkvp_colliimp.o gkvp_colli.o gkvp_dtc.o gkvp_advnc.o gkvp_bndry.o gkvp_fld.o $(MATH).o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_shearflow.o : gkvp_shearflow.f90 gkvp_tips.o gkvp_fld.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_tips.o : gkvp_tips.f90 gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_trans.o : gkvp_trans.f90 $(FILEIO).o gkvp_exb.o gkvp_clock.o gkvp_intgrl.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_vmecbzx.o : gkvp_vmecbzx.f90 gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_vmecin.o : gkvp_vmecin.f90 gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -gkvp_zfilter.o : gkvp_zfilter.f90 gkvp_clock.o gkvp_mpienv.o gkvp_header.o - $(FC) $(FFLAGS) -c $< -$(MATH).o : $(MYL)$(MATH).f90 $(MYL)Bessel0_Zeros.f90 gkvp_header.o - $(FC) $(FFLAGS) -c $< -#------------------------------< + $(FC) $(FFLAGS) \ + gkvp_header.o\ + gkvp_mpienv.o\ + $(MATH).o\ + gkvp_clock.o\ + $(FILEIO).o\ + gkvp_intgrl.o\ + gkvp_tips.o\ + gkvp_vmecbzx.o\ + gkvp_igs.o\ + gkvp_ring.o\ + gkvp_bndry.o\ + gkvp_colli.o\ + $(FFT).o\ + gkvp_fld.o\ + gkvp_colliimp.o\ + gkvp_freq.o\ + gkvp_zfilter.o\ + gkvp_geom.o\ + gkvp_exb.o\ + gkvp_trans.o\ + gkvp_advnc.o\ + gkvp_shearflow.o\ + gkvp_dtc.o\ + gkvp_out.o\ + gkvp_set.o\ + gkvp_main.o\ + -o $(PROG) $(LIB) + + cp *.o *.mod *.$(OPTRPT) ../src/ + rm -f *.o *.mod *.$(OPTRPT) clean: - rm -f ../src/Makefile ../src/*.o ../src/*.mod ../src/*.$(OPTRPT) ./*.exe ./sub.q.*.o* \ + rm -f ../src/*.o ../src/*.mod ../src/*.$(OPTRPT) ./*.exe ./sub.q.*.o* \ ./*.o ./*.mod ./*.$(OPTRPT) ./*namelist.* ./sub.q.* clear: diff --git a/run/backup/Makefile_flow b/run/backup/Makefile_flow index e28c901..71a0a2c 100644 --- a/run/backup/Makefile_flow +++ b/run/backup/Makefile_flow @@ -50,6 +50,7 @@ gkvp: $(SRC)gkvp_header.f90\ $(SRC)gkvp_tips.f90\ $(SRC)gkvp_vmecbzx.f90\ $(SRC)gkvp_igs.f90\ + $(SRC)gkvp_ring.f90\ $(SRC)gkvp_bndry.f90\ 
$(SRC)gkvp_colli.f90\ $(SRC)$(FFT).f90\ @@ -57,6 +58,7 @@ gkvp: $(SRC)gkvp_header.f90\ $(SRC)gkvp_colliimp.f90\ $(SRC)gkvp_freq.f90\ $(SRC)gkvp_zfilter.f90\ + $(SRC)gkvp_geom.f90\ $(SRC)gkvp_exb.f90\ $(SRC)gkvp_trans.f90\ $(SRC)gkvp_advnc.f90\ @@ -75,6 +77,7 @@ gkvp: $(SRC)gkvp_header.f90\ $(FC) $(FFLAGS) -c $(SRC)gkvp_tips.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_vmecbzx.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_igs.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_ring.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_bndry.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_colli.f90 $(FC) $(FFLAGS) -c $(SRC)$(FFT).f90 $(INC) @@ -82,6 +85,7 @@ gkvp: $(SRC)gkvp_header.f90\ $(FC) $(FFLAGS) -c $(SRC)gkvp_colliimp.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_freq.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_zfilter.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_geom.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_exb.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_trans.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_advnc.f90 @@ -101,6 +105,7 @@ gkvp: $(SRC)gkvp_header.f90\ gkvp_tips.o\ gkvp_vmecbzx.o\ gkvp_igs.o\ + gkvp_ring.o\ gkvp_bndry.o\ gkvp_colli.o\ $(FFT).o\ @@ -108,6 +113,7 @@ gkvp: $(SRC)gkvp_header.f90\ gkvp_colliimp.o\ gkvp_freq.o\ gkvp_zfilter.o\ + gkvp_geom.o\ gkvp_exb.o\ gkvp_trans.o\ gkvp_advnc.o\ diff --git a/run/backup/Makefile_jfrs b/run/backup/Makefile_jfrs index 11ffec3..f771e28 100644 --- a/run/backup/Makefile_jfrs +++ b/run/backup/Makefile_jfrs @@ -61,6 +61,7 @@ gkvp: $(SRC)gkvp_header.f90\ $(SRC)gkvp_tips.f90\ $(SRC)gkvp_vmecbzx.f90\ $(SRC)gkvp_igs.f90\ + $(SRC)gkvp_ring.f90\ $(SRC)gkvp_bndry.f90\ $(SRC)gkvp_colli.f90\ $(SRC)$(FFT).f90\ @@ -68,6 +69,7 @@ gkvp: $(SRC)gkvp_header.f90\ $(SRC)gkvp_colliimp.f90\ $(SRC)gkvp_freq.f90\ $(SRC)gkvp_zfilter.f90\ + $(SRC)gkvp_geom.f90\ $(SRC)gkvp_exb.f90\ $(SRC)gkvp_trans.f90\ $(SRC)gkvp_advnc.f90\ @@ -87,6 +89,7 @@ gkvp: $(SRC)gkvp_header.f90\ $(FC) $(FFLAGS) -c $(SRC)gkvp_tips.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_vmecbzx.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_igs.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_ring.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_bndry.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_colli.f90 $(FC) $(FFLAGS) -c $(SRC)$(FFT).f90 $(INC) @@ -94,6 +97,7 @@ gkvp: $(SRC)gkvp_header.f90\ $(FC) $(FFLAGS) -c $(SRC)gkvp_colliimp.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_freq.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_zfilter.f90 + $(FC) $(FFLAGS) -c $(SRC)gkvp_geom.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_exb.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_trans.f90 $(FC) $(FFLAGS) -c $(SRC)gkvp_advnc.f90 @@ -112,6 +116,7 @@ gkvp: $(SRC)gkvp_header.f90\ gkvp_tips.o\ gkvp_vmecbzx.o\ gkvp_igs.o\ + gkvp_ring.o\ gkvp_bndry.o\ gkvp_colli.o\ $(FFT).o\ @@ -119,6 +124,7 @@ gkvp: $(SRC)gkvp_header.f90\ gkvp_colliimp.o\ gkvp_freq.o\ gkvp_zfilter.o\ + gkvp_geom.o\ gkvp_exb.o\ gkvp_trans.o\ gkvp_advnc.o\ diff --git a/run/shoot b/run/shoot index 507eeea..ce833cd 100755 --- a/run/shoot +++ b/run/shoot @@ -14,10 +14,10 @@ if [ $# -lt 2 ]; then fi #### Environment setting -DIR=/data/lng/maeyama/gkvp/f0.62/dev_shearflow/dev19_gamma0.2_remap +DIR=/data/group1/z43460z/gkvp/f0.61/ITGae-lin LDM=gkvp.exe NL=gkvp_namelist -SC=qsub +SC=pjsub JS=sub.q ### For VMEC, set VMCDIR including metric_boozer.bin.dat #VMCDIR=./input_vmec/vmec_sample_nss501ntheta1024nzeta0 @@ -76,19 +76,15 @@ do #${SC} ${fln_JS} if [ -z "$j" -a $i -eq $1 ]; then echo "*** submit first step job ${fln_JS} ***" - ${SC} ${fln_JS} | tee shoottempfile - j=`awk '{sub(".nqsv*",""); print $2}' shoottempfile` + ${SC} --step --sparam "sn=$i" ${fln_JS} | tee shoottempfile + j=`awk '{sub("_.*",""); print $6}' shoottempfile` rm shoottempfile else echo "*** submit sequential step job 
${fln_JS} ***"
-        ${SC} --after $j ${fln_JS} | tee shoottempfile
-        j=`awk '{sub(".nqsv*",""); print $2}' shoottempfile`
-        rm shoottempfile
+        ${SC} --step --sparam "jid=$j,sd=ec!=0:all" ${fln_JS}
     fi
-    sleep 1
     i=$(( $i + 1 ))
 done
-
diff --git a/run/sub.q b/run/sub.q
index d9cc221..6876642 100755
--- a/run/sub.q
+++ b/run/sub.q
@@ -1,65 +1,66 @@
-#!/bin/bash
+#!/bin/sh
 ### NOTE ###
-###  Plasma simulator, NEC SX-Aurora TSUBASA A412-8 (NIFS, 2020)
+###  Flow supercomputer Type I sub-system, PRIMEHPC FX1000 (Nagoya Univ, 2020)
 ###
-###  - Computation nodes (total 4320 VE (Vector engine))
-###      VE model: Type 10AE (8cores)
-###      Peak performance: DP 2.433 TFLOPS per VE
-###      Memory: HBM2 48 GiB
-###      Memory Bandwidth: ? GB/s per node
+###  - Computation nodes (total 2304 nodes)
+###      CPU: A64FX (2.0GHz, 12 cores x 4 CMG = 48 cores, 512bit SIMD) x1 per node
+###      Peak performance: DP 3.072 TFLOPS per node (Boost: 3.3792 TFLOPS)
+###      Cache L1: 64 KiB, 4 way
+###      Cache L1 Bandwidth: 230+ GB/s (load), 115+ GB/s (store)
+###      Cache L2: 8 MiB, 16 way per CMG (NUMA), 4 CMG per node
+###      Cache L2 Bandwidth: 3.6+ TB/s per node
+###                          115+ GB/s (load), 57+ GB/s (store) per core
+###      Memory: HBM2 32 GiB
+###      Memory Bandwidth: 1024 GB/s per node
 ###
-###  (For now, flat MPI is recommended.)
+###  Therefore, a recommended GKV parallelization may be
+###      (MPI processes) x (12 OpenMP threads)
+###      = (12 cores per CMG) x (4 CMG) x (number of nodes).
+###  1 MPI process should be assigned to 1 CMG.
 ###
 ###  - Interconnect
-###      Infiniband HDR200 x2, 1000BASE-Tx1, BMC
+###      Tofu Interconnect D (28 Gbps x 2 lane x 10 port)
+###      [Performance] 8B Put latency: 0.49-0.54 usec
+###                    1 MiB Put throughput: 6.35 GB/s
 ###
-###  - Job class : Computation server (SX-Aurora)
-###      small     :    1 - 16 VE,   15 min., 1 run/ 1 submit
-###      small24VE :    1 - 4 VE,    24 hour, 8 run/16 submit
-###      small24VH :    8 - 32 VE,   24 hour, 8 run/16 submit
-###      medium    :   40 - 768 VE,  10 hour, 4 run/ 8 submit
-###      large     : 1920 - 2160 VE, 10 hour, 1 run/ 4 submit
-###      large1h   : 1920 - 2160 VE,  1 hour, 1 run/ 2 submit
-###      debug     :    8 - 16 VE,   30 min., 1 run/ 1 submit, interactive
-###
-###  - Job class : Data analysis server (LX)
-###      gpu-b : 1 - 4 Servers, 10 hour, 1 run/2 submit
-###      gpu-i : 1 - 2 Servers, 10 hour, 1 run/1 submit, interactive
+###  - Job class (May 2020)
+###      fx-debug  :  1 -  36 nodes,   1 hour,  50 run/300 submit
+###      fx-small  :  1 -  24 nodes, 168 hour, 100 run/300 submit
+###      fx-middle : 12 -  96 nodes,  72 hour,  50 run/300 submit
+###      fx-large  : 96 - 192 nodes,  72 hour,  25 run/300 submit
+###      fx-xlarge : 96 - 768 nodes,  24 hour,   5 run/300 submit
 ###
 ###  - Commands
-###      (Submit a batch job : "qsub sub.q") Use shoot script for GKV.
-###      Check job status    : "qstat -a"
-###      Delete job          : "qdel JOBID"
-###      Show budget info    : "pstime"
-###      Show disk usage     : "lsquota"
+###      (Submit a batch job : "pjsub sub.q") Use shoot script for GKV.
+###      Check job status    : "pjstat" or "pjstat -E" for step jobs
+###      Delete job          : "pjdel JOBID"
+###      Show budget info    : "charge"
+###      Show disk usage     : "lfs quota -u (YOUR ACCOUNT ID) /home"
+###                          : "lfs quota -u (YOUR ACCOUNT ID) /data"
 ##############

-#PBS -q small                 # queue name
-#PBS --group=21234            # resource group
-#PBS -T necmpi                # necessary for MPI job
-#PBS -l elapstim_req=00:15:00 # elapsed time limit
-
-#PBS --venode=2               # total number of VE
-#### --venum-lhost=2          # number of VE per a logical node
-#PBS --venum-lhost=8          # number of VE per a logical node
-#PBS -v OMP_NUM_THREADS=1     # number of threads per MPI process
+#PJM --rsc-list "rscgrp=fx-debug"
+#PJM --rsc-list "node=8"
+#### --rsc-list "node=5x8x8"
+#PJM --rsc-list "elapse=00:10:00"
+#PJM --mpi "proc=32"
+#### --mpi "rank-map-bynode"
+#### --mpi "rank-map-hostfile=rankmapfile.dat"
+#PJM -j
+#PJM -s

-MPI_procs=16 # number of MPI processes (= venode*8 for flat MPI)
+NUM_NODES=${PJM_NODE}             # Nodes
+NUM_CORES=12                      # Cores per CMG (= OpenMP threads per MPI process)
+NUM_PROCS=$(( ${NUM_NODES} * 4 )) # MPI processes (1 process per CMG)
+export OMP_NUM_THREADS=12         # OpenMP threads per MPI process

-#PBS -v VE_FORT_SETBUF=10240
-#PBS -v FTRACE=YES
-#PBS -v NMPI_PROGINF=DETAIL
-#PBS -v NMPI_SEPSELECT=3
-#PBS -v LANG=C
+echo "                  Nodes: ${NUM_NODES}"
+echo "          Cores per CMG: ${NUM_CORES}"
+echo "          MPI Processes: ${NUM_PROCS}"
+echo " OpenMP threads per MPI: ${OMP_NUM_THREADS}"

-source /ect/profile.d/modules.sh
-
-module load NECNLC-sx
-# module load NECNLC-mpi-sx
-### For NetCDF
-module load netcdf-parallelIO-fortran-sx

 ### Working directory
@@ -67,36 +68,60 @@ DIR=%%DIR%%
 LDM=gkvp.exe
 NL=gkvp_namelist.%%%

+export XOS_MMM_L_PAGING_POLICY=demand:demand:demand # Paging policy for large pages

-date
-cd ${DIR}
-export fu05=${DIR}/${NL}
-
+export PLE_MPI_STD_EMPTYFILE="off" # Do not create empty (size-0) stdout files

-#cat << 'EOF-S' > ./mpisep.sh
-##!/bin/sh
-#ulimit -s unlimited
-#ID=${MPIUNIVERSE}.`printf "%05d" ${MPIRANK}`
-#case ${NMPI_SEPSELECT:-${MPISEPSELECT:-2}} in
-#1) exec $* 1>> stdout.${ID} ;;
-#2) exec $* 2>> stderr.${ID} ;;
-#3) exec $* 1>> stdout.${ID} 2>> stderr.${ID} ;;
-#4) exec $* 1>> std.${ID} 2>&1 ;;
-#*) exec $* ;;
-#esac
-#EOF-S
-#chmod 777 ./mpisep.sh
-#
-##---( time mpiexec -v -nn ${_NECMPI_VH_NUM_NODES} -ve 0-7 -ppn 64 ./mpisep.sh ./${LDM} ) > log.mpi 2>&1
-#( time mpiexec -v -nn ${_NECMPI_VH_NUM_NODES} -ve 0-7 -ppn 64 -n ${MPI_procs} ./mpisep.sh ./${LDM} ) > log.mpi 2>&1
+module load fftw-tune phdf5 netcdf-c netcdf-fortran
+###module unload tcs
+###module load fftw/3.3.8
+###export PATH=/opt/FJSVxtclanga/tcsds-1.2.25/bin:$PATH
+###export LD_LIBRARY_PATH=/opt/FJSVxtclanga/tcsds-1.2.25/lib64:$LD_LIBRARY_PATH
+###export OPAL_PREFIX=/opt/FJSVxtclanga/tcsds-1.2.25

-mpirun -n ${MPI_procs} ${DIR}/${LDM}

+#### Run
 date
 cd ${DIR}
 export fu05=${DIR}/${NL}
+mpiexec -n ${NUM_PROCS} ${DIR}/${LDM}
+  # -n "Total number of MPI processes"
 date

-#touch complete
-
-#---#PBS -l coresz_prc=10
-#---#PBS --venum-lhost=8
-#---#PBS -b 4 # number of nodes
+##### Run with Fujitsu profiler fipp (re-compile with -Nfjprof option)
+#date
+#cd ${DIR}
+#export fu05=${DIR}/${NL}
+#fipp -C -d ${DIR}/fjprof_dir/pa0 -Icpupa -Impi -Sregion mpiexec -n ${NUM_PROCS} ${DIR}/${LDM}
+#date
+#echo "#!/bin/sh" > ${DIR}/fjprof_dir/fugaku_fipppx.sh
+#echo "set -Ceu" >> ${DIR}/fjprof_dir/fugaku_fipppx.sh
+#echo "set -x" >> ${DIR}/fjprof_dir/fugaku_fipppx.sh
+#echo "fipppx -A -d pa0 -Icpupa -p0,limit=4 -o prof_cpupa.txt" >> ${DIR}/fjprof_dir/fugaku_fipppx.sh
+#echo "fipppx -A -d pa0 -Ibalance -p0,limit=4 -o prof_balance.txt" >> ${DIR}/fjprof_dir/fugaku_fipppx.sh
+#echo "#fipppx -A -d pa0 -Icall -p0,limit=4 -o prof_call.txt" >> ${DIR}/fjprof_dir/fugaku_fipppx.sh +#echo "fipppx -A -d pa0 -Isrc:./src -p0,limit=4 -o prof_src.txt" >> ${DIR}/fjprof_dir/fugaku_fipppx.sh + + +##### Run with Fujitsu profiler fapp (re-compile with -Nfjprof option) +#date +#cd ${DIR} +#export fu05=${DIR}/${NL} +#Npa=1 # Elementary report +##Npa=5 # Simple report +##Npa=11 # Standard report +##Npa=17 # Detailed report +#for i in `seq 1 ${Npa}`; do +# echo "pa"${i} `date` +# fapp -C -d ${DIR}/fjprof_dir/pa${i} -Hevent=pa${i} -Sregion mpiexec -n ${NUM_PROCS} ${DIR}/${LDM} +#done +#date +# +#echo "#!/bin/sh" > ${DIR}/fjprof_dir/fugaku_fapppx.sh +#for i in `seq 1 ${Npa}`; do +# echo "fapppx -A -d ./pa${i} -Icpupa,mpi -tcsv -o pa${i}.csv" >> ${DIR}/fjprof_dir/fugaku_fapppx.sh +#done +#echo "cp /opt/FJSVxtclanga/tcsds-1.2.25/misc/cpupa/cpu_pa_report.xlsm ./" >> ${DIR}/fjprof_dir/fugaku_fapppx.sh +# +#
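-- 
Note: the build-and-submit cycle implied by the updated run/Makefile and
shoot script is roughly the following (a sketch; the step range "1 3" is
an assumed example -- see the usage message in shoot for the exact
arguments):

    cd run
    make           # compile ../src/*.f90, link gkvp.exe into run/,
                   # then copy *.o and *.mod back to ../src/
    ./shoot 1 3    # e.g. submit steps 1..3 as a pjsub step-job chain
    make clean     # remove objects, modules, executables and job logs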
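The submission loop in run/shoot reduces to the following pjsub step-job
pattern (a minimal sketch with a single job script sub.q; shoot extracts
the job ID in exactly this way):

    pjsub --step --sparam "sn=1" sub.q | tee shoottempfile
    # pjsub prints e.g. "[INFO] PJM 0000 pjsub Job 123456_1 submitted.";
    # sub() deletes everything from the first "_" to the end of the line,
    # so field 6 becomes the bare parent job ID.
    j=`awk '{sub("_.*",""); print $6}' shoottempfile`
    rm shoottempfile
    # Subsequent steps attach to job $j; as used here, "sd=ec!=0:all" is a
    # step dependency that cancels all remaining steps when an earlier
    # step exits with a non-zero code.
    pjsub --step --sparam "jid=$j,sd=ec!=0:all" sub.q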
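The resource numbers in sub.q must stay consistent with one MPI rank per
CMG: "proc" is four times "node", and 12 OpenMP threads fill one CMG. A
quick sanity check for the node=8 example above:

    NODES=8
    PROCS=$(( NODES * 4 ))    # must equal the "#PJM --mpi proc=..." value (32)
    THREADS=12                # OMP_NUM_THREADS: one thread per core of a CMG
    echo "cores used: $(( PROCS * THREADS ))"   # 384 = 8 nodes x 48 cores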